Changed the formats of private keys and shared secrets. Added SHA512 support; the SHA512 code is slow and probably has endian bugs. Signatures are now supported. Renamed a bunch of internal functions to be more readable and consistent. Began documenting functions with Doxygen. See HISTORY.txt for more details.
master
| @@ -0,0 +1,31 @@ | |||||
| March 5, 2014: | |||||
| First revision. | |||||
| Private keys are now longer. They now store a copy of the public key, and | |||||
| a secret symmetric key for signing purposes. | |||||
| Signatures are now supported, though like everything else in this library, | |||||
| their format is not stable. They use a deterministic Schnorr mode, | |||||
| similar to EdDSA. Precomputed low-latency signing is not supported (yet?). | |||||
| The hash function is SHA-512. | |||||
| The deterministic hashing mode needs to be changed to HMAC (TODO!). It's | |||||
| currently envelope-MAC. | |||||
| Probably in the future there will be a distinction between ECDH keys and | |||||
| signing keys (and possibly also MQV keys, etc.). | |||||
| Began renaming internal functions. Removing p448_ prefixes from EC point | |||||
| operations. Trying to put the verb first. For example, | |||||
| "p448_isogeny_un_to_tw" is now called "twist_and_double". | |||||
| Began documenting with Doxygen. Use "make doc" to make a very incomplete | |||||
| documentation directory. | |||||
| There have been many other internal changes. | |||||
| Feb 21, 2014: | |||||
| Initial import and benchmarking scripts. | |||||
| Keygen and ECDH are implemented, but there's no hash function. | |||||
| @@ -3,19 +3,20 @@ | |||||
| CC = clang | CC = clang | ||||
| CFLAGS = -O3 -std=c99 -pedantic -Wall -Wextra -Werror \ | CFLAGS = -O3 -std=c99 -pedantic -Wall -Wextra -Werror \ | ||||
| -mavx2 -DMUST_HAVE_SSSE3 -mbmi2 \ | |||||
| -ffunction-sections -fdata-sections -fomit-frame-pointer -fPIC | |||||
| -mssse3 -maes -mavx2 -DMUST_HAVE_AVX -mbmi2 \ | |||||
| -ffunction-sections -fdata-sections -fomit-frame-pointer -fPIC \ | |||||
| -DEXPERIMENT_ECDH_OBLITERATE_CT=1 -DEXPERIMENT_ECDH_STIR_IN_PUBKEYS=1 | |||||
| .PHONY: clean all runbench | |||||
| .PHONY: clean all runbench todo doc | |||||
| .PRECIOUS: build/%.s | .PRECIOUS: build/%.s | ||||
| HEADERS= Makefile $(shell find . -name "*.h") build/timestamp | HEADERS= Makefile $(shell find . -name "*.h") build/timestamp | ||||
| LIBCOMPONENTS= build/goldilocks.o build/barrett_field.o build/crandom.o \ | LIBCOMPONENTS= build/goldilocks.o build/barrett_field.o build/crandom.o \ | ||||
| build/p448.o build/ec_point.o build/scalarmul.o | |||||
| build/p448.o build/ec_point.o build/scalarmul.o build/sha512.o | |||||
| all: bench | all: bench | ||||
| bench: *.h *.c | bench: *.h *.c | ||||
| $(CC) $(CFLAGS) -o $@ *.c | $(CC) $(CFLAGS) -o $@ *.c | ||||
| @@ -34,7 +35,26 @@ build/goldilocks.so: $(LIBCOMPONENTS) | |||||
| libtool -macosx_version_min 10.6 -dynamic -dead_strip -lc -x -o $@ \ | libtool -macosx_version_min 10.6 -dynamic -dead_strip -lc -x -o $@ \ | ||||
| -exported_symbols_list exported.sym \ | -exported_symbols_list exported.sym \ | ||||
| $(LIBCOMPONENTS) | $(LIBCOMPONENTS) | ||||
| doc/timestamp: | |||||
| mkdir -p doc | |||||
| touch $@ | |||||
| doc: Doxyfile doc/timestamp *.c *.h | |||||
| doxygen | |||||
| todo:: | |||||
| @egrep --color=auto -w -i 'hack|todo|fixme|bug|xxx|perf|future|remove' *.h *.c | |||||
| @echo '=============================' | |||||
| @(for i in FIXME BUG XXX TODO HACK PERF FUTURE REMOVE; do \ | |||||
| egrep -w -i $$i *.h *.c > /dev/null || continue; \ | |||||
| /bin/echo -n $$i' ' | head -c 10; \ | |||||
| egrep -w -i $$i *.h *.c | wc -l; \ | |||||
| done) | |||||
| @echo '=============================' | |||||
| @echo -n 'Total ' | |||||
| @egrep -w -i 'hack|todo|fixme|bug|xxx|perf|future|remove' *.h *.c | wc -l | |||||
| runbench: bench | runbench: bench | ||||
| ./$< | ./$< | ||||
| @@ -1,15 +1,8 @@ | |||||
| Important work items for Ed448-Goldilocks: | Important work items for Ed448-Goldilocks: | ||||
| * Import SHA-512 or SHA-3. | |||||
| * Decide which. | |||||
| * Get a public-domain version which is 64-bit and 32-bit clean. | |||||
| * Update LICENSE and README to reflect that SHA is not my code. | |||||
| * Incorporate hashing into goldilocks_shared_secret. | |||||
| * It's a pretty terrible shared secret right now. | |||||
| * Decide on output size | |||||
| * Documentation: write high-level API docs, and internal docs to help | * Documentation: write high-level API docs, and internal docs to help | ||||
| other implementors. | other implementors. | ||||
| * Partial progress on Doxygenating the code. | |||||
| * Documentation: write a spec or add to Watson's | * Documentation: write a spec or add to Watson's | ||||
| @@ -37,12 +30,13 @@ Important work items for Ed448-Goldilocks: | |||||
| * Testing: | * Testing: | ||||
| * Corner-case testing | * Corner-case testing | ||||
| * more bulk random testing | |||||
| * More bulk random testing | |||||
| * Negative testing. | |||||
| * SAGE-(auto?)-generated test vectors | * SAGE-(auto?)-generated test vectors | ||||
| * Test the Barrett fields | * Test the Barrett fields | ||||
| * Safety: add static analysis attributes for compilers that support them | * Safety: add static analysis attributes for compilers that support them | ||||
| * EG, warn on ignored return types | |||||
| * Most functions now warn on an ignored return value. | |||||
| * Safety: | * Safety: | ||||
| * Check for init() if it's still required once we've done the above | * Check for init() if it's still required once we've done the above | ||||
| @@ -65,17 +59,19 @@ Important work items for Ed448-Goldilocks: | |||||
| * Scalarmul with other cofactor modes. | * Scalarmul with other cofactor modes. | ||||
| * High-level API: | * High-level API: | ||||
| * Signatures. | |||||
| * Decide on strictness level. | |||||
| * SPAKE2 Elligator Edition? Maybe write a paper first. | * SPAKE2 Elligator Edition? Maybe write a paper first. | ||||
| * Elligator. | * Elligator. | ||||
| * Need to write Elligator inverse. Might not be Elligator-2S. | * Need to write Elligator inverse. Might not be Elligator-2S. | ||||
| * FHMQV? Is this patented? | |||||
| * What low-level APIs to expose? | * What low-level APIs to expose? | ||||
| * Edwards points with add, sub, scalarmul, =, ==, ser/deser? | * Edwards points with add, sub, scalarmul, =, ==, ser/deser? | ||||
| * Portability: test and make clean with other compilers | |||||
| * Using a fair amount of __attribute__ code. | |||||
| * Portability: try to make the vector code as portable as possible | * Portability: try to make the vector code as portable as possible | ||||
| * Currently using clang ext_vector_length. | * Currently using clang ext_vector_length. | ||||
| * I can't get a simple for-loop to autovectorize :-/ | * I can't get a simple for-loop to autovectorize :-/ | ||||
| @@ -89,8 +85,7 @@ Important work items for Ed448-Goldilocks: | |||||
| * Run through the SAGE tool to generate new bias & bound. | * Run through the SAGE tool to generate new bias & bound. | ||||
| * Portability: make the outer layers of the code 32-bit clean. | * Portability: make the outer layers of the code 32-bit clean. | ||||
| * I don't think that there are endian bugs, but who knows? | |||||
| * There are endian bugs in the signing algorithm. | |||||
| * NEON and vectorless constant-time comparison. | * NEON and vectorless constant-time comparison. | ||||
| * Performance: write and incorporate some extra routines | * Performance: write and incorporate some extra routines | ||||
| @@ -99,6 +94,11 @@ Important work items for Ed448-Goldilocks: | |||||
| * Performance: fixed parameters? | * Performance: fixed parameters? | ||||
| * Perhaps useful for comb precomputation. | * Perhaps useful for comb precomputation. | ||||
| * Performance: Improve SHA512. | |||||
| * Improve portability. | |||||
| * Improve speed. | |||||
| * Decide what things to stir into hashes for various functions. | |||||
| * Performance: improve the Barrett field code. | * Performance: improve the Barrett field code. | ||||
| * Support other primes? | * Support other primes? | ||||
| @@ -109,6 +109,42 @@ widemac( | |||||
| return carry; | return carry; | ||||
| } | } | ||||
| void /* Negate a modulo p in place: reduce a, then overwrite it with p - a. */ | |||||
| barrett_negate ( | |||||
| word_t *a, /* in/out: nwords_a words; reduced first, then replaced by p - a */ | |||||
| int nwords_a, /* number of words in a */ | |||||
| const word_t *p_lo, /* low part of the modulus representation: p = 2^big - p_lo */ | |||||
| int nwords_p, /* number of words in p */ | |||||
| int nwords_lo, /* number of words in p_lo */ | |||||
| int p_shift /* bit offset of the 2^big term inside word nwords_p-1 */ | |||||
| ) { | |||||
| int i; | |||||
| dsword_t carry = 0; /* running borrow between words; presumably a signed double-width word -- confirm against its typedef */ | |||||
| barrett_reduce(a,nwords_a,0,p_lo,nwords_p,nwords_lo,p_shift); /* reduce a before negating it */ | |||||
| /* Have p = 2^big - p_lo. Want p - a = 2^big - p_lo - a */ | |||||
| for (i=0; i<nwords_lo; i++) { /* words covered by p_lo: subtract both p_lo and a */ | |||||
| a[i] = carry = carry - p_lo[i] - a[i]; /* low word is stored; the full value stays in carry */ | |||||
| carry >>= WORD_BITS; /* NOTE(review): if dsword_t is signed and carry is negative, this relies on an arithmetic right shift, which C leaves implementation-defined */ | |||||
| } | |||||
| for (; i<nwords_p; i++) { /* remaining words of p: only a is subtracted */ | |||||
| a[i] = carry = carry - a[i]; | |||||
| if (i<nwords_p-1) { /* the last word's carry is left unshifted so it can be reused just below */ | |||||
| carry >>= WORD_BITS; | |||||
| } | |||||
| } | |||||
| a[nwords_p-1] = carry = carry + (((word_t)1) << p_shift); /* add the 2^big term into the top word of p; NOTE(review): looks like this assumes nwords_lo < nwords_p, otherwise carry was already shifted -- confirm */ | |||||
| for (; i<nwords_a; i++) { /* any words of a above p must already be zero */ | |||||
| assert(!a[i]); | |||||
| } | |||||
| assert(!(carry>>64)); /* NOTE(review): hard-codes 64 where the loops use WORD_BITS -- confirm they agree on every target */ | |||||
| } | |||||
| void | void | ||||
| barrett_reduce( | barrett_reduce( | ||||
| word_t *a, | word_t *a, | ||||
| @@ -195,14 +231,6 @@ barrett_mul_or_mac( | |||||
| tmp[i] = 0; | tmp[i] = 0; | ||||
| } | } | ||||
| if (doMac) { | |||||
| for (i=0; i<nwords_accum; i++) { | |||||
| tmp[i] = accum[i]; | |||||
| } | |||||
| barrett_reduce(tmp, nwords_tmp, 0, p_lo, nwords_p, nwords_lo, p_shift); | |||||
| } | |||||
| for (bpos=nwords_b-1; bpos >= 0; bpos--) { | for (bpos=nwords_b-1; bpos >= 0; bpos--) { | ||||
| /* Invariant at the beginning of the loop: the high word is unused. */ | /* Invariant at the beginning of the loop: the high word is unused. */ | ||||
| assert(tmp[nwords_tmp-1] == 0); | assert(tmp[nwords_tmp-1] == 0); | ||||
| @@ -211,6 +239,7 @@ barrett_mul_or_mac( | |||||
| for (i=nwords_tmp-2; i>=0; i--) { | for (i=nwords_tmp-2; i>=0; i--) { | ||||
| tmp[i+1] = tmp[i]; | tmp[i+1] = tmp[i]; | ||||
| } | } | ||||
| tmp[0] = 0; | |||||
| /* mac and reduce */ | /* mac and reduce */ | ||||
| word_t carry = widemac(tmp, nwords_tmp, a, nwords_a, b[bpos], 0); | word_t carry = widemac(tmp, nwords_tmp, a, nwords_a, b[bpos], 0); | ||||
| @@ -223,6 +252,11 @@ barrett_mul_or_mac( | |||||
| * so the high word is again clear */ | * so the high word is again clear */ | ||||
| } | } | ||||
| if (doMac) { | |||||
| word_t cout = add_nr_packed(tmp, accum, nwords_accum); | |||||
| barrett_reduce(tmp, nwords_tmp, cout, p_lo, nwords_p, nwords_lo, p_shift); | |||||
| } | |||||
| for (i=0; i<nwords_tmp && i<nwords_accum; i++) { | for (i=0; i<nwords_tmp && i<nwords_accum; i++) { | ||||
| accum[i] = tmp[i]; | accum[i] = tmp[i]; | ||||
| } | } | ||||
| @@ -44,6 +44,16 @@ sub_nr_ext_packed( | |||||
| int nwords_c, | int nwords_c, | ||||
| word_t mask | word_t mask | ||||
| ); | ); | ||||
| void /* Negate a modulo p in place, where p = 2^big - p_lo. */ | |||||
| barrett_negate ( | |||||
| word_t *a, /* in/out: value to negate, nwords_a words */ | |||||
| int nwords_a, /* number of words in a */ | |||||
| const word_t *p_lo, /* low part of the modulus representation */ | |||||
| int nwords_p, /* number of words in p */ | |||||
| int nwords_lo, /* number of words in p_lo */ | |||||
| int p_shift /* bit offset of the 2^big term inside the top word of p */ | |||||
| ); | |||||
| /* | /* | ||||
| * If doMac, accum = accum + a*b mod p. | * If doMac, accum = accum + a*b mod p. | ||||
| @@ -14,6 +14,7 @@ | |||||
| #include "barrett_field.h" | #include "barrett_field.h" | ||||
| #include "crandom.h" | #include "crandom.h" | ||||
| #include "goldilocks.h" | #include "goldilocks.h" | ||||
| #include "sha512.h" | |||||
| word_t q448_lo[4] = { | word_t q448_lo[4] = { | ||||
| 0xdc873d6d54a7bb0dull, | 0xdc873d6d54a7bb0dull, | ||||
| @@ -129,6 +130,23 @@ int main(int argc, char **argv) { | |||||
| when = now() - when; | when = now() - when; | ||||
| printf("rand448: %5.1fns\n", when * 1e9 / i); | printf("rand448: %5.1fns\n", when * 1e9 / i); | ||||
| struct sha512_ctx_t sha; | |||||
| uint8_t hashout[128]; | |||||
| when = now(); | |||||
| for (i=0; i<10000; i++) { | |||||
| sha512_init(&sha); | |||||
| sha512_final(&sha, hashout); | |||||
| } | |||||
| when = now() - when; | |||||
| printf("sha512 1blk: %5.1fns\n", when * 1e9 / i); | |||||
| when = now(); | |||||
| for (i=0; i<10000; i++) { | |||||
| sha512_update(&sha, hashout, 128); | |||||
| } | |||||
| when = now() - when; | |||||
| printf("sha512 blk: %5.1fns (%0.2f MB/s)\n", when * 1e9 / i, 128*i/when/1e6); | |||||
| when = now(); | when = now(); | ||||
| for (i=0; i<10000; i++) { | for (i=0; i<10000; i++) { | ||||
| p448_isr(&c, &a); | p448_isr(&c, &a); | ||||
| @@ -161,7 +179,7 @@ int main(int argc, char **argv) { | |||||
| for (i=0; i<100; i++) { | for (i=0; i<100; i++) { | ||||
| p448_randomize(&crand, &a); | p448_randomize(&crand, &a); | ||||
| elligator_2s_inject(&affine, &a); | elligator_2s_inject(&affine, &a); | ||||
| if (!p448_affine_validate(&affine)) { | |||||
| if (!validate_affine(&affine)) { | |||||
| printf("Elligator validation failure!\n"); | printf("Elligator validation failure!\n"); | ||||
| p448_print("a", &a); | p448_print("a", &a); | ||||
| p448_print("x", &affine.x); | p448_print("x", &affine.x); | ||||
| @@ -171,14 +189,14 @@ int main(int argc, char **argv) { | |||||
| when = now(); | when = now(); | ||||
| for (i=0; i<10000; i++) { | for (i=0; i<10000; i++) { | ||||
| affine_deserialize(&affine, &a); | |||||
| deserialize_affine(&affine, &a); | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("decompress: %5.1fµs\n", when * 1e6 / i); | printf("decompress: %5.1fµs\n", when * 1e6 / i); | ||||
| when = now(); | when = now(); | ||||
| for (i=0; i<10000; i++) { | for (i=0; i<10000; i++) { | ||||
| extensible_serialize(&a, &exta); | |||||
| serialize_extensible(&a, &exta); | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("compress: %5.1fµs\n", when * 1e6 / i); | printf("compress: %5.1fµs\n", when * 1e6 / i); | ||||
| @@ -186,8 +204,8 @@ int main(int argc, char **argv) { | |||||
| int goods = 0; | int goods = 0; | ||||
| for (i=0; i<100; i++) { | for (i=0; i<100; i++) { | ||||
| p448_randomize(&crand, &a); | p448_randomize(&crand, &a); | ||||
| mask_t good = affine_deserialize(&affine, &a); | |||||
| if (good & !p448_affine_validate(&affine)) { | |||||
| mask_t good = deserialize_affine(&affine, &a); | |||||
| if (good & !validate_affine(&affine)) { | |||||
| printf("Deserialize validation failure!\n"); | printf("Deserialize validation failure!\n"); | ||||
| p448_print("a", &a); | p448_print("a", &a); | ||||
| p448_print("x", &affine.x); | p448_print("x", &affine.x); | ||||
| @@ -195,7 +213,7 @@ int main(int argc, char **argv) { | |||||
| } else if (good) { | } else if (good) { | ||||
| goods++; | goods++; | ||||
| convert_affine_to_extensible(&exta,&affine); | convert_affine_to_extensible(&exta,&affine); | ||||
| extensible_serialize(&b, &exta); | |||||
| serialize_extensible(&b, &exta); | |||||
| p448_sub(&c,&b,&a); | p448_sub(&c,&b,&a); | ||||
| p448_bias(&c,2); | p448_bias(&c,2); | ||||
| if (!p448_is_zero(&c)) { | if (!p448_is_zero(&c)) { | ||||
| @@ -203,7 +221,7 @@ int main(int argc, char **argv) { | |||||
| p448_print("a", &a); | p448_print("a", &a); | ||||
| p448_print("x", &affine.x); | p448_print("x", &affine.x); | ||||
| p448_print("y", &affine.y); | p448_print("y", &affine.y); | ||||
| affine_deserialize(&affine, &b); | |||||
| deserialize_affine(&affine, &b); | |||||
| p448_print("b", &b); | p448_print("b", &b); | ||||
| p448_print("x", &affine.x); | p448_print("x", &affine.x); | ||||
| p448_print("y", &affine.y); | p448_print("y", &affine.y); | ||||
| @@ -230,52 +248,52 @@ int main(int argc, char **argv) { | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("barrett red: %5.1fns\n", when * 1e9 / i); | printf("barrett red: %5.1fns\n", when * 1e9 / i); | ||||
| when = now(); | |||||
| for (i=0; i<100000; i++) { | |||||
| barrett_mac(lsk,7,lsk,7,lsk,7,q448_lo,7,4,62); | |||||
| } | |||||
| when = now() - when; | |||||
| printf("barrett mac: %5.1fns\n", when * 1e9 / i); | |||||
| // | |||||
| // when = now(); | |||||
| // for (i=0; i<100000; i++) { | |||||
| // barrett_mac(lsk,7,lsk,7,lsk,7,q448_lo,7,4,62); | |||||
| // } | |||||
| // when = now() - when; | |||||
| // printf("barrett mac: %5.1fns\n", when * 1e9 / i); | |||||
| when = now(); | when = now(); | ||||
| for (i=0; i<1000000; i++) { | for (i=0; i<1000000; i++) { | ||||
| p448_tw_extensible_add_niels(&ext, &niels); | |||||
| add_tw_niels_to_tw_extensible(&ext, &niels); | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("exti+niels: %5.1fns\n", when * 1e9 / i); | printf("exti+niels: %5.1fns\n", when * 1e9 / i); | ||||
| when = now(); | when = now(); | ||||
| for (i=0; i<1000000; i++) { | for (i=0; i<1000000; i++) { | ||||
| p448_tw_extensible_add_pniels(&ext, &pniels); | |||||
| add_tw_pniels_to_tw_extensible(&ext, &pniels); | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("exti+pniels: %5.1fns\n", when * 1e9 / i); | printf("exti+pniels: %5.1fns\n", when * 1e9 / i); | ||||
| when = now(); | when = now(); | ||||
| for (i=0; i<1000000; i++) { | for (i=0; i<1000000; i++) { | ||||
| p448_tw_extensible_double(&ext); | |||||
| double_tw_extensible(&ext); | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("exti dbl: %5.1fns\n", when * 1e9 / i); | printf("exti dbl: %5.1fns\n", when * 1e9 / i); | ||||
| when = now(); | when = now(); | ||||
| for (i=0; i<1000000; i++) { | for (i=0; i<1000000; i++) { | ||||
| p448_isogeny_tw_to_un(&exta, &ext); | |||||
| untwist_and_double(&exta, &ext); | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("i->a isog: %5.1fns\n", when * 1e9 / i); | printf("i->a isog: %5.1fns\n", when * 1e9 / i); | ||||
| when = now(); | when = now(); | ||||
| for (i=0; i<1000000; i++) { | for (i=0; i<1000000; i++) { | ||||
| p448_isogeny_un_to_tw(&ext, &exta); | |||||
| twist_and_double(&ext, &exta); | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("a->i isog: %5.1fns\n", when * 1e9 / i); | printf("a->i isog: %5.1fns\n", when * 1e9 / i); | ||||
| when = now(); | when = now(); | ||||
| for (i=0; i<1000000; i++) { | for (i=0; i<1000000; i++) { | ||||
| p448_montgomery_step(&mb); | |||||
| montgomery_step(&mb); | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("monty step: %5.1fns\n", when * 1e9 / i); | printf("monty step: %5.1fns\n", when * 1e9 / i); | ||||
| @@ -295,14 +313,20 @@ int main(int argc, char **argv) { | |||||
| printf("edwards smz: %5.1fµs\n", when * 1e6 / i); | printf("edwards smz: %5.1fµs\n", when * 1e6 / i); | ||||
| when = now(); | when = now(); | ||||
| int sum = 0; | |||||
| for (i=0; i<1000; i++) { | |||||
| edwards_scalar_multiply_vlook(&ext,sk); | |||||
| untwist_and_double_and_serialize(&a,&ext); | |||||
| } | |||||
| when = now() - when; | |||||
| printf("edwards svl: %5.1fµs\n", when * 1e6 / i); | |||||
| when = now(); | |||||
| for (i=0; i<1000; i++) { | for (i=0; i<1000; i++) { | ||||
| q448_randomize(&crand, sk); | q448_randomize(&crand, sk); | ||||
| sum += edwards_scalar_multiply_vt(&ext,sk); | |||||
| edwards_scalar_multiply_vt(&ext,sk); | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("edwards vtm: %5.1fµs (%0.2f avg bits = 1.5 + 448/%0.2f)\n", | |||||
| when * 1e6 / i, 1.0*sum/i, 448.0*i/(sum-1.5*i)); | |||||
| printf("edwards vtm: %5.1fµs\n", when * 1e6 / i); | |||||
| struct tw_niels_t wnaft[1<<6]; | struct tw_niels_t wnaft[1<<6]; | ||||
| when = now(); | when = now(); | ||||
| @@ -351,23 +375,22 @@ int main(int argc, char **argv) { | |||||
| printf("edwards vt5: %5.1fµs\n", when * 1e6 / i); | printf("edwards vt5: %5.1fµs\n", when * 1e6 / i); | ||||
| when = now(); | when = now(); | ||||
| sum = 0; | |||||
| for (i=0; i<1000; i++) { | for (i=0; i<1000; i++) { | ||||
| q448_randomize(&crand, sk); | q448_randomize(&crand, sk); | ||||
| q448_randomize(&crand, tk); | q448_randomize(&crand, tk); | ||||
| sum += edwards_combo_var_fixed_vt(&ext,sk,tk,wnaft,5); | |||||
| edwards_combo_var_fixed_vt(&ext,sk,tk,wnaft,5); | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("vt vf combo: %5.1fµs (avg = %0.3f)\n", when * 1e6 / i, 1.0*sum/i); | |||||
| printf("vt vf combo: %5.1fµs\n", when * 1e6 / i); | |||||
| when = now(); | when = now(); | ||||
| for (i=0; i<1000; i++) { | for (i=0; i<1000; i++) { | ||||
| affine_deserialize(&affine, &a); | |||||
| deserialize_affine(&affine, &a); | |||||
| convert_affine_to_extensible(&exta,&affine); | convert_affine_to_extensible(&exta,&affine); | ||||
| p448_isogeny_un_to_tw(&ext,&exta); | |||||
| twist_and_double(&ext,&exta); | |||||
| edwards_scalar_multiply(&ext,sk); | edwards_scalar_multiply(&ext,sk); | ||||
| p448_isogeny_tw_to_un(&exta,&ext); | |||||
| extensible_serialize(&b, &exta); | |||||
| untwist_and_double(&exta,&ext); | |||||
| serialize_extensible(&b, &exta); | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("edwards sm: %5.1fµs\n", when * 1e6 / i); | printf("edwards sm: %5.1fµs\n", when * 1e6 / i); | ||||
| @@ -376,10 +399,10 @@ int main(int argc, char **argv) { | |||||
| while (1) { | while (1) { | ||||
| p448_randomize(&crand, &a); | p448_randomize(&crand, &a); | ||||
| if (affine_deserialize(&affine, &a)) break; | |||||
| if (deserialize_affine(&affine, &a)) break; | |||||
| } | } | ||||
| convert_affine_to_extensible(&exta,&affine); | convert_affine_to_extensible(&exta,&affine); | ||||
| p448_isogeny_un_to_tw(&ext,&exta); | |||||
| twist_and_double(&ext,&exta); | |||||
| when = now(); | when = now(); | ||||
| for (i=0; i<1000; i++) { | for (i=0; i<1000; i++) { | ||||
| precompute_for_combs(table, &ext, 5, 5, 18); | precompute_for_combs(table, &ext, 5, 5, 18); | ||||
| @@ -400,13 +423,6 @@ int main(int argc, char **argv) { | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("com(3,5,30): %5.1fµs\n", when * 1e6 / i); | printf("com(3,5,30): %5.1fµs\n", when * 1e6 / i); | ||||
| when = now(); | |||||
| for (i=0; i<10000; i++) { | |||||
| edwards_comb(&ext, sk, table, 2, 5, 45); | |||||
| } | |||||
| when = now() - when; | |||||
| printf("com(2,5,45): %5.1fµs\n", when * 1e6 / i); | |||||
| when = now(); | when = now(); | ||||
| for (i=0; i<10000; i++) { | for (i=0; i<10000; i++) { | ||||
| @@ -419,8 +435,8 @@ int main(int argc, char **argv) { | |||||
| for (i=0; i<10000; i++) { | for (i=0; i<10000; i++) { | ||||
| q448_randomize(&crand, sk); | q448_randomize(&crand, sk); | ||||
| edwards_comb(&ext, sk, table, 5, 5, 18); | edwards_comb(&ext, sk, table, 5, 5, 18); | ||||
| p448_isogeny_tw_to_un(&exta,&ext); | |||||
| extensible_serialize(&b, &exta); | |||||
| untwist_and_double(&exta,&ext); | |||||
| serialize_extensible(&b, &exta); | |||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("keygen: %5.1fµs\n", when * 1e6 / i); | printf("keygen: %5.1fµs\n", when * 1e6 / i); | ||||
| @@ -430,14 +446,15 @@ int main(int argc, char **argv) { | |||||
| int res = goldilocks_init(); | int res = goldilocks_init(); | ||||
| assert(!res); | assert(!res); | ||||
| uint8_t gpk[56],gsk[56],hsk[56],hpk[56]; | |||||
| struct goldilocks_public_key_t gpk,hpk; | |||||
| struct goldilocks_private_key_t gsk,hsk; | |||||
| when = now(); | when = now(); | ||||
| for (i=0; i<10000; i++) { | for (i=0; i<10000; i++) { | ||||
| if (i&1) { | if (i&1) { | ||||
| res = goldilocks_keygen(gsk,gpk); | |||||
| res = goldilocks_keygen(&gsk,&gpk); | |||||
| } else { | } else { | ||||
| res = goldilocks_keygen(hsk,hpk); | |||||
| res = goldilocks_keygen(&hsk,&hpk); | |||||
| } | } | ||||
| assert(!res); | assert(!res); | ||||
| } | } | ||||
| @@ -449,14 +466,14 @@ int main(int argc, char **argv) { | |||||
| when = now(); | when = now(); | ||||
| for (i=0; i<10000; i++) { | for (i=0; i<10000; i++) { | ||||
| if (i&1) { | if (i&1) { | ||||
| gres1 = goldilocks_shared_secret(ss1,gsk,hpk); | |||||
| gres1 = goldilocks_shared_secret(ss1,&gsk,&hpk); | |||||
| } else { | } else { | ||||
| gres2 = goldilocks_shared_secret(ss2,hsk,gpk); | |||||
| gres2 = goldilocks_shared_secret(ss2,&hsk,&gpk); | |||||
| } | } | ||||
| } | } | ||||
| when = now() - when; | when = now() - when; | ||||
| printf("ecdh: %5.1fµs\n", when * 1e6 / i); | printf("ecdh: %5.1fµs\n", when * 1e6 / i); | ||||
| if (gres1 || gres2 || memcmp(ss1,ss2,56)) { | |||||
| if (gres1 || gres2 || memcmp(ss1,ss2,64)) { | |||||
| printf("[FAIL] %d %d\n",gres1,gres2); | printf("[FAIL] %d %d\n",gres1,gres2); | ||||
| printf("ss1 = "); | printf("ss1 = "); | ||||
| @@ -470,9 +487,39 @@ int main(int argc, char **argv) { | |||||
| printf("\n"); | printf("\n"); | ||||
| } | } | ||||
| uint8_t sout[56*2]; | |||||
| const char *message = "hello world"; | |||||
| uint64_t message_len = strlen(message); | |||||
| when = now(); | |||||
| for (i=0; i<10000; i++) { | |||||
| res = goldilocks_sign(sout,(const unsigned char *)message,message_len,&gsk); | |||||
| assert(!res); | |||||
| } | |||||
| when = now() - when; | |||||
| printf("sign: %5.1fµs\n", when * 1e6 / i); | |||||
| when = now(); | |||||
| for (i=0; i<10000; i++) { | |||||
| res = goldilocks_verify(sout,(const unsigned char *)message,message_len,&gpk); | |||||
| } | |||||
| when = now() - when; | |||||
| printf("verify: %5.1fµs\n", when * 1e6 / i); | |||||
| printf("\nTesting...\n"); | printf("\nTesting...\n"); | ||||
| int failures=0, successes = 0; | int failures=0, successes = 0; | ||||
| for (i=0; i<1000; i++) { | |||||
| (void)goldilocks_keygen(&gsk,&gpk); | |||||
| goldilocks_sign(sout,(const unsigned char *)message,message_len,&gsk); | |||||
| res = goldilocks_verify(sout,(const unsigned char *)message,message_len,&gpk); | |||||
| if (res) failures++; | |||||
| } | |||||
| if (failures) { | |||||
| printf("FAIL %d/%d signature checks!\n", failures, i); | |||||
| } | |||||
| failures=0; successes = 0; | |||||
| for (i=0; i<1000; i++) { | for (i=0; i<1000; i++) { | ||||
| p448_randomize(&crand, &a); | p448_randomize(&crand, &a); | ||||
| uint64_t two = 2; | uint64_t two = 2; | ||||
| @@ -501,14 +548,14 @@ int main(int argc, char **argv) { | |||||
| mask_t good; | mask_t good; | ||||
| do { | do { | ||||
| p448_randomize(&crand, &a); | p448_randomize(&crand, &a); | ||||
| good = affine_deserialize(&affine, &a); | |||||
| good = deserialize_affine(&affine, &a); | |||||
| } while (!good); | } while (!good); | ||||
| convert_affine_to_extensible(&exta,&affine); | convert_affine_to_extensible(&exta,&affine); | ||||
| p448_isogeny_un_to_tw(&ext,&exta); | |||||
| p448_isogeny_tw_to_un(&exta,&ext); | |||||
| extensible_serialize(&b, &exta); | |||||
| isogeny_and_serialize(&c, &ext); | |||||
| twist_and_double(&ext,&exta); | |||||
| untwist_and_double(&exta,&ext); | |||||
| serialize_extensible(&b, &exta); | |||||
| untwist_and_double_and_serialize(&c, &ext); | |||||
| p448_sub(&d,&b,&c); | p448_sub(&d,&b,&c); | ||||
| p448_bias(&d,2); | p448_bias(&d,2); | ||||
| @@ -536,12 +583,12 @@ int main(int argc, char **argv) { | |||||
| mask_t good = p448_montgomery_ladder(&b,&a,&four,3,0); | mask_t good = p448_montgomery_ladder(&b,&a,&four,3,0); | ||||
| good &= p448_montgomery_ladder(&c,&b,sk,448,0); | good &= p448_montgomery_ladder(&c,&b,sk,448,0); | ||||
| mask_t goodb = affine_deserialize(&affine, &a); | |||||
| mask_t goodb = deserialize_affine(&affine, &a); | |||||
| convert_affine_to_extensible(&exta,&affine); | convert_affine_to_extensible(&exta,&affine); | ||||
| p448_isogeny_un_to_tw(&ext,&exta); | |||||
| twist_and_double(&ext,&exta); | |||||
| edwards_scalar_multiply(&ext,sk); | edwards_scalar_multiply(&ext,sk); | ||||
| p448_isogeny_tw_to_un(&exta,&ext); | |||||
| extensible_serialize(&b, &exta); | |||||
| untwist_and_double(&exta,&ext); | |||||
| serialize_extensible(&b, &exta); | |||||
| p448_sub(&d,&b,&c); | p448_sub(&d,&b,&c); | ||||
| p448_bias(&d,2); | p448_bias(&d,2); | ||||
| @@ -573,14 +620,14 @@ int main(int argc, char **argv) { | |||||
| good &= p448_montgomery_ladder(&c,&b,sk,448,0); | good &= p448_montgomery_ladder(&c,&b,sk,448,0); | ||||
| if (!good) continue; | if (!good) continue; | ||||
| affine_deserialize(&affine, &a); | |||||
| deserialize_affine(&affine, &a); | |||||
| convert_affine_to_extensible(&exta,&affine); | convert_affine_to_extensible(&exta,&affine); | ||||
| p448_isogeny_un_to_tw(&ext,&exta); | |||||
| twist_and_double(&ext,&exta); | |||||
| precompute_for_combs(table, &ext, 5, 5, 18); | precompute_for_combs(table, &ext, 5, 5, 18); | ||||
| edwards_comb(&ext, sk, table, 5, 5, 18); | edwards_comb(&ext, sk, table, 5, 5, 18); | ||||
| p448_isogeny_tw_to_un(&exta,&ext); | |||||
| extensible_serialize(&b, &exta); | |||||
| untwist_and_double(&exta,&ext); | |||||
| serialize_extensible(&b, &exta); | |||||
| p448_sub(&d,&b,&c); | p448_sub(&d,&b,&c); | ||||
| p448_bias(&d,2); | p448_bias(&d,2); | ||||
| @@ -606,21 +653,21 @@ int main(int argc, char **argv) { | |||||
| q448_randomize(&crand, sk); | q448_randomize(&crand, sk); | ||||
| if (!i) bzero(&sk, sizeof(sk)); | if (!i) bzero(&sk, sizeof(sk)); | ||||
| mask_t good = affine_deserialize(&affine, &a); | |||||
| mask_t good = deserialize_affine(&affine, &a); | |||||
| if (!good) continue; | if (!good) continue; | ||||
| convert_affine_to_extensible(&exta,&affine); | convert_affine_to_extensible(&exta,&affine); | ||||
| p448_isogeny_un_to_tw(&ext,&exta); | |||||
| twist_and_double(&ext,&exta); | |||||
| struct tw_extensible_t exu; | struct tw_extensible_t exu; | ||||
| copy_tw_extensible(&exu, &ext); | copy_tw_extensible(&exu, &ext); | ||||
| edwards_scalar_multiply(&ext,sk); | edwards_scalar_multiply(&ext,sk); | ||||
| p448_isogeny_tw_to_un(&exta,&ext); | |||||
| extensible_serialize(&b, &exta); | |||||
| untwist_and_double(&exta,&ext); | |||||
| serialize_extensible(&b, &exta); | |||||
| edwards_scalar_multiply_vt(&exu,sk); | edwards_scalar_multiply_vt(&exu,sk); | ||||
| p448_isogeny_tw_to_un(&exta,&exu); | |||||
| extensible_serialize(&c, &exta); | |||||
| untwist_and_double(&exta,&exu); | |||||
| serialize_extensible(&c, &exta); | |||||
| p448_sub(&d,&b,&c); | p448_sub(&d,&b,&c); | ||||
| p448_bias(&d,2); | p448_bias(&d,2); | ||||
| @@ -646,22 +693,22 @@ int main(int argc, char **argv) { | |||||
| q448_randomize(&crand, sk); | q448_randomize(&crand, sk); | ||||
| if (!i) bzero(&sk, sizeof(sk)); | if (!i) bzero(&sk, sizeof(sk)); | ||||
| mask_t good = affine_deserialize(&affine, &a); | |||||
| mask_t good = deserialize_affine(&affine, &a); | |||||
| if (!good) continue; | if (!good) continue; | ||||
| convert_affine_to_extensible(&exta,&affine); | convert_affine_to_extensible(&exta,&affine); | ||||
| p448_isogeny_un_to_tw(&ext,&exta); | |||||
| twist_and_double(&ext,&exta); | |||||
| struct tw_extensible_t exu; | struct tw_extensible_t exu; | ||||
| copy_tw_extensible(&exu, &ext); | copy_tw_extensible(&exu, &ext); | ||||
| edwards_scalar_multiply(&ext,sk); | edwards_scalar_multiply(&ext,sk); | ||||
| p448_isogeny_tw_to_un(&exta,&ext); | |||||
| extensible_serialize(&b, &exta); | |||||
| untwist_and_double(&exta,&ext); | |||||
| serialize_extensible(&b, &exta); | |||||
| precompute_for_wnaf(wnaft,&exu,5); | precompute_for_wnaf(wnaft,&exu,5); | ||||
| edwards_scalar_multiply_vt_pre(&exu,sk,wnaft,5); | edwards_scalar_multiply_vt_pre(&exu,sk,wnaft,5); | ||||
| p448_isogeny_tw_to_un(&exta,&exu); | |||||
| extensible_serialize(&c, &exta); | |||||
| untwist_and_double(&exta,&exu); | |||||
| serialize_extensible(&c, &exta); | |||||
| p448_sub(&d,&b,&c); | p448_sub(&d,&b,&c); | ||||
| p448_bias(&d,2); | p448_bias(&d,2); | ||||
| @@ -695,15 +742,15 @@ int main(int argc, char **argv) { | |||||
| mask_t good; | mask_t good; | ||||
| do { | do { | ||||
| p448_randomize(&crand, &a); | p448_randomize(&crand, &a); | ||||
| good = affine_deserialize(&affine, &a); | |||||
| good = deserialize_affine(&affine, &a); | |||||
| convert_affine_to_extensible(&exta,&affine); | convert_affine_to_extensible(&exta,&affine); | ||||
| p448_isogeny_un_to_tw(&ext,&exta); | |||||
| twist_and_double(&ext,&exta); | |||||
| } while (!good); | } while (!good); | ||||
| do { | do { | ||||
| p448_randomize(&crand, &aa); | p448_randomize(&crand, &aa); | ||||
| good = affine_deserialize(&affine, &aa); | |||||
| good = deserialize_affine(&affine, &aa); | |||||
| convert_affine_to_extensible(&exta,&affine); | convert_affine_to_extensible(&exta,&affine); | ||||
| p448_isogeny_un_to_tw(&exu,&exta); | |||||
| twist_and_double(&exu,&exta); | |||||
| } while (!good); | } while (!good); | ||||
| p448_randomize(&crand, &aa); | p448_randomize(&crand, &aa); | ||||
| @@ -717,14 +764,14 @@ int main(int argc, char **argv) { | |||||
| edwards_scalar_multiply(&exv,sk); | edwards_scalar_multiply(&exv,sk); | ||||
| edwards_scalar_multiply(&exw,tk); | edwards_scalar_multiply(&exw,tk); | ||||
| convert_tw_extensible_to_tw_pniels(&pniels, &exw); | convert_tw_extensible_to_tw_pniels(&pniels, &exw); | ||||
| p448_tw_extensible_add_pniels(&exv,&pniels); | |||||
| p448_isogeny_tw_to_un(&exta,&exv); | |||||
| extensible_serialize(&b, &exta); | |||||
| add_tw_pniels_to_tw_extensible(&exv,&pniels); | |||||
| untwist_and_double(&exta,&exv); | |||||
| serialize_extensible(&b, &exta); | |||||
| precompute_for_wnaf(wnaft,&exu,5); | precompute_for_wnaf(wnaft,&exu,5); | ||||
| edwards_combo_var_fixed_vt(&ext,sk,tk,wnaft,5); | edwards_combo_var_fixed_vt(&ext,sk,tk,wnaft,5); | ||||
| p448_isogeny_tw_to_un(&exta,&exv); | |||||
| extensible_serialize(&c, &exta); | |||||
| untwist_and_double(&exta,&exv); | |||||
| serialize_extensible(&c, &exta); | |||||
| p448_sub(&d,&b,&c); | p448_sub(&d,&b,&c); | ||||
| p448_bias(&d,2); | p448_bias(&d,2); | ||||
| @@ -7,6 +7,7 @@ | |||||
| #include "intrinsics.h" | #include "intrinsics.h" | ||||
| #include "crandom.h" | #include "crandom.h" | ||||
| #include <stdio.h> | |||||
| volatile unsigned int crandom_features = 0; | volatile unsigned int crandom_features = 0; | ||||
| @@ -26,11 +27,60 @@ unsigned int crandom_detect_features() { | |||||
| a=0x80000001; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d)); | a=0x80000001; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d)); | ||||
| if (c & 1<<11) out |= XOP; | if (c & 1<<11) out |= XOP; | ||||
| if (c & 1<<30) out |= RDRAND; | |||||
| # endif | # endif | ||||
| return out; | return out; | ||||
| } | } | ||||
| INTRINSIC u_int64_t rdrand(int abort_on_fail) { | |||||
| uint64_t out = 0; | |||||
| int tries = 1000; | |||||
| if (HAVE(RDRAND)) { | |||||
| # if defined(__x86_64__) | |||||
| u_int64_t out, a=0; | |||||
| for (; tries && !a; tries--) { | |||||
| __asm__ __volatile__ ( | |||||
| "rdrand %0\n\tsetc %%al" | |||||
| : "=r"(out), "+a"(a) :: "cc" | |||||
| ); | |||||
| } | |||||
| # elif (defined(__i386__)) | |||||
| u_int32_t reg, a=0; | |||||
| uint64_t out; | |||||
| for (; tries && !a; tries--) { | |||||
| __asm__ __volatile__ ( | |||||
| "rdrand %0\n\tsetc %%al" | |||||
| : "=r"(reg), "+a"(a) :: "cc" | |||||
| ); | |||||
| } | |||||
| out = reg; a = 0; | |||||
| for (; tries && !a; tries--) { | |||||
| __asm__ __volatile__ ( | |||||
| "rdrand %0\n\tsetc %%al" | |||||
| : "=r"(reg), "+a"(a) :: "cc" | |||||
| ); | |||||
| } | |||||
| out = out << 32 | reg; | |||||
| return out; | |||||
| # else | |||||
| abort(); // whut | |||||
| # endif | |||||
| } else { | |||||
| tries = 0; | |||||
| } | |||||
| if (abort_on_fail && !tries) { | |||||
| abort(); | |||||
| } | |||||
| return out; | |||||
| } | |||||
| /* ------------------------------- Vectorized code ------------------------------- */ | /* ------------------------------- Vectorized code ------------------------------- */ | ||||
| #define shuffle(x,i) _mm_shuffle_epi32(x, \ | #define shuffle(x,i) _mm_shuffle_epi32(x, \ | ||||
| i + ((i+1)&3)*4 + ((i+2)&3)*16 + ((i+3)&3)*64) | i + ((i+1)&3)*4 + ((i+2)&3)*16 + ((i+3)&3)*64) | ||||
| @@ -278,7 +328,7 @@ crandom_init_from_file( | |||||
| return err ? err : -1; | return err ? err : -1; | ||||
| } | } | ||||
| bzero(state->buffer, 96); | |||||
| memset(state->buffer, 0, 96); | |||||
| state->magic = CRANDOM_MAGIC; | state->magic = CRANDOM_MAGIC; | ||||
| state->reseeds_mandatory = reseeds_mandatory; | state->reseeds_mandatory = reseeds_mandatory; | ||||
| @@ -292,7 +342,7 @@ crandom_init_from_buffer( | |||||
| const char initial_seed[32] | const char initial_seed[32] | ||||
| ) { | ) { | ||||
| memcpy(state->seed, initial_seed, 32); | memcpy(state->seed, initial_seed, 32); | ||||
| bzero(state->buffer, 96); | |||||
| memset(state->buffer, 0, 96); | |||||
| state->reseed_countdown = state->reseed_interval = state->fill = state->ctr = state->reseeds_mandatory = 0; | state->reseed_countdown = state->reseed_interval = state->fill = state->ctr = state->reseeds_mandatory = 0; | ||||
| state->randomfd = -1; | state->randomfd = -1; | ||||
| state->magic = CRANDOM_MAGIC; | state->magic = CRANDOM_MAGIC; | ||||
| @@ -305,7 +355,9 @@ crandom_generate( | |||||
| unsigned long long length | unsigned long long length | ||||
| ) { | ) { | ||||
| /* the generator isn't seeded; maybe they ignored the return value of init_from_file */ | /* the generator isn't seeded; maybe they ignored the return value of init_from_file */ | ||||
| if (unlikely(state->magic != CRANDOM_MAGIC)) abort(); | |||||
| if (unlikely(state->magic != CRANDOM_MAGIC)) { | |||||
| abort(); | |||||
| } | |||||
| int ret = 0; | int ret = 0; | ||||
| @@ -313,8 +365,13 @@ crandom_generate( | |||||
| if (unlikely(state->fill <= 0)) { | if (unlikely(state->fill <= 0)) { | ||||
| uint64_t iv = 0; | uint64_t iv = 0; | ||||
| if (state->reseed_interval) { | if (state->reseed_interval) { | ||||
| /* it's nondeterministic, stir in some rdtsc() */ | |||||
| iv = rdtsc(); | |||||
| /* it's nondeterministic, stir in some rdrand() or rdtsc() */ | |||||
| if (HAVE(RDRAND)) { | |||||
| iv = rdrand(0); | |||||
| if (!iv) iv = rdtsc(); | |||||
| } else { | |||||
| iv = rdtsc(); | |||||
| } | |||||
| state->reseed_countdown--; | state->reseed_countdown--; | ||||
| if (unlikely(state->reseed_countdown <= 0)) { | if (unlikely(state->reseed_countdown <= 0)) { | ||||
| @@ -335,11 +392,13 @@ crandom_generate( | |||||
| * is basically over-engineering for caution. Also, the user might ignore | * is basically over-engineering for caution. Also, the user might ignore | ||||
| * the return code, so we still need to fill the request. | * the return code, so we still need to fill the request. | ||||
| * | * | ||||
| * Set reseed_countdown = 1 so we'll try again later. If the user's perf | |||||
| * sucks as a result of ignoring the error code while calling us in a loop, | |||||
| * well, he gets what he deserves. | |||||
| * Set reseed_countdown = 1 so we'll try again later. If the user's | |||||
| * performance sucks as a result of ignoring the error code while calling | |||||
| * us in a loop, well, that's life. | |||||
| */ | */ | ||||
| if (state->reseeds_mandatory) abort(); | |||||
| if (state->reseeds_mandatory) { | |||||
| abort(); | |||||
| } | |||||
| ret = errno; | ret = errno; | ||||
| if (ret == 0) ret = -1; | if (ret == 0) ret = -1; | ||||
| @@ -361,7 +420,7 @@ crandom_generate( | |||||
| unsigned long long copy = (length > state->fill) ? state->fill : length; | unsigned long long copy = (length > state->fill) ? state->fill : length; | ||||
| state->fill -= copy; | state->fill -= copy; | ||||
| memcpy(output, state->buffer + state->fill, copy); | memcpy(output, state->buffer + state->fill, copy); | ||||
| bzero(state->buffer + state->fill, copy); | |||||
| memset(state->buffer + state->fill, 0, copy); | |||||
| output += copy; length -= copy; | output += copy; length -= copy; | ||||
| } | } | ||||
| @@ -371,11 +430,13 @@ crandom_generate( | |||||
| void | void | ||||
| crandom_destroy( | crandom_destroy( | ||||
| struct crandom_state_t *state | struct crandom_state_t *state | ||||
| ) { | |||||
| if (state->randomfd) close(state->randomfd); | |||||
| /* Ignore the return value, because what would it mean? | |||||
| * "Your random device, which you were reading over NFS, lost some data"? | |||||
| */ | |||||
| ) { | |||||
| if (state->magic == CRANDOM_MAGIC && state->randomfd) { | |||||
| (void) close(state->randomfd); | |||||
| /* Ignore the return value from close(), because what would it mean? | |||||
| * "Your random device, which you were reading over NFS, lost some data"? | |||||
| */ | |||||
| } | |||||
| bzero(state, sizeof(*state)); | |||||
| memset(state, 0, sizeof(*state)); | |||||
| } | } | ||||
| @@ -3,7 +3,11 @@ | |||||
| * Released under the MIT License. See LICENSE.txt for license information. | * Released under the MIT License. See LICENSE.txt for license information. | ||||
| */ | */ | ||||
| /* A miniature version of the (as of yet incomplete) crandom project. */ | |||||
| /** | |||||
| * @file crandom.h | |||||
| * @author Mike Hamburg | |||||
| * @brief A miniature version of the (as of yet incomplete) crandom project. | |||||
| */ | |||||
| #ifndef __GOLDI_CRANDOM_H__ | #ifndef __GOLDI_CRANDOM_H__ | ||||
| #define __GOLDI_CRANDOM_H__ 1 | #define __GOLDI_CRANDOM_H__ 1 | ||||
| @@ -16,7 +20,14 @@ | |||||
| #include <strings.h> /* for bzero */ | #include <strings.h> /* for bzero */ | ||||
| #include <unistd.h> /* for read */ | #include <unistd.h> /* for read */ | ||||
| /** | |||||
| * @brief The state of a crandom generator. | |||||
| * | |||||
| * This object is opaque. It is not protected by a lock, and so must | |||||
| * not be accessed by multiple threads at the same time. | |||||
| */ | |||||
| struct crandom_state_t { | struct crandom_state_t { | ||||
| /** @privatesection */ | |||||
| unsigned char seed[32]; | unsigned char seed[32]; | ||||
| unsigned char buffer[96]; | unsigned char buffer[96]; | ||||
| uint64_t ctr; | uint64_t ctr; | ||||
| @@ -32,30 +43,93 @@ struct crandom_state_t { | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| /** | |||||
| * Initialize a crandom state from the chosen file. | |||||
| * | |||||
| * This function initializes a state from a given state file, or | |||||
| * from a random device (e.g. /dev/random or /dev/urandom). | |||||
| * | |||||
| * You must check the return value of this function. | |||||
| * | |||||
| * @param [out] state The crandom state variable to initialize. | |||||
| * @param [in] filename The name of the seed file or random device. | |||||
| * @param [in] reseed_interval The number of 96-byte blocks which can be | |||||
| * generated without reseeding. Suggest 10000. | |||||
| * @param [in] reseeds_mandatory If nonzero, call abort() if a reseed fails. | |||||
| * Suggest 1. | |||||
| * | |||||
| * @retval 0 Success. | |||||
| * @retval Nonzero An error to be interpreted by strerror(). | |||||
| */ | |||||
| int | int | ||||
| crandom_init_from_file( | |||||
| crandom_init_from_file ( | |||||
| struct crandom_state_t *state, | struct crandom_state_t *state, | ||||
| const char *filename, | const char *filename, | ||||
| int reseed_interval, | int reseed_interval, | ||||
| int reseeds_mandatory | int reseeds_mandatory | ||||
| ) __attribute__((warn_unused_result)); | ) __attribute__((warn_unused_result)); | ||||
| /** | |||||
| * Initialize a crandom state from a buffer, for deterministic operation. | |||||
| * | |||||
| * This function is used to initialize a crandom state deterministically, | |||||
| * mainly for testing purposes. It can also be used to expand a secret | |||||
| * random value deterministically. | |||||
| * | |||||
| * @warning The crandom implementation is not guaranteed to be stable. | |||||
| * That is, a later release might produce a different random stream from | |||||
| * the same seed. | |||||
| * | |||||
| * @param [out] state The crandom state variable to initialize. | |||||
| * @param [in] initial_seed The seed value. | |||||
| */ | |||||
| void | void | ||||
| crandom_init_from_buffer( | |||||
| crandom_init_from_buffer ( | |||||
| struct crandom_state_t *state, | struct crandom_state_t *state, | ||||
| const char initial_seed[32] | const char initial_seed[32] | ||||
| ); | ); | ||||
| /* TODO : attribute warn for not checking return type? */ | |||||
| /** | |||||
| * Fill the output buffer with random data. | |||||
| * | |||||
| * This function uses the given crandom state to produce pseudorandom data | |||||
| * in the output buffer. | |||||
| * | |||||
| * This function may perform reads from the state's random device if it needs | |||||
| * to reseed. This could block if that file is a blocking source, such as | |||||
| * a pipe or /dev/random on Linux. If reseeding fails and the state has | |||||
| * reseeds_mandatory set, this function will call abort(). Otherwise, it will | |||||
| * return an error code, but it will still randomize the buffer. | |||||
| * | |||||
| * If called on a corrupted, uninitialized or destroyed state, this function | |||||
| * will abort(). | |||||
| * | |||||
| * @warning This function is not thread-safe with respect to the state. Don't | |||||
| * call it from multiple threads with the same state at the same time. | |||||
| * | |||||
| * @param [inout] state The crandom state to use for generation. | |||||
| * @param [out] output The buffer to fill with random data. | |||||
| * @param [in] length The length of the buffer. | |||||
| * | |||||
| * @retval 0 Success. | |||||
| * @retval Nonzero A non-mandatory reseed operation failed. | |||||
| */ | |||||
| int | int | ||||
| crandom_generate( | |||||
| crandom_generate ( | |||||
| struct crandom_state_t *state, | struct crandom_state_t *state, | ||||
| unsigned char *output, | unsigned char *output, | ||||
| unsigned long long length | unsigned long long length | ||||
| ); | ); | ||||
| /** | |||||
| * Destroy the random state. Further calls to crandom_generate() on that state | |||||
| * will abort(). | |||||
| * | |||||
| * @param [inout] state The state to be destroyed. | |||||
| */ | |||||
| void | void | ||||
| crandom_destroy( | |||||
| crandom_destroy ( | |||||
| struct crandom_state_t *state | struct crandom_state_t *state | ||||
| ); | ); | ||||
| @@ -1,8 +1,13 @@ | |||||
| /* Copyright (c) 2014 Cryptography Research, Inc. | |||||
| * Released under the MIT License. See LICENSE.txt for license information. | |||||
| /** | |||||
| * @cond internal | |||||
| * @file ec_point.c | |||||
| * @copyright | |||||
| * Copyright (c) 2014 Cryptography Research, Inc. \n | |||||
| * Released under the MIT License. See LICENSE.txt for license information. | |||||
| * @author Mike Hamburg | |||||
| * @warning This file was automatically generated. | |||||
| */ | */ | ||||
| /* This file was generated with the assistance of a tool written in SAGE. */ | |||||
| #include "ec_point.h" | #include "ec_point.h" | ||||
| @@ -51,7 +56,7 @@ p448_inverse ( | |||||
| } | } | ||||
| void | void | ||||
| p448_tw_extensible_add_niels ( | |||||
| add_tw_niels_to_tw_extensible ( | |||||
| struct tw_extensible_t* d, | struct tw_extensible_t* d, | ||||
| const struct tw_niels_t* e | const struct tw_niels_t* e | ||||
| ) { | ) { | ||||
| @@ -75,18 +80,53 @@ p448_tw_extensible_add_niels ( | |||||
| } | } | ||||
| void | void | ||||
| p448_tw_extensible_add_pniels ( | |||||
| sub_tw_niels_from_tw_extensible ( | |||||
| struct tw_extensible_t* d, | |||||
| const struct tw_niels_t* e | |||||
| ) { | |||||
| struct p448_t L0, L1; | |||||
| p448_bias ( &d->y, 2 ); | |||||
| p448_bias ( &d->z, 2 ); | |||||
| p448_sub ( &L1, &d->y, &d->x ); | |||||
| p448_mul ( &L0, &e->b, &L1 ); | |||||
| p448_add ( &L1, &d->x, &d->y ); | |||||
| p448_mul ( &d->y, &e->a, &L1 ); | |||||
| p448_bias ( &d->y, 2 ); | |||||
| p448_mul ( &L1, &d->u, &d->t ); | |||||
| p448_mul ( &d->x, &e->c, &L1 ); | |||||
| p448_add ( &d->u, &L0, &d->y ); | |||||
| p448_sub ( &d->t, &d->y, &L0 ); | |||||
| p448_add ( &d->y, &d->x, &d->z ); | |||||
| p448_sub ( &L0, &d->z, &d->x ); | |||||
| p448_mul ( &d->z, &L0, &d->y ); | |||||
| p448_mul ( &d->x, &d->y, &d->t ); | |||||
| p448_mul ( &d->y, &L0, &d->u ); | |||||
| } | |||||
| void | |||||
| add_tw_pniels_to_tw_extensible ( | |||||
| struct tw_extensible_t* e, | struct tw_extensible_t* e, | ||||
| const struct tw_pniels_t* a | const struct tw_pniels_t* a | ||||
| ) { | ) { | ||||
| struct p448_t L0; | struct p448_t L0; | ||||
| p448_mul ( &L0, &e->z, &a->z ); | p448_mul ( &L0, &e->z, &a->z ); | ||||
| p448_copy ( &e->z, &L0 ); | p448_copy ( &e->z, &L0 ); | ||||
| p448_tw_extensible_add_niels( e, &a->n ); | |||||
| add_tw_niels_to_tw_extensible( e, &a->n ); | |||||
| } | } | ||||
| void | void | ||||
| p448_tw_extensible_double ( | |||||
| sub_tw_pniels_from_tw_extensible ( | |||||
| struct tw_extensible_t* e, | |||||
| const struct tw_pniels_t* a | |||||
| ) { | |||||
| struct p448_t L0; | |||||
| p448_mul ( &L0, &e->z, &a->z ); | |||||
| p448_copy ( &e->z, &L0 ); | |||||
| sub_tw_niels_from_tw_extensible( e, &a->n ); | |||||
| } | |||||
| void | |||||
| double_tw_extensible ( | |||||
| struct tw_extensible_t* a | struct tw_extensible_t* a | ||||
| ) { | ) { | ||||
| struct p448_t L0, L1, L2; | struct p448_t L0, L1, L2; | ||||
| @@ -109,7 +149,7 @@ p448_tw_extensible_double ( | |||||
| } | } | ||||
| void | void | ||||
| p448_extensible_double ( | |||||
| double_extensible ( | |||||
| struct extensible_t* a | struct extensible_t* a | ||||
| ) { | ) { | ||||
| struct p448_t L0, L1, L2; | struct p448_t L0, L1, L2; | ||||
| @@ -132,7 +172,7 @@ p448_extensible_double ( | |||||
| } | } | ||||
| void | void | ||||
| p448_isogeny_un_to_tw ( | |||||
| twist_and_double ( | |||||
| struct tw_extensible_t* b, | struct tw_extensible_t* b, | ||||
| const struct extensible_t* a | const struct extensible_t* a | ||||
| ) { | ) { | ||||
| @@ -156,7 +196,7 @@ p448_isogeny_un_to_tw ( | |||||
| } | } | ||||
| void | void | ||||
| p448_isogeny_tw_to_un ( | |||||
| untwist_and_double ( | |||||
| struct extensible_t* b, | struct extensible_t* b, | ||||
| const struct tw_extensible_t* a | const struct tw_extensible_t* a | ||||
| ) { | ) { | ||||
| @@ -269,7 +309,7 @@ convert_tw_niels_to_tw_extensible ( | |||||
| } | } | ||||
| void | void | ||||
| p448_montgomery_step ( | |||||
| montgomery_step ( | |||||
| struct montgomery_t* a | struct montgomery_t* a | ||||
| ) { | ) { | ||||
| struct p448_t L0, L1; | struct p448_t L0, L1; | ||||
| @@ -299,7 +339,7 @@ p448_montgomery_step ( | |||||
| } | } | ||||
| void | void | ||||
| p448_montgomery_serialize ( | |||||
| serialize_montgomery ( | |||||
| struct p448_t* sign, | struct p448_t* sign, | ||||
| struct p448_t* ser, | struct p448_t* ser, | ||||
| const struct montgomery_t* a, | const struct montgomery_t* a, | ||||
| @@ -330,7 +370,7 @@ p448_montgomery_serialize ( | |||||
| } | } | ||||
| void | void | ||||
| extensible_serialize ( | |||||
| serialize_extensible ( | |||||
| struct p448_t* b, | struct p448_t* b, | ||||
| const struct extensible_t* a | const struct extensible_t* a | ||||
| ) { | ) { | ||||
| @@ -350,37 +390,70 @@ extensible_serialize ( | |||||
| } | } | ||||
| void | void | ||||
| isogeny_and_serialize ( | |||||
| untwist_and_double_and_serialize ( | |||||
| struct p448_t* b, | struct p448_t* b, | ||||
| const struct tw_extensible_t* a | const struct tw_extensible_t* a | ||||
| ) { | ) { | ||||
| struct p448_t L0, L1, L2, L3; | struct p448_t L0, L1, L2, L3; | ||||
| p448_mul ( &L3, &a->y, &a->x ); | p448_mul ( &L3, &a->y, &a->x ); | ||||
| p448_add ( &L1, &a->y, &a->x ); | |||||
| p448_sqr ( b, &L1 ); | |||||
| p448_add ( b, &a->y, &a->x ); | |||||
| p448_sqr ( &L1, b ); | |||||
| p448_add ( &L2, &L3, &L3 ); | p448_add ( &L2, &L3, &L3 ); | ||||
| p448_sub ( &L1, b, &L2 ); | |||||
| p448_bias ( &L1, 3 ); | |||||
| p448_sub ( b, &L1, &L2 ); | |||||
| p448_bias ( b, 3 ); | |||||
| p448_sqr ( &L2, &a->z ); | p448_sqr ( &L2, &a->z ); | ||||
| p448_sqr ( b, &L2 ); | |||||
| p448_add ( &L2, &L1, &L1 ); | |||||
| p448_mulw ( &L1, &L2, 39082 ); | |||||
| p448_neg ( &L2, &L1 ); | |||||
| p448_sqr ( &L1, &L2 ); | |||||
| p448_add ( &L2, b, b ); | |||||
| p448_mulw ( b, &L2, 39082 ); | |||||
| p448_neg ( &L2, b ); | |||||
| p448_bias ( &L2, 2 ); | p448_bias ( &L2, 2 ); | ||||
| p448_mulw ( &L0, &L2, 39082 ); | p448_mulw ( &L0, &L2, 39082 ); | ||||
| p448_neg ( &L1, &L0 ); | |||||
| p448_bias ( &L1, 2 ); | |||||
| p448_neg ( b, &L0 ); | |||||
| p448_bias ( b, 2 ); | |||||
| p448_mul ( &L0, &L2, &L1 ); | |||||
| p448_mul ( &L2, b, &L0 ); | |||||
| p448_isr ( &L0, &L2 ); | |||||
| p448_mul ( &L1, b, &L0 ); | |||||
| p448_sqr ( b, &L0 ); | |||||
| p448_mul ( &L0, &L2, b ); | p448_mul ( &L0, &L2, b ); | ||||
| p448_mul ( b, &L1, &L0 ); | |||||
| p448_isr ( &L0, b ); | |||||
| p448_mul ( &L2, &L1, &L0 ); | |||||
| p448_sqr ( &L1, &L0 ); | |||||
| p448_mul ( &L0, b, &L1 ); | |||||
| p448_mul ( b, &L2, &L3 ); | |||||
| p448_mul ( b, &L1, &L3 ); | |||||
| } | |||||
| void | |||||
| twist ( | |||||
| struct tw_extensible_t* b, | |||||
| const struct extensible_t* a | |||||
| ) { | |||||
| mask_t L0, L1; | |||||
| p448_sqr ( &b->y, &a->z ); | |||||
| p448_sqr ( &b->z, &a->x ); | |||||
| p448_sub ( &b->u, &b->y, &b->z ); | |||||
| p448_bias ( &b->u, 2 ); | |||||
| p448_sub ( &b->z, &a->z, &a->x ); | |||||
| p448_bias ( &b->z, 2 ); | |||||
| p448_mul ( &b->y, &b->z, &a->y ); | |||||
| p448_sub ( &b->z, &a->z, &a->y ); | |||||
| p448_bias ( &b->z, 2 ); | |||||
| p448_mul ( &b->x, &b->z, &b->y ); | |||||
| p448_mul ( &b->t, &b->x, &b->u ); | |||||
| p448_mul ( &b->y, &b->x, &b->t ); | |||||
| p448_isr ( &b->t, &b->y ); | |||||
| p448_mul ( &b->u, &b->x, &b->t ); | |||||
| p448_sqr ( &b->x, &b->t ); | |||||
| p448_mul ( &b->t, &b->y, &b->x ); | |||||
| p448_mul ( &b->x, &a->x, &b->u ); | |||||
| p448_mul ( &b->y, &a->y, &b->u ); | |||||
| L1 = p448_is_zero( &b->z ); | |||||
| L0 = - L1; | |||||
| p448_addw ( &b->y, L0 ); | |||||
| p448_weak_reduce( &b->y ); | |||||
| p448_set_ui( &b->z, 1 ); | |||||
| p448_copy ( &b->t, &b->x ); | |||||
| p448_copy ( &b->u, &b->y ); | |||||
| } | } | ||||
| mask_t | mask_t | ||||
| affine_deserialize ( | |||||
| deserialize_affine ( | |||||
| struct affine_t* a, | struct affine_t* a, | ||||
| const struct p448_t* sz | const struct p448_t* sz | ||||
| ) { | ) { | ||||
| @@ -417,6 +490,57 @@ affine_deserialize ( | |||||
| return p448_is_zero( &L0 ); | return p448_is_zero( &L0 ); | ||||
| } | } | ||||
| mask_t | |||||
| deserialize_and_twist_approx ( | |||||
| struct tw_extensible_t* a, | |||||
| const struct p448_t* sdm1, | |||||
| const struct p448_t* sz | |||||
| ) { | |||||
| struct p448_t L0, L1; | |||||
| p448_sqr ( &a->z, sz ); | |||||
| p448_copy ( &a->y, &a->z ); | |||||
| p448_addw ( &a->y, 1 ); | |||||
| p448_sqr ( &a->x, &a->y ); | |||||
| p448_mulw ( &a->y, &a->x, 39082 ); | |||||
| p448_neg ( &a->x, &a->y ); | |||||
| p448_add ( &a->y, &a->z, &a->z ); | |||||
| p448_bias ( &a->y, 1 ); | |||||
| p448_add ( &a->u, &a->y, &a->y ); | |||||
| p448_add ( &a->y, &a->u, &a->x ); | |||||
| p448_sqr ( &a->x, &a->z ); | |||||
| p448_subw ( &a->x, 1 ); | |||||
| p448_neg ( &a->u, &a->x ); | |||||
| p448_bias ( &a->u, 2 ); | |||||
| p448_mul ( &a->x, sdm1, &a->u ); | |||||
| p448_mul ( &L0, &a->x, &a->y ); | |||||
| p448_mul ( &a->t, &L0, &a->y ); | |||||
| p448_mul ( &a->u, &a->x, &a->t ); | |||||
| p448_mul ( &a->t, &a->u, &L0 ); | |||||
| p448_mul ( &a->y, &a->x, &a->t ); | |||||
| p448_isr ( &L0, &a->y ); | |||||
| p448_mul ( &a->y, &a->u, &L0 ); | |||||
| p448_sqr ( &L1, &L0 ); | |||||
| p448_mul ( &a->u, &a->t, &L1 ); | |||||
| p448_mul ( &a->t, &a->x, &a->u ); | |||||
| p448_bias ( &a->t, 1 ); | |||||
| p448_add ( &a->x, sz, sz ); | |||||
| p448_mul ( &L0, &a->u, &a->x ); | |||||
| p448_copy ( &a->x, &a->z ); | |||||
| p448_subw ( &a->x, 1 ); | |||||
| p448_neg ( &L1, &a->x ); | |||||
| p448_bias ( &L1, 2 ); | |||||
| p448_mul ( &a->x, &L1, &L0 ); | |||||
| p448_mul ( &L0, &a->u, &a->y ); | |||||
| p448_addw ( &a->z, 1 ); | |||||
| p448_mul ( &a->y, &a->z, &L0 ); | |||||
| p448_subw ( &a->t, 1 ); | |||||
| mask_t ret = p448_is_zero( &a->t ); | |||||
| p448_set_ui( &a->z, 1 ); | |||||
| p448_copy ( &a->t, &a->x ); | |||||
| p448_copy ( &a->u, &a->y ); | |||||
| return ret; | |||||
| } | |||||
| void | void | ||||
| set_identity_extensible ( | set_identity_extensible ( | ||||
| struct extensible_t* a | struct extensible_t* a | ||||
| @@ -452,15 +576,15 @@ eq_affine ( | |||||
| const struct affine_t* a, | const struct affine_t* a, | ||||
| const struct affine_t* b | const struct affine_t* b | ||||
| ) { | ) { | ||||
| mask_t L0, L1; | |||||
| struct p448_t L2; | |||||
| p448_sub ( &L2, &a->x, &b->x ); | |||||
| p448_bias ( &L2, 2 ); | |||||
| L1 = p448_is_zero( &L2 ); | |||||
| p448_sub ( &L2, &a->y, &b->y ); | |||||
| p448_bias ( &L2, 2 ); | |||||
| L0 = p448_is_zero( &L2 ); | |||||
| return L1 & L0; | |||||
| mask_t L1, L2; | |||||
| struct p448_t L0; | |||||
| p448_sub ( &L0, &a->x, &b->x ); | |||||
| p448_bias ( &L0, 2 ); | |||||
| L2 = p448_is_zero( &L0 ); | |||||
| p448_sub ( &L0, &a->y, &b->y ); | |||||
| p448_bias ( &L0, 2 ); | |||||
| L1 = p448_is_zero( &L0 ); | |||||
| return L2 & L1; | |||||
| } | } | ||||
| mask_t | mask_t | ||||
| @@ -468,19 +592,19 @@ eq_extensible ( | |||||
| const struct extensible_t* a, | const struct extensible_t* a, | ||||
| const struct extensible_t* b | const struct extensible_t* b | ||||
| ) { | ) { | ||||
| mask_t L0, L1; | |||||
| struct p448_t L2, L3, L4; | |||||
| p448_mul ( &L4, &b->z, &a->x ); | |||||
| p448_mul ( &L3, &a->z, &b->x ); | |||||
| p448_sub ( &L2, &L4, &L3 ); | |||||
| p448_bias ( &L2, 2 ); | |||||
| L1 = p448_is_zero( &L2 ); | |||||
| p448_mul ( &L4, &b->z, &a->y ); | |||||
| p448_mul ( &L3, &a->z, &b->y ); | |||||
| p448_sub ( &L2, &L4, &L3 ); | |||||
| p448_bias ( &L2, 2 ); | |||||
| L0 = p448_is_zero( &L2 ); | |||||
| return L1 & L0; | |||||
| mask_t L3, L4; | |||||
| struct p448_t L0, L1, L2; | |||||
| p448_mul ( &L2, &b->z, &a->x ); | |||||
| p448_mul ( &L1, &a->z, &b->x ); | |||||
| p448_sub ( &L0, &L2, &L1 ); | |||||
| p448_bias ( &L0, 2 ); | |||||
| L4 = p448_is_zero( &L0 ); | |||||
| p448_mul ( &L2, &b->z, &a->y ); | |||||
| p448_mul ( &L1, &a->z, &b->y ); | |||||
| p448_sub ( &L0, &L2, &L1 ); | |||||
| p448_bias ( &L0, 2 ); | |||||
| L3 = p448_is_zero( &L0 ); | |||||
| return L4 & L3; | |||||
| } | } | ||||
| mask_t | mask_t | ||||
| @@ -488,19 +612,19 @@ eq_tw_extensible ( | |||||
| const struct tw_extensible_t* a, | const struct tw_extensible_t* a, | ||||
| const struct tw_extensible_t* b | const struct tw_extensible_t* b | ||||
| ) { | ) { | ||||
| mask_t L0, L1; | |||||
| struct p448_t L2, L3, L4; | |||||
| p448_mul ( &L4, &b->z, &a->x ); | |||||
| p448_mul ( &L3, &a->z, &b->x ); | |||||
| p448_sub ( &L2, &L4, &L3 ); | |||||
| p448_bias ( &L2, 2 ); | |||||
| L1 = p448_is_zero( &L2 ); | |||||
| p448_mul ( &L4, &b->z, &a->y ); | |||||
| p448_mul ( &L3, &a->z, &b->y ); | |||||
| p448_sub ( &L2, &L4, &L3 ); | |||||
| p448_bias ( &L2, 2 ); | |||||
| L0 = p448_is_zero( &L2 ); | |||||
| return L1 & L0; | |||||
| mask_t L3, L4; | |||||
| struct p448_t L0, L1, L2; | |||||
| p448_mul ( &L2, &b->z, &a->x ); | |||||
| p448_mul ( &L1, &a->z, &b->x ); | |||||
| p448_sub ( &L0, &L2, &L1 ); | |||||
| p448_bias ( &L0, 2 ); | |||||
| L4 = p448_is_zero( &L0 ); | |||||
| p448_mul ( &L2, &b->z, &a->y ); | |||||
| p448_mul ( &L1, &a->z, &b->y ); | |||||
| p448_sub ( &L0, &L2, &L1 ); | |||||
| p448_bias ( &L0, 2 ); | |||||
| L3 = p448_is_zero( &L0 ); | |||||
| return L4 & L3; | |||||
| } | } | ||||
| void | void | ||||
| @@ -559,7 +683,7 @@ elligator_2s_inject ( | |||||
| } | } | ||||
| mask_t | mask_t | ||||
| p448_affine_validate ( | |||||
| validate_affine ( | |||||
| const struct affine_t* a | const struct affine_t* a | ||||
| ) { | ) { | ||||
| struct p448_t L0, L1, L2, L3; | struct p448_t L0, L1, L2, L3; | ||||
| @@ -577,45 +701,45 @@ p448_affine_validate ( | |||||
| } | } | ||||
| mask_t | mask_t | ||||
| p448_tw_extensible_validate ( | |||||
| validate_tw_extensible ( | |||||
| const struct tw_extensible_t* ext | const struct tw_extensible_t* ext | ||||
| ) { | ) { | ||||
| mask_t L0, L1; | |||||
| struct p448_t L2, L3, L4, L5; | |||||
| mask_t L4, L5; | |||||
| struct p448_t L0, L1, L2, L3; | |||||
| /* | /* | ||||
| * Check invariant: | * Check invariant: | ||||
| * 0 = -x*y + z*t*u | * 0 = -x*y + z*t*u | ||||
| */ | */ | ||||
| p448_mul ( &L2, &ext->t, &ext->u ); | |||||
| p448_mul ( &L4, &ext->z, &L2 ); | |||||
| p448_addw ( &L4, 0 ); | |||||
| p448_mul ( &L3, &ext->x, &ext->y ); | |||||
| p448_neg ( &L2, &L3 ); | |||||
| p448_add ( &L3, &L2, &L4 ); | |||||
| p448_bias ( &L3, 2 ); | |||||
| L1 = p448_is_zero( &L3 ); | |||||
| p448_mul ( &L0, &ext->t, &ext->u ); | |||||
| p448_mul ( &L2, &ext->z, &L0 ); | |||||
| p448_addw ( &L2, 0 ); | |||||
| p448_mul ( &L1, &ext->x, &ext->y ); | |||||
| p448_neg ( &L0, &L1 ); | |||||
| p448_add ( &L1, &L0, &L2 ); | |||||
| p448_bias ( &L1, 2 ); | |||||
| L5 = p448_is_zero( &L1 ); | |||||
| /* | /* | ||||
| * Check invariant: | * Check invariant: | ||||
| * 0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2 | * 0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2 | ||||
| */ | */ | ||||
| p448_sqr ( &L4, &ext->y ); | |||||
| p448_neg ( &L2, &L4 ); | |||||
| p448_addw ( &L2, 0 ); | |||||
| p448_sqr ( &L3, &ext->x ); | |||||
| p448_bias ( &L3, 4 ); | |||||
| p448_add ( &L4, &L3, &L2 ); | |||||
| p448_sqr ( &L5, &ext->u ); | |||||
| p448_sqr ( &L3, &ext->t ); | |||||
| p448_mul ( &L2, &L3, &L5 ); | |||||
| p448_mulw ( &L3, &L2, 39081 ); | |||||
| p448_neg ( &L5, &L3 ); | |||||
| p448_add ( &L3, &L5, &L4 ); | |||||
| p448_neg ( &L5, &L2 ); | |||||
| p448_add ( &L4, &L5, &L3 ); | |||||
| p448_sqr ( &L3, &ext->z ); | |||||
| p448_add ( &L2, &L3, &L4 ); | |||||
| L0 = p448_is_zero( &L2 ); | |||||
| return L1 & L0; | |||||
| p448_sqr ( &L2, &ext->y ); | |||||
| p448_neg ( &L0, &L2 ); | |||||
| p448_addw ( &L0, 0 ); | |||||
| p448_sqr ( &L1, &ext->x ); | |||||
| p448_bias ( &L1, 4 ); | |||||
| p448_add ( &L2, &L1, &L0 ); | |||||
| p448_sqr ( &L3, &ext->u ); | |||||
| p448_sqr ( &L1, &ext->t ); | |||||
| p448_mul ( &L0, &L1, &L3 ); | |||||
| p448_mulw ( &L1, &L0, 39081 ); | |||||
| p448_neg ( &L3, &L1 ); | |||||
| p448_add ( &L1, &L3, &L2 ); | |||||
| p448_neg ( &L3, &L0 ); | |||||
| p448_add ( &L2, &L3, &L1 ); | |||||
| p448_sqr ( &L1, &ext->z ); | |||||
| p448_add ( &L0, &L1, &L2 ); | |||||
| L4 = p448_is_zero( &L0 ); | |||||
| return L5 & L4; | |||||
| } | } | ||||
| @@ -1,10 +1,14 @@ | |||||
| /* Copyright (c) 2014 Cryptography Research, Inc. | |||||
| * Released under the MIT License. See LICENSE.txt for license information. | |||||
| /** | |||||
| * @file ec_point.h | |||||
| * @copyright | |||||
| * Copyright (c) 2014 Cryptography Research, Inc. \n | |||||
| * Released under the MIT License. See LICENSE.txt for license information. | |||||
| * @author Mike Hamburg | |||||
| * @warning This file was automatically generated. | |||||
| */ | */ | ||||
| /* This file was generated with the assistance of a tool written in SAGE. */ | |||||
| #ifndef __CC_INCLUDED_P448_EDWARDS_H__ | |||||
| #define __CC_INCLUDED_P448_EDWARDS_H__ | |||||
| #ifndef __CC_INCLUDED_EC_POINT_H__ | |||||
| #define __CC_INCLUDED_EC_POINT_H__ | |||||
| #include "p448.h" | #include "p448.h" | ||||
| @@ -12,28 +16,28 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| /* | |||||
| /** | |||||
| * Affine point on an Edwards curve. | * Affine point on an Edwards curve. | ||||
| */ | */ | ||||
| struct affine_t { | struct affine_t { | ||||
| struct p448_t x, y; | struct p448_t x, y; | ||||
| }; | }; | ||||
| /* | |||||
| /** | |||||
| * Affine point on a twisted Edwards curve. | * Affine point on a twisted Edwards curve. | ||||
| */ | */ | ||||
| struct tw_affine_t { | struct tw_affine_t { | ||||
| struct p448_t x, y; | struct p448_t x, y; | ||||
| }; | }; | ||||
| /* | |||||
| /** | |||||
| * Montgomery buffer. | * Montgomery buffer. | ||||
| */ | */ | ||||
| struct montgomery_t { | struct montgomery_t { | ||||
| struct p448_t z0, xd, zd, xa, za; | struct p448_t z0, xd, zd, xa, za; | ||||
| }; | }; | ||||
| /* | |||||
| /** | |||||
| * Extensible coordinates for Edwards curves, suitable for | * Extensible coordinates for Edwards curves, suitable for | ||||
| * accumulators. | * accumulators. | ||||
| * | * | ||||
| @@ -55,7 +59,7 @@ struct extensible_t { | |||||
| struct p448_t x, y, z, t, u; | struct p448_t x, y, z, t, u; | ||||
| }; | }; | ||||
| /* | |||||
| /** | |||||
| * Extensible coordinates for twisted Edwards curves, | * Extensible coordinates for twisted Edwards curves, | ||||
| * suitable for accumulators. | * suitable for accumulators. | ||||
| */ | */ | ||||
| @@ -63,16 +67,18 @@ struct tw_extensible_t { | |||||
| struct p448_t x, y, z, t, u; | struct p448_t x, y, z, t, u; | ||||
| }; | }; | ||||
| /* | |||||
| * Niels coordinates for twisted Edwards curves. Good for | |||||
| * mixed readdition; suitable for fixed tables. | |||||
| /** | |||||
| * Niels coordinates for twisted Edwards curves. | |||||
| * | |||||
| * Good for mixed readdition; suitable for fixed tables. | |||||
| */ | */ | ||||
| struct tw_niels_t { | struct tw_niels_t { | ||||
| struct p448_t a, b, c; | struct p448_t a, b, c; | ||||
| }; | }; | ||||
| /* | |||||
| /** | |||||
| * Projective niels coordinates for twisted Edwards curves. | * Projective niels coordinates for twisted Edwards curves. | ||||
| * | |||||
| * Good for readdition; suitable for temporary tables. | * Good for readdition; suitable for temporary tables. | ||||
| */ | */ | ||||
| struct tw_pniels_t { | struct tw_pniels_t { | ||||
| @@ -81,7 +87,7 @@ struct tw_pniels_t { | |||||
| }; | }; | ||||
| /* | |||||
| /** | |||||
| * Auto-generated copy method. | * Auto-generated copy method. | ||||
| */ | */ | ||||
| static __inline__ void | static __inline__ void | ||||
| @@ -90,7 +96,7 @@ copy_affine ( | |||||
| const struct affine_t* ds | const struct affine_t* ds | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| /* | |||||
| /** | |||||
| * Auto-generated copy method. | * Auto-generated copy method. | ||||
| */ | */ | ||||
| static __inline__ void | static __inline__ void | ||||
| @@ -99,7 +105,7 @@ copy_tw_affine ( | |||||
| const struct tw_affine_t* ds | const struct tw_affine_t* ds | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| /* | |||||
| /** | |||||
| * Auto-generated copy method. | * Auto-generated copy method. | ||||
| */ | */ | ||||
| static __inline__ void | static __inline__ void | ||||
| @@ -108,7 +114,7 @@ copy_montgomery ( | |||||
| const struct montgomery_t* ds | const struct montgomery_t* ds | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| /* | |||||
| /** | |||||
| * Auto-generated copy method. | * Auto-generated copy method. | ||||
| */ | */ | ||||
| static __inline__ void | static __inline__ void | ||||
| @@ -117,7 +123,7 @@ copy_extensible ( | |||||
| const struct extensible_t* ds | const struct extensible_t* ds | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| /* | |||||
| /** | |||||
| * Auto-generated copy method. | * Auto-generated copy method. | ||||
| */ | */ | ||||
| static __inline__ void | static __inline__ void | ||||
| @@ -126,7 +132,7 @@ copy_tw_extensible ( | |||||
| const struct tw_extensible_t* ds | const struct tw_extensible_t* ds | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| /* | |||||
| /** | |||||
| * Auto-generated copy method. | * Auto-generated copy method. | ||||
| */ | */ | ||||
| static __inline__ void | static __inline__ void | ||||
| @@ -135,7 +141,7 @@ copy_tw_niels ( | |||||
| const struct tw_niels_t* ds | const struct tw_niels_t* ds | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| /* | |||||
| /** | |||||
| * Auto-generated copy method. | * Auto-generated copy method. | ||||
| */ | */ | ||||
| static __inline__ void | static __inline__ void | ||||
| @@ -144,7 +150,7 @@ copy_tw_pniels ( | |||||
| const struct tw_pniels_t* ds | const struct tw_pniels_t* ds | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| /* | |||||
| /** | |||||
| * Returns 1/sqrt(+- x). | * Returns 1/sqrt(+- x). | ||||
| * | * | ||||
| * The Legendre symbol of the result is the same as that of the | * The Legendre symbol of the result is the same as that of the | ||||
| @@ -158,7 +164,7 @@ p448_isr ( | |||||
| const struct p448_t* x | const struct p448_t* x | ||||
| ); | ); | ||||
| /* | |||||
| /** | |||||
| * Returns 1/x. | * Returns 1/x. | ||||
| * | * | ||||
| * If x=0, returns 0. | * If x=0, returns 0. | ||||
| @@ -169,56 +175,80 @@ p448_inverse ( | |||||
| const struct p448_t* x | const struct p448_t* x | ||||
| ); | ); | ||||
| /* | |||||
| /** | |||||
| * Add two points on a twisted Edwards curve, one in Extensible form | |||||
| * and the other in half-Niels form. | |||||
| */ | |||||
| void | |||||
| add_tw_niels_to_tw_extensible ( | |||||
| struct tw_extensible_t* d, | |||||
| const struct tw_niels_t* e | |||||
| ); | |||||
| /** | |||||
| * Add two points on a twisted Edwards curve, one in Extensible form | * Add two points on a twisted Edwards curve, one in Extensible form | ||||
| * and the other in half-Niels form. | * and the other in half-Niels form. | ||||
| */ | */ | ||||
| void | void | ||||
| p448_tw_extensible_add_niels ( | |||||
| sub_tw_niels_from_tw_extensible ( | |||||
| struct tw_extensible_t* d, | struct tw_extensible_t* d, | ||||
| const struct tw_niels_t* e | const struct tw_niels_t* e | ||||
| ); | ); | ||||
| /* | |||||
| /** | |||||
| * Add two points on a twisted Edwards curve, one in Extensible form | * Add two points on a twisted Edwards curve, one in Extensible form | ||||
| * and the other in projective Niels form. | * and the other in projective Niels form. | ||||
| */ | */ | ||||
| void | void | ||||
| p448_tw_extensible_add_pniels ( | |||||
| add_tw_pniels_to_tw_extensible ( | |||||
| struct tw_extensible_t* e, | struct tw_extensible_t* e, | ||||
| const struct tw_pniels_t* a | const struct tw_pniels_t* a | ||||
| ); | ); | ||||
| /* | |||||
| /** | |||||
| * Subtract a point in projective Niels form from a point in Extensible | |||||
| * form, both on a twisted Edwards curve. | |||||
| */ | |||||
| void | |||||
| sub_tw_pniels_from_tw_extensible ( | |||||
| struct tw_extensible_t* e, | |||||
| const struct tw_pniels_t* a | |||||
| ); | |||||
| /** | |||||
| * Double a point on a twisted Edwards curve, in "extensible" coordinates. | * Double a point on a twisted Edwards curve, in "extensible" coordinates. | ||||
| */ | */ | ||||
| void | void | ||||
| p448_tw_extensible_double ( | |||||
| double_tw_extensible ( | |||||
| struct tw_extensible_t* a | struct tw_extensible_t* a | ||||
| ); | ); | ||||
| /* | |||||
| /** | |||||
| * Double a point on an Edwards curve, in "extensible" coordinates. | * Double a point on an Edwards curve, in "extensible" coordinates. | ||||
| */ | */ | ||||
| void | void | ||||
| p448_extensible_double ( | |||||
| double_extensible ( | |||||
| struct extensible_t* a | struct extensible_t* a | ||||
| ); | ); | ||||
| /* | |||||
| * 4-isogeny from untwisted to twisted. | |||||
| /** | |||||
| * Double a point, and transfer it to the twisted curve. | |||||
| * | |||||
| * That is, apply the 4-isogeny. | |||||
| */ | */ | ||||
| void | void | ||||
| p448_isogeny_un_to_tw ( | |||||
| twist_and_double ( | |||||
| struct tw_extensible_t* b, | struct tw_extensible_t* b, | ||||
| const struct extensible_t* a | const struct extensible_t* a | ||||
| ); | ); | ||||
| /* | |||||
| * Dual 4-isogeny from twisted to untwisted. | |||||
| /** | |||||
| * Double a point, and transfer it to the untwisted curve. | |||||
| * | |||||
| * That is, apply the dual isogeny. | |||||
| */ | */ | ||||
| void | void | ||||
| p448_isogeny_tw_to_un ( | |||||
| untwist_and_double ( | |||||
| struct extensible_t* b, | struct extensible_t* b, | ||||
| const struct tw_extensible_t* a | const struct tw_extensible_t* a | ||||
| ); | ); | ||||
| @@ -260,49 +290,82 @@ convert_tw_niels_to_tw_extensible ( | |||||
| ); | ); | ||||
| void | void | ||||
| p448_montgomery_step ( | |||||
| montgomery_step ( | |||||
| struct montgomery_t* a | struct montgomery_t* a | ||||
| ); | ); | ||||
| void | void | ||||
| p448_montgomery_serialize ( | |||||
| serialize_montgomery ( | |||||
| struct p448_t* sign, | struct p448_t* sign, | ||||
| struct p448_t* ser, | struct p448_t* ser, | ||||
| const struct montgomery_t* a, | const struct montgomery_t* a, | ||||
| const struct p448_t* sbz | const struct p448_t* sbz | ||||
| ); | ); | ||||
| /* | |||||
| * Serialize a point on an Edwards curve | |||||
| * The serialized form would be sqrt((z-y)/(z+y)) with sign of xz | |||||
| /** | |||||
| * Serialize a point on an Edwards curve. | |||||
| * | |||||
| * The serialized form would be sqrt((z-y)/(z+y)) with sign of xz. | |||||
| * | |||||
| * It would be on 4y^2/(1-d) = x^3 + 2(1+d)/(1-d) * x^2 + x. | * It would be on 4y^2/(1-d) = x^3 + 2(1+d)/(1-d) * x^2 + x. | ||||
| * | |||||
| * But 4/(1-d) isn't square, so we need to twist it: | * But 4/(1-d) isn't square, so we need to twist it: | ||||
| * | |||||
| * -x is on 4y^2/(d-1) = x^3 + 2(d+1)/(d-1) * x^2 + x | * -x is on 4y^2/(d-1) = x^3 + 2(d+1)/(d-1) * x^2 + x | ||||
| */ | */ | ||||
| void | void | ||||
| extensible_serialize ( | |||||
| serialize_extensible ( | |||||
| struct p448_t* b, | struct p448_t* b, | ||||
| const struct extensible_t* a | const struct extensible_t* a | ||||
| ); | ); | ||||
| /* | |||||
| /** | |||||
| * | * | ||||
| */ | */ | ||||
| void | void | ||||
| isogeny_and_serialize ( | |||||
| untwist_and_double_and_serialize ( | |||||
| struct p448_t* b, | struct p448_t* b, | ||||
| const struct tw_extensible_t* a | const struct tw_extensible_t* a | ||||
| ); | ); | ||||
| /* | |||||
| * Deserialize a point to an untwisted affine curve | |||||
| /** | |||||
| * Expensive transfer from untwisted to twisted. Roughly equivalent to halve and isogeny. | |||||
| * Correctly transfers point of order 2. | |||||
| * | |||||
| * Can't have x=+1 (it's not even). There is code to fix the exception that would otherwise | |||||
| * occur at (0,1). | |||||
| * | |||||
| * Input point must be even. | |||||
| */ | |||||
| void | |||||
| twist ( | |||||
| struct tw_extensible_t* b, | |||||
| const struct extensible_t* a | |||||
| ); | |||||
| /** | |||||
| * Deserialize a point to an untwisted affine curve. | |||||
| */ | */ | ||||
| mask_t | mask_t | ||||
| affine_deserialize ( | |||||
| deserialize_affine ( | |||||
| struct affine_t* a, | struct affine_t* a, | ||||
| const struct p448_t* sz | const struct p448_t* sz | ||||
| ); | ); | ||||
| /** | |||||
| * Deserialize a point and transfer it to the twist. | |||||
| * | |||||
| * Not guaranteed to preserve the 4-torsion component. | |||||
| * | |||||
| * Refuses to deserialize +-1, which are the points of order 2. | |||||
| */ | |||||
| mask_t | |||||
| deserialize_and_twist_approx ( | |||||
| struct tw_extensible_t* a, | |||||
| const struct p448_t* sdm1, | |||||
| const struct p448_t* sz | |||||
| ); | |||||
| void | void | ||||
| set_identity_extensible ( | set_identity_extensible ( | ||||
| struct extensible_t* a | struct extensible_t* a | ||||
| @@ -343,17 +406,17 @@ elligator_2s_inject ( | |||||
| ); | ); | ||||
| mask_t | mask_t | ||||
| p448_affine_validate ( | |||||
| validate_affine ( | |||||
| const struct affine_t* a | const struct affine_t* a | ||||
| ); | ); | ||||
| /* | |||||
| /** | |||||
| * Check the invariants for struct tw_extensible_t. | * Check the invariants for struct tw_extensible_t. | ||||
| * PERF: This function was automatically generated | * PERF: This function was automatically generated | ||||
| * with no regard for speed. | * with no regard for speed. | ||||
| */ | */ | ||||
| mask_t | mask_t | ||||
| p448_tw_extensible_validate ( | |||||
| validate_tw_extensible ( | |||||
| const struct tw_extensible_t* ext | const struct tw_extensible_t* ext | ||||
| ); | ); | ||||
| @@ -437,4 +500,4 @@ copy_tw_pniels ( | |||||
| }; /* extern "C" */ | }; /* extern "C" */ | ||||
| #endif | #endif | ||||
| #endif /* __CC_INCLUDED_P448_EDWARDS_H__ */ | |||||
| #endif /* __CC_INCLUDED_EC_POINT_H__ */ | |||||
| @@ -1,3 +1,5 @@ | |||||
| _goldilocks_init | _goldilocks_init | ||||
| _goldilocks_keygen | _goldilocks_keygen | ||||
| _goldilocks_shared_secret | _goldilocks_shared_secret | ||||
| _goldilocks_sign | |||||
| _goldilocks_verify | |||||
| @@ -1,11 +1,14 @@ | |||||
| /* Copyright (c) 2014 Cryptography Research, Inc. | /* Copyright (c) 2014 Cryptography Research, Inc. | ||||
| * Released under the MIT License. See LICENSE.txt for license information. | * Released under the MIT License. See LICENSE.txt for license information. | ||||
| */ | */ | ||||
| #include <errno.h> | |||||
| #include "goldilocks.h" | #include "goldilocks.h" | ||||
| #include "ec_point.h" | #include "ec_point.h" | ||||
| #include "scalarmul.h" | #include "scalarmul.h" | ||||
| #include "barrett_field.h" | #include "barrett_field.h" | ||||
| #include "crandom.h" | #include "crandom.h" | ||||
| #include "sha512.h" | |||||
| #ifndef GOLDILOCKS_RANDOM_INIT_FILE | #ifndef GOLDILOCKS_RANDOM_INIT_FILE | ||||
| #define GOLDILOCKS_RANDOM_INIT_FILE "/dev/urandom" | #define GOLDILOCKS_RANDOM_INIT_FILE "/dev/urandom" | ||||
| @@ -28,6 +31,14 @@ const struct affine_t goldilocks_base_point = { | |||||
| {{ 19, 0, 0, 0, 0, 0, 0, 0 }} | {{ 19, 0, 0, 0, 0, 0, 0, 0 }} | ||||
| }; | }; | ||||
| // /* TODO: direct */ | |||||
| // void | |||||
| // transfer_and_serialize(struct p448_t *out, const struct tw_extensible_t *twext) { | |||||
| // struct extensible_t ext; | |||||
| // transfer_tw_to_un(&ext, twext); | |||||
| // serialize_extensible(out, &ext); | |||||
| // } | |||||
| // FIXME: threading | // FIXME: threading | ||||
| // TODO: autogen instead of init | // TODO: autogen instead of init | ||||
| struct { | struct { | ||||
| @@ -37,16 +48,17 @@ struct { | |||||
| } goldilocks_global; | } goldilocks_global; | ||||
| int | int | ||||
| goldilocks_init() { | |||||
| goldilocks_init () { | |||||
| struct extensible_t ext; | struct extensible_t ext; | ||||
| struct tw_extensible_t text; | struct tw_extensible_t text; | ||||
| /* Sanity check: the base point is on the curve. */ | /* Sanity check: the base point is on the curve. */ | ||||
| assert(p448_affine_validate(&goldilocks_base_point)); | |||||
| assert(validate_affine(&goldilocks_base_point)); | |||||
| /* Convert it to twisted Edwards. */ | /* Convert it to twisted Edwards. */ | ||||
| convert_affine_to_extensible(&ext, &goldilocks_base_point); | convert_affine_to_extensible(&ext, &goldilocks_base_point); | ||||
| p448_isogeny_un_to_tw(&text, &ext); | |||||
| twist(&text, &ext); | |||||
| //p448_transfer_un_to_tw(&text, &ext); | |||||
| /* Precompute the tables. */ | /* Precompute the tables. */ | ||||
| precompute_for_combs(goldilocks_global.combs, &text, 5, 5, 18); | precompute_for_combs(goldilocks_global.combs, &text, 5, 5, 18); | ||||
| @@ -58,61 +70,6 @@ goldilocks_init() { | |||||
| GOLDILOCKS_RANDOM_RESEEDS_MANDATORY); | GOLDILOCKS_RANDOM_RESEEDS_MANDATORY); | ||||
| } | } | ||||
| // TODO: move to a better place | |||||
| // TODO: word size | |||||
| void | |||||
| p448_serialize(uint8_t *serial, const struct p448_t *x) { | |||||
| int i,j; | |||||
| p448_t red; | |||||
| p448_copy(&red, x); | |||||
| p448_strong_reduce(&red); | |||||
| for (i=0; i<8; i++) { | |||||
| for (j=0; j<7; j++) { | |||||
| serial[7*i+j] = red.limb[i]; | |||||
| red.limb[i] >>= 8; | |||||
| } | |||||
| assert(red.limb[i] == 0); | |||||
| } | |||||
| } | |||||
| void | |||||
| q448_serialize(uint8_t *serial, const word_t x[7]) { | |||||
| int i,j; | |||||
| for (i=0; i<7; i++) { | |||||
| for (j=0; j<8; j++) { | |||||
| serial[8*i+j] = x[i]>>(8*j); | |||||
| } | |||||
| } | |||||
| } | |||||
| mask_t | |||||
| q448_deserialize(word_t x[7], const uint8_t serial[56]) { | |||||
| int i,j; | |||||
| for (i=0; i<7; i++) { | |||||
| word_t out = 0; | |||||
| for (j=0; j<8; j++) { | |||||
| out |= ((word_t)serial[8*i+j])<<(8*j); | |||||
| } | |||||
| x[i] = out; | |||||
| } | |||||
| // TODO: check for reduction | |||||
| return MASK_SUCCESS; | |||||
| } | |||||
| mask_t | |||||
| p448_deserialize(p448_t *x, const uint8_t serial[56]) { | |||||
| int i,j; | |||||
| for (i=0; i<8; i++) { | |||||
| word_t out = 0; | |||||
| for (j=0; j<7; j++) { | |||||
| out |= ((word_t)serial[7*i+j])<<(8*j); | |||||
| } | |||||
| x->limb[i] = out; | |||||
| } | |||||
| // TODO: check for reduction | |||||
| return MASK_SUCCESS; | |||||
| } | |||||
| static word_t | static word_t | ||||
| q448_lo[4] = { | q448_lo[4] = { | ||||
| 0xdc873d6d54a7bb0dull, | 0xdc873d6d54a7bb0dull, | ||||
| @@ -121,10 +78,22 @@ q448_lo[4] = { | |||||
| 0x000000008335dc16ull | 0x000000008335dc16ull | ||||
| }; | }; | ||||
| static const struct p448_t | |||||
| sqrt_d_minus_1 = {{ | |||||
| 0xd2e21836749f46ull, | |||||
| 0x888db42b4f0179ull, | |||||
| 0x5a189aabdeea38ull, | |||||
| 0x51e65ca6f14c06ull, | |||||
| 0xa49f7b424d9770ull, | |||||
| 0xdcac4628c5f656ull, | |||||
| 0x49443b8748734aull, | |||||
| 0x12fec0c0b25b7aull | |||||
| }}; | |||||
| int | int | ||||
| goldilocks_keygen( | |||||
| uint8_t private[56], | |||||
| uint8_t public[56] | |||||
| goldilocks_keygen ( | |||||
| struct goldilocks_private_key_t *privkey, | |||||
| struct goldilocks_public_key_t *pubkey | |||||
| ) { | ) { | ||||
| // TODO: check for init. Also maybe take CRANDOM object? API... | // TODO: check for init. Also maybe take CRANDOM object? API... | ||||
| word_t sk[448*2/WORD_BITS]; | word_t sk[448*2/WORD_BITS]; | ||||
| @@ -134,35 +103,197 @@ goldilocks_keygen( | |||||
| int ret = crandom_generate(&goldilocks_global.rand, (unsigned char *)sk, sizeof(sk)); | int ret = crandom_generate(&goldilocks_global.rand, (unsigned char *)sk, sizeof(sk)); | ||||
| barrett_reduce(sk,sizeof(sk)/sizeof(sk[0]),0,q448_lo,7,4,62); // TODO word size | barrett_reduce(sk,sizeof(sk)/sizeof(sk[0]),0,q448_lo,7,4,62); // TODO word size | ||||
| q448_serialize(private, sk); | |||||
| q448_serialize(privkey->opaque, sk); | |||||
| edwards_comb(&exta, sk, goldilocks_global.combs, 5, 5, 18); | edwards_comb(&exta, sk, goldilocks_global.combs, 5, 5, 18); | ||||
| isogeny_and_serialize(&pk, &exta); | |||||
| p448_serialize(public, &pk); | |||||
| //transfer_and_serialize_qtor(&pk, &sqrt_d_minus_1, &exta); | |||||
| untwist_and_double_and_serialize(&pk, &exta); | |||||
| p448_serialize(pubkey->opaque, &pk); | |||||
| memcpy(&privkey->opaque[56], pubkey->opaque, 56); | |||||
| int ret2 = crandom_generate(&goldilocks_global.rand, &privkey->opaque[112], 32); | |||||
| if (!ret) ret = ret2; | |||||
| return ret; | |||||
| return ret ? GOLDI_ENODICE : GOLDI_EOK; | |||||
| } | } | ||||
| int | int | ||||
| goldilocks_shared_secret( | |||||
| uint8_t shared[56], | |||||
| const uint8_t private[56], | |||||
| const uint8_t public[56] | |||||
| goldilocks_shared_secret ( | |||||
| uint8_t shared[64], | |||||
| const struct goldilocks_private_key_t *my_privkey, | |||||
| const struct goldilocks_public_key_t *your_pubkey | |||||
| ) { | ) { | ||||
| // TODO: SHA | |||||
| word_t sk[448/WORD_BITS]; | word_t sk[448/WORD_BITS]; | ||||
| struct p448_t pk; | struct p448_t pk; | ||||
| mask_t succ = p448_deserialize(&pk,public); | |||||
| succ &= q448_deserialize(sk,private); | |||||
| mask_t succ = p448_deserialize(&pk,your_pubkey->opaque), msucc = -1; | |||||
| #ifdef EXPERIMENT_ECDH_STIR_IN_PUBKEYS | |||||
| struct p448_t sum, prod; | |||||
| msucc &= p448_deserialize(&sum,&my_privkey->opaque[56]); | |||||
| p448_mul(&prod,&pk,&sum); | |||||
| p448_add(&sum,&pk,&sum); | |||||
| #endif | |||||
| msucc &= q448_deserialize(sk,my_privkey->opaque); | |||||
| succ &= p448_montgomery_ladder(&pk,&pk,sk,446,2); | succ &= p448_montgomery_ladder(&pk,&pk,sk,446,2); | ||||
| p448_serialize(shared,&pk); | p448_serialize(shared,&pk); | ||||
| // TODO: hash | |||||
| if (succ) { | |||||
| return 0; | |||||
| } else { | |||||
| return -1; | |||||
| /* obliterate records of our failure by adjusting with obliteration key */ | |||||
| struct sha512_ctx_t ctx; | |||||
| sha512_init(&ctx); | |||||
| #ifdef EXPERIMENT_ECDH_OBLITERATE_CT | |||||
| uint8_t oblit[40]; | |||||
| unsigned i; | |||||
| for (i=0; i<8; i++) { | |||||
| oblit[i] = "noshared"[i] & ~(succ&msucc); | |||||
| } | |||||
| for (i=0; i<32; i++) { | |||||
| oblit[8+i] = my_privkey->opaque[112+i] & ~(succ&msucc); | |||||
| } | |||||
| sha512_update(&ctx, oblit, 40); | |||||
| #endif | |||||
| #ifdef EXPERIMENT_ECDH_STIR_IN_PUBKEYS | |||||
| /* stir in the sum and product of the pubkeys. */ | |||||
| uint8_t a_pk[56]; | |||||
| p448_serialize(a_pk, &sum); | |||||
| sha512_update(&ctx, a_pk, 56); | |||||
| p448_serialize(a_pk, &prod); | |||||
| sha512_update(&ctx, a_pk, 56); | |||||
| #endif | |||||
| /* stir in the shared key and finish */ | |||||
| sha512_update(&ctx, shared, 56); | |||||
| sha512_final(&ctx, shared); | |||||
| return (GOLDI_ECORRUPT & ~msucc) | |||||
| | (GOLDI_EINVAL & msucc &~ succ) | |||||
| | (GOLDI_EOK & msucc & succ); | |||||
| } | |||||
| int | |||||
| goldilocks_sign ( | |||||
| uint8_t signature_out[56*2], | |||||
| const uint8_t *message, | |||||
| uint64_t message_len, | |||||
| const struct goldilocks_private_key_t *privkey | |||||
| ) { | |||||
| /* challenge = H(pk, [nonceG], message). FIXME: endian. */ | |||||
| word_t skw[448/WORD_BITS]; | |||||
| mask_t succ = q448_deserialize(skw,privkey->opaque); | |||||
| if (!succ) { | |||||
| memset(skw,0,sizeof(skw)); | |||||
| return GOLDI_ECORRUPT; | |||||
| } | } | ||||
| /* Derive a nonce. TODO: use HMAC. FIXME: endian. FUTURE: factor. */ | |||||
| word_t tk[512/WORD_BITS]; | |||||
| struct sha512_ctx_t ctx; | |||||
| sha512_init(&ctx); | |||||
| sha512_update(&ctx, (const unsigned char *)"signonce", 8); | |||||
| sha512_update(&ctx, &privkey->opaque[112], 32); | |||||
| sha512_update(&ctx, message, message_len); | |||||
| sha512_update(&ctx, &privkey->opaque[112], 32); | |||||
| sha512_final(&ctx, (unsigned char *)tk); | |||||
| barrett_reduce(tk,512/WORD_BITS,0,q448_lo,7,4,62); // TODO word size | |||||
| /* 4[nonce]G */ | |||||
| uint8_t signature_tmp[56]; | |||||
| struct tw_extensible_t exta; | |||||
| struct p448_t gsk; | |||||
| edwards_comb(&exta, tk, goldilocks_global.combs, 5, 5, 18); | |||||
| double_tw_extensible(&exta); | |||||
| untwist_and_double_and_serialize(&gsk, &exta); | |||||
| p448_serialize(signature_tmp, &gsk); | |||||
| word_t challenge[512/WORD_BITS]; | |||||
| sha512_update(&ctx, &privkey->opaque[56], 56); | |||||
| sha512_update(&ctx, signature_tmp, 56); | |||||
| sha512_update(&ctx, message, message_len); | |||||
| sha512_final(&ctx, (unsigned char *)challenge); | |||||
| // reduce challenge and sub. | |||||
| barrett_negate(challenge,512/WORD_BITS,q448_lo,7,4,62); | |||||
| barrett_mac( | |||||
| tk,512/WORD_BITS, | |||||
| challenge,512/WORD_BITS, | |||||
| skw,448/WORD_BITS, | |||||
| q448_lo,7,4,62 | |||||
| ); | |||||
| word_t carry = add_nr_ext_packed(tk,tk,512/WORD_BITS,tk,512/WORD_BITS,-1); | |||||
| barrett_reduce(tk,512/WORD_BITS,carry,q448_lo,7,4,62); | |||||
| memcpy(signature_out, signature_tmp, 56); | |||||
| q448_serialize(signature_out+56, tk); | |||||
| memset((unsigned char *)tk,0,sizeof(tk)); | |||||
| memset((unsigned char *)skw,0,sizeof(skw)); | |||||
| memset((unsigned char *)challenge,0,sizeof(challenge)); | |||||
| /* response = 2(nonce_secret - sk*challenge) | |||||
| * Nonce = 8[nonce_secret]*G | |||||
| * PK = 2[sk]*G, except doubled (TODO) | |||||
| * so [2] ( [response]G + 2[challenge]PK ) = Nonce | |||||
| */ | |||||
| return 0; | |||||
| } | |||||
| int | |||||
| goldilocks_verify ( | |||||
| const uint8_t signature[56*2], | |||||
| const uint8_t *message, | |||||
| uint64_t message_len, | |||||
| const struct goldilocks_public_key_t *pubkey | |||||
| ) { | |||||
| struct p448_t pk; | |||||
| word_t s[448/WORD_BITS]; | |||||
| mask_t succ = p448_deserialize(&pk,pubkey->opaque); | |||||
| if (!succ) return EINVAL; | |||||
| succ = q448_deserialize(s, &signature[56]); | |||||
| if (!succ) return EINVAL; | |||||
| /* challenge = H(pk, [nonceG], message). FIXME: endian. */ | |||||
| word_t challenge[512/WORD_BITS]; | |||||
| struct sha512_ctx_t ctx; | |||||
| sha512_init(&ctx); | |||||
| sha512_update(&ctx, pubkey->opaque, 56); | |||||
| sha512_update(&ctx, signature, 56); | |||||
| sha512_update(&ctx, message, message_len); | |||||
| sha512_final(&ctx, (unsigned char *)challenge); | |||||
| barrett_reduce(challenge,512/WORD_BITS,0,q448_lo,7,4,62); | |||||
| struct p448_t eph; | |||||
| struct tw_extensible_t pk_text; | |||||
| /* deserialize [nonce]G */ | |||||
| succ = p448_deserialize(&eph, signature); | |||||
| if (!succ) return EINVAL; | |||||
| // succ = affine_deserialize(&pk_aff,&pk); | |||||
| // if (!succ) return EINVAL; | |||||
| // | |||||
| // convert_affine_to_extensible(&pk_ext,&pk_aff); | |||||
| // transfer_un_to_tw(&pk_text,&pk_ext); | |||||
| succ = deserialize_and_twist_approx(&pk_text, &sqrt_d_minus_1, &pk); | |||||
| if (!succ) return EINVAL; | |||||
| edwards_combo_var_fixed_vt( &pk_text, challenge, s, goldilocks_global.wnafs, 5 ); | |||||
| untwist_and_double_and_serialize( &pk, &pk_text ); | |||||
| p448_sub(&eph, &eph, &pk); | |||||
| p448_bias(&eph, 2); | |||||
| succ = p448_is_zero(&eph); | |||||
| return succ ? 0 : GOLDI_EINVAL; | |||||
| } | } | ||||
| @@ -2,30 +2,167 @@ | |||||
| * Released under the MIT License. See LICENSE.txt for license information. | * Released under the MIT License. See LICENSE.txt for license information. | ||||
| */ | */ | ||||
| /** | |||||
| * @file goldilocks.h | |||||
| * @author Mike Hamburg | |||||
| * @brief Goldilocks high-level functions. | |||||
| */ | |||||
| #ifndef __GOLDILOCKS_H__ | #ifndef __GOLDILOCKS_H__ | ||||
| #define __GOLDILOCKS_H__ 1 | #define __GOLDILOCKS_H__ 1 | ||||
| #include <stdint.h> | #include <stdint.h> | ||||
| /** | |||||
| * @brief Serialized form of a Goldilocks public key. | |||||
| * | |||||
| * @warning This isn't even my final form! | |||||
| */ | |||||
| struct goldilocks_public_key_t { | |||||
| uint8_t opaque[56]; /**< Serialized data. */ | |||||
| }; | |||||
| /** | |||||
| * @brief Serialized form of a Goldilocks private key. | |||||
| * | |||||
| * Contains 56 bytes of actual private key, 56 bytes of | |||||
| * public key, and 32 bytes of symmetric key for randomization. | |||||
| * | |||||
| * @warning This isn't even my final form! | |||||
| */ | |||||
| struct goldilocks_private_key_t { | |||||
| uint8_t opaque[144]; /**< Serialized data. */ | |||||
| }; | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| /** @brief No error. */ | |||||
| static const int GOLDI_EOK = 0; | |||||
| /** @brief Error: your key is corrupt. */ | |||||
| static const int GOLDI_ECORRUPT = 44801; | |||||
| /** @brief Error: other party's key is corrupt. */ | |||||
| static const int GOLDI_EINVAL = 44802; | |||||
| /** @brief Error: not enough entropy. */ | |||||
| static const int GOLDI_ENODICE = 44804; | |||||
| /** | |||||
| * @brief Initialize Goldilocks' precomputed tables and | |||||
| * random number generator. | |||||
| * @retval GOLDI_EOK Success. | |||||
| * @retval Nonzero An error occurred. | |||||
| */ | |||||
| int | int | ||||
| goldilocks_init(); | goldilocks_init(); | ||||
| /** | |||||
| * @brief Generate a new random keypair. | |||||
| * @param [out] privkey The generated private key. | |||||
| * @param [out] pubkey The generated public key. | |||||
| * | |||||
| * @warning This isn't even my final form! | |||||
| * | |||||
| * @retval GOLDI_EOK Success. | |||||
| * @retval GOLDI_ENODICE Insufficient entropy. | |||||
| */ | |||||
| int | int | ||||
| goldilocks_keygen( | |||||
| uint8_t private[56], | |||||
| uint8_t public[56] | |||||
| ); | |||||
| goldilocks_keygen ( | |||||
| struct goldilocks_private_key_t *privkey, | |||||
| struct goldilocks_public_key_t *pubkey | |||||
| ) __attribute__((warn_unused_result)); | |||||
| /** | |||||
| * @brief Generate a Diffie-Hellman shared secret in constant time. | |||||
| * | |||||
| * This function uses some compile-time flags whose merit remains to | |||||
| * be decided. | |||||
| * | |||||
| * If the flag EXPERIMENT_ECDH_OBLITERATE_CT is set, prepend 40 bytes | |||||
| * of zeros to the secret before hashing. In the case that the other | |||||
| * party's key is detectably corrupt, instead the symmetric part | |||||
| * of the secret key is used to produce a pseudorandom value. | |||||
| * | |||||
| * If EXPERIMENT_ECDH_STIR_IN_PUBKEYS is set, the sum and product of | |||||
| * the two parties' public keys are prepended to the hash. | |||||
| * | |||||
| * @warning This isn't even my final form! | |||||
| * | |||||
| * @param [out] shared The shared secret established with the other party. | |||||
| * @param [in] my_privkey My private key. | |||||
| * @param [in] your_pubkey The other party's public key. | |||||
| * | |||||
| * @retval GOLDI_EOK Success. | |||||
| * @retval GOLDI_ECORRUPT My key is corrupt. | |||||
| * @retval GOLDI_EINVAL The other party's key is corrupt. | |||||
| */ | |||||
| int | int | ||||
| goldilocks_shared_secret( | |||||
| uint8_t shared[56], | |||||
| const uint8_t private[56], | |||||
| const uint8_t public[56] | |||||
| goldilocks_shared_secret ( | |||||
| uint8_t shared[64], | |||||
| const struct goldilocks_private_key_t *my_privkey, | |||||
| const struct goldilocks_public_key_t *your_pubkey | |||||
| ) __attribute__((warn_unused_result)); | |||||
| /** | |||||
| * @brief Sign a message. | |||||
| * | |||||
| * The signature is deterministic, using the symmetric secret found in the | |||||
| * secret key to form a nonce. | |||||
| * | |||||
| * The technique used in signing is a modified Schnorr system, like EdDSA. | |||||
| * | |||||
| * @warning This isn't even my final form! | |||||
| * @warning This function contains endian bugs. (TODO) | |||||
| * | |||||
| * @param [out] signature_out Space for the output signature. | |||||
| * @param [in] message The message to be signed. | |||||
| * @param [in] message_len The length of the message to be signed. | |||||
| * @param [in] privkey My private key. | |||||
| * | |||||
| * @retval GOLDI_EOK Success. | |||||
| * @retval GOLDI_ECORRUPT My key is corrupt. | |||||
| */ | |||||
| int | |||||
| goldilocks_sign ( | |||||
| uint8_t signature_out[56*2], | |||||
| const uint8_t *message, | |||||
| uint64_t message_len, | |||||
| const struct goldilocks_private_key_t *privkey | |||||
| ); | ); | ||||
| /** | |||||
| * @brief Verify a signature. | |||||
| * | |||||
| * This function is fairly strict. It will correctly detect when | |||||
| * the signature has the wrong cofactor component. Once deserialization | |||||
| * of numbers is strictified (TODO) it will limit the response to being | |||||
| * less than q as well. | |||||
| * | |||||
| * Currently this function does not detect when the public key is weird, | |||||
| * e.g. 0, has cofactor, etc. As a result, a party with a bogus public | |||||
| * key could create signatures that succeed on some systems and fail on | |||||
| * others. | |||||
| * | |||||
| * @warning This isn't even my final form! | |||||
| * @warning This function contains endian bugs. (TODO) | |||||
| * | |||||
| * @param [in] signature The signature to check. | |||||
| * @param [in] message The message that was signed. | |||||
| * @param [in] message_len The length of the signed message. | |||||
| * @param [in] pubkey The signer's public key. | |||||
| * | |||||
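| * @retval GOLDI_EOK The signature is valid. | |||||
| * @retval GOLDI_EINVAL The signature does not verify. | |||||
| * @retval Nonzero Another error occurred (e.g. a value failed to deserialize). | |||||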
| */ | |||||
| int | |||||
| goldilocks_verify ( | |||||
| const uint8_t signature[56*2], | |||||
| const uint8_t *message, | |||||
| uint64_t message_len, | |||||
| const struct goldilocks_public_key_t *pubkey | |||||
| ) __attribute__((warn_unused_result)); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| }; /* extern "C" */ | }; /* extern "C" */ | ||||
| @@ -3,7 +3,9 @@ | |||||
| * Released under the MIT License. See LICENSE.txt for license information. | * Released under the MIT License. See LICENSE.txt for license information. | ||||
| */ | */ | ||||
| /* cRandom intrinsics header. */ | |||||
| /** @file crandom.h | |||||
| * @brief cRandom intrinsics header. | |||||
| */ | |||||
| #ifndef __CRANDOM_INTRINSICS_H__ | #ifndef __CRANDOM_INTRINSICS_H__ | ||||
| #define __CRANDOM_INTRINSICS_H__ 1 | #define __CRANDOM_INTRINSICS_H__ 1 | ||||
| @@ -22,6 +24,7 @@ | |||||
| #define XOP 16 | #define XOP 16 | ||||
| #define AVX 32 | #define AVX 32 | ||||
| #define AVX2 64 | #define AVX2 64 | ||||
| #define RDRAND 128 | |||||
| INTRINSIC u_int64_t rdtsc() { | INTRINSIC u_int64_t rdtsc() { | ||||
| u_int64_t out = 0; | u_int64_t out = 0; | ||||
| @@ -31,6 +34,15 @@ INTRINSIC u_int64_t rdtsc() { | |||||
| return out; | return out; | ||||
| } | } | ||||
| /** | |||||
| * Return x unchanged, but confuse the compiler. | |||||
| * | |||||
| * This is mainly for use in test scripts, to prevent the value from | |||||
| * being constant-folded or removed by dead code elimination. | |||||
| * | |||||
| * @param x A 64-bit number. | |||||
| * @return The same number in a register. | |||||
| */ | |||||
| INTRINSIC u_int64_t opacify(u_int64_t x) { | INTRINSIC u_int64_t opacify(u_int64_t x) { | ||||
| __asm__ volatile("mov %0, %0" : "+r"(x)); | __asm__ volatile("mov %0, %0" : "+r"(x)); | ||||
| return x; | return x; | ||||
| @@ -87,6 +99,12 @@ INTRINSIC ssereg sse2_rotate(int r, ssereg a) { | |||||
| #ifdef __AES__ | #ifdef __AES__ | ||||
| /* don't include intrinsics file, because not all platforms have it */ | /* don't include intrinsics file, because not all platforms have it */ | ||||
| # define MIGHT_HAVE_AESNI 1 | # define MIGHT_HAVE_AESNI 1 | ||||
| # ifndef MIGHT_HAVE_RDRAND | |||||
| # define MIGHT_HAVE_RDRAND 1 | |||||
| # endif | |||||
| # ifndef MUST_HAVE_RDRAND | |||||
| # define MUST_HAVE_RDRAND 0 | |||||
| # endif | |||||
| # ifndef MUST_HAVE_AESNI | # ifndef MUST_HAVE_AESNI | ||||
| # define MUST_HAVE_AESNI 0 | # define MUST_HAVE_AESNI 0 | ||||
| # endif | # endif | ||||
| @@ -112,6 +130,8 @@ INTRINSIC ssereg aesenclast(ssereg subkey, ssereg block) { | |||||
| #else | #else | ||||
| # define MIGHT_HAVE_AESNI 0 | # define MIGHT_HAVE_AESNI 0 | ||||
| # define MUST_HAVE_AESNI 0 | # define MUST_HAVE_AESNI 0 | ||||
| # define MIGHT_HAVE_RDRAND 0 | |||||
| # define MUST_HAVE_RDRAND 0 | |||||
| #endif | #endif | ||||
| #ifdef __XOP__ | #ifdef __XOP__ | ||||
| @@ -131,20 +151,22 @@ INTRINSIC ssereg xop_rotate(int amount, ssereg x) { | |||||
| #endif | #endif | ||||
| #define MIGHT_MASK \ | #define MIGHT_MASK \ | ||||
| ( SSE2 * MIGHT_HAVE_SSE2 \ | |||||
| | SSSE3 * MIGHT_HAVE_SSSE3 \ | |||||
| | AESNI * MIGHT_HAVE_AESNI \ | |||||
| | XOP * MIGHT_HAVE_XOP \ | |||||
| | AVX * MIGHT_HAVE_AVX \ | |||||
| | AVX2 * MIGHT_HAVE_AVX2) | |||||
| ( SSE2 * MIGHT_HAVE_SSE2 \ | |||||
| | SSSE3 * MIGHT_HAVE_SSSE3 \ | |||||
| | AESNI * MIGHT_HAVE_AESNI \ | |||||
| | XOP * MIGHT_HAVE_XOP \ | |||||
| | AVX * MIGHT_HAVE_AVX \ | |||||
| | RDRAND * MIGHT_HAVE_RDRAND \ | |||||
| | AVX2 * MIGHT_HAVE_AVX2) | |||||
| #define MUST_MASK \ | #define MUST_MASK \ | ||||
| ( SSE2 * MUST_HAVE_SSE2 \ | |||||
| | SSSE3 * MUST_HAVE_SSSE3 \ | |||||
| | AESNI * MUST_HAVE_AESNI \ | |||||
| | XOP * MUST_HAVE_XOP \ | |||||
| | AVX * MUST_HAVE_AVX \ | |||||
| | AVX2 * MUST_HAVE_AVX2 ) | |||||
| ( SSE2 * MUST_HAVE_SSE2 \ | |||||
| | SSSE3 * MUST_HAVE_SSSE3 \ | |||||
| | AESNI * MUST_HAVE_AESNI \ | |||||
| | XOP * MUST_HAVE_XOP \ | |||||
| | AVX * MUST_HAVE_AVX \ | |||||
| | RDRAND * MUST_HAVE_RDRAND \ | |||||
| | AVX2 * MUST_HAVE_AVX2 ) | |||||
| #define MIGHT_HAVE(feature) ((MIGHT_MASK & feature) == feature) | #define MIGHT_HAVE(feature) ((MIGHT_MASK & feature) == feature) | ||||
| #define MUST_HAVE(feature) ((MUST_MASK & feature) == feature) | #define MUST_HAVE(feature) ((MUST_MASK & feature) == feature) | ||||
| @@ -5,383 +5,442 @@ | |||||
| #include "p448.h" | #include "p448.h" | ||||
| #include "x86-64-arith.h" | #include "x86-64-arith.h" | ||||
| void p448_mul(p448_t *__restrict__ cs, const p448_t *as, const p448_t *bs) { | |||||
| const uint64_t *a = as->limb, *b = bs->limb; | |||||
| uint64_t *c = cs->limb; | |||||
| void | |||||
| p448_mul ( | |||||
| p448_t *__restrict__ cs, | |||||
| const p448_t *as, | |||||
| const p448_t *bs | |||||
| ) { | |||||
| const uint64_t *a = as->limb, *b = bs->limb; | |||||
| uint64_t *c = cs->limb; | |||||
| __uint128_t accum0 = 0, accum1 = 0, accum2; | __uint128_t accum0 = 0, accum1 = 0, accum2; | ||||
| uint64_t mask = (1ull<<56) - 1; | |||||
| uint64_t aa[4], bb[4]; | |||||
| /* For some reason clang doesn't vectorize this without prompting? */ | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) { | |||||
| ((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i]; | |||||
| ((uint64xn_t*)bb)[i] = ((const uint64xn_t*)b)[i] + ((const uint64xn_t*)(&b[4]))[i]; | |||||
| } | |||||
| /* | |||||
| for (int i=0; i<4; i++) { | |||||
| uint64_t mask = (1ull<<56) - 1; | |||||
| uint64_t aa[4], bb[4]; | |||||
| /* For some reason clang doesn't vectorize this without prompting? */ | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) { | |||||
| ((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i]; | |||||
| ((uint64xn_t*)bb)[i] = ((const uint64xn_t*)b)[i] + ((const uint64xn_t*)(&b[4]))[i]; | |||||
| } | |||||
| /* | |||||
| for (int i=0; i<4; i++) { | |||||
| aa[i] = a[i] + a[i+4]; | aa[i] = a[i] + a[i+4]; | ||||
| bb[i] = b[i] + b[i+4]; | bb[i] = b[i] + b[i+4]; | ||||
| } | |||||
| */ | |||||
| accum2 = widemul(&a[0],&b[3]); | |||||
| accum1 = widemul(&aa[0],&bb[3]); | |||||
| accum0 = widemul(&a[4],&b[7]); | |||||
| mac(&accum2, &a[1], &b[2]); | |||||
| mac(&accum1, &aa[1], &bb[2]); | |||||
| mac(&accum0, &a[5], &b[6]); | |||||
| mac(&accum2, &a[2], &b[1]); | |||||
| mac(&accum1, &aa[2], &bb[1]); | |||||
| mac(&accum0, &a[6], &b[5]); | |||||
| mac(&accum2, &a[3], &b[0]); | |||||
| mac(&accum1, &aa[3], &bb[0]); | |||||
| mac(&accum0, &a[7], &b[4]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| c[3] = ((uint64_t)(accum0)) & mask; | |||||
| c[7] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| { | |||||
| accum2 = accum1; | |||||
| accum1 += accum0; | |||||
| accum0 = accum2; | |||||
| } | |||||
| accum2 = widemul(&a[0],&b[0]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| accum2 = widemul(&aa[1],&bb[3]); | |||||
| msb(&accum0, &a[1], &b[3]); | |||||
| mac(&accum1, &a[5], &b[7]); | |||||
| msb(&accum0, &a[2], &b[2]); | |||||
| mac(&accum2, &aa[2], &bb[2]); | |||||
| mac(&accum1, &a[6], &b[6]); | |||||
| msb(&accum0, &a[3], &b[1]); | |||||
| mac(&accum1, &a[7], &b[5]); | |||||
| mac(&accum2, &aa[3], &bb[1]); | |||||
| accum0 += accum2; | |||||
| accum1 += accum2; | |||||
| mac(&accum0, &a[4], &b[4]); | |||||
| mac(&accum1, &aa[0], &bb[0]); | |||||
| c[0] = ((uint64_t)(accum0)) & mask; | |||||
| c[4] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| accum2 = widemul(&aa[2],&bb[3]); | |||||
| msb(&accum0, &a[2], &b[3]); | |||||
| mac(&accum1, &a[6], &b[7]); | |||||
| mac(&accum2, &aa[3], &bb[2]); | |||||
| msb(&accum0, &a[3], &b[2]); | |||||
| mac(&accum1, &a[7], &b[6]); | |||||
| accum1 += accum2; | |||||
| accum0 += accum2; | |||||
| accum2 = widemul(&a[0],&b[1]); | |||||
| mac(&accum1, &aa[0], &bb[1]); | |||||
| mac(&accum0, &a[4], &b[5]); | |||||
| mac(&accum2, &a[1], &b[0]); | |||||
| mac(&accum1, &aa[1], &bb[0]); | |||||
| mac(&accum0, &a[5], &b[4]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| c[1] = ((uint64_t)(accum0)) & mask; | |||||
| c[5] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| accum2 = widemul(&aa[3],&bb[3]); | |||||
| msb(&accum0, &a[3], &b[3]); | |||||
| mac(&accum1, &a[7], &b[7]); | |||||
| accum1 += accum2; | |||||
| accum0 += accum2; | |||||
| accum2 = widemul(&a[0],&b[2]); | |||||
| mac(&accum1, &aa[0], &bb[2]); | |||||
| mac(&accum0, &a[4], &b[6]); | |||||
| mac(&accum2, &a[1], &b[1]); | |||||
| mac(&accum1, &aa[1], &bb[1]); | |||||
| mac(&accum0, &a[5], &b[5]); | |||||
| mac(&accum2, &a[2], &b[0]); | |||||
| mac(&accum1, &aa[2], &bb[0]); | |||||
| mac(&accum0, &a[6], &b[4]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| c[2] = ((uint64_t)(accum0)) & mask; | |||||
| c[6] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| accum0 += c[3]; | |||||
| accum1 += c[7]; | |||||
| c[3] = ((uint64_t)(accum0)) & mask; | |||||
| c[7] = ((uint64_t)(accum1)) & mask; | |||||
| /* we could almost stop here, but it wouldn't be stable, so... */ | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); | |||||
| c[0] += ((uint64_t)(accum1)); | |||||
| } | |||||
| */ | |||||
| accum2 = widemul(&a[0],&b[3]); | |||||
| accum1 = widemul(&aa[0],&bb[3]); | |||||
| accum0 = widemul(&a[4],&b[7]); | |||||
| mac(&accum2, &a[1], &b[2]); | |||||
| mac(&accum1, &aa[1], &bb[2]); | |||||
| mac(&accum0, &a[5], &b[6]); | |||||
| mac(&accum2, &a[2], &b[1]); | |||||
| mac(&accum1, &aa[2], &bb[1]); | |||||
| mac(&accum0, &a[6], &b[5]); | |||||
| mac(&accum2, &a[3], &b[0]); | |||||
| mac(&accum1, &aa[3], &bb[0]); | |||||
| mac(&accum0, &a[7], &b[4]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| c[3] = ((uint64_t)(accum0)) & mask; | |||||
| c[7] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| { | |||||
| accum2 = accum1; | |||||
| accum1 += accum0; | |||||
| accum0 = accum2; | |||||
| } | |||||
| accum2 = widemul(&a[0],&b[0]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| accum2 = widemul(&aa[1],&bb[3]); | |||||
| msb(&accum0, &a[1], &b[3]); | |||||
| mac(&accum1, &a[5], &b[7]); | |||||
| msb(&accum0, &a[2], &b[2]); | |||||
| mac(&accum2, &aa[2], &bb[2]); | |||||
| mac(&accum1, &a[6], &b[6]); | |||||
| msb(&accum0, &a[3], &b[1]); | |||||
| mac(&accum1, &a[7], &b[5]); | |||||
| mac(&accum2, &aa[3], &bb[1]); | |||||
| accum0 += accum2; | |||||
| accum1 += accum2; | |||||
| mac(&accum0, &a[4], &b[4]); | |||||
| mac(&accum1, &aa[0], &bb[0]); | |||||
| c[0] = ((uint64_t)(accum0)) & mask; | |||||
| c[4] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| accum2 = widemul(&aa[2],&bb[3]); | |||||
| msb(&accum0, &a[2], &b[3]); | |||||
| mac(&accum1, &a[6], &b[7]); | |||||
| mac(&accum2, &aa[3], &bb[2]); | |||||
| msb(&accum0, &a[3], &b[2]); | |||||
| mac(&accum1, &a[7], &b[6]); | |||||
| accum1 += accum2; | |||||
| accum0 += accum2; | |||||
| accum2 = widemul(&a[0],&b[1]); | |||||
| mac(&accum1, &aa[0], &bb[1]); | |||||
| mac(&accum0, &a[4], &b[5]); | |||||
| mac(&accum2, &a[1], &b[0]); | |||||
| mac(&accum1, &aa[1], &bb[0]); | |||||
| mac(&accum0, &a[5], &b[4]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| c[1] = ((uint64_t)(accum0)) & mask; | |||||
| c[5] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| accum2 = widemul(&aa[3],&bb[3]); | |||||
| msb(&accum0, &a[3], &b[3]); | |||||
| mac(&accum1, &a[7], &b[7]); | |||||
| accum1 += accum2; | |||||
| accum0 += accum2; | |||||
| accum2 = widemul(&a[0],&b[2]); | |||||
| mac(&accum1, &aa[0], &bb[2]); | |||||
| mac(&accum0, &a[4], &b[6]); | |||||
| mac(&accum2, &a[1], &b[1]); | |||||
| mac(&accum1, &aa[1], &bb[1]); | |||||
| mac(&accum0, &a[5], &b[5]); | |||||
| mac(&accum2, &a[2], &b[0]); | |||||
| mac(&accum1, &aa[2], &bb[0]); | |||||
| mac(&accum0, &a[6], &b[4]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| c[2] = ((uint64_t)(accum0)) & mask; | |||||
| c[6] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| accum0 += c[3]; | |||||
| accum1 += c[7]; | |||||
| c[3] = ((uint64_t)(accum0)) & mask; | |||||
| c[7] = ((uint64_t)(accum1)) & mask; | |||||
| /* we could almost stop here, but it wouldn't be stable, so... */ | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); | |||||
| c[0] += ((uint64_t)(accum1)); | |||||
| } | } | ||||
| void p448_mulw(p448_t *__restrict__ cs, const p448_t *as, uint64_t b) { | |||||
| const uint64_t *a = as->limb; | |||||
| uint64_t *c = cs->limb; | |||||
| __uint128_t accum0, accum4; | |||||
| uint64_t mask = (1ull<<56) - 1; | |||||
| accum0 = widemul_rm(b, &a[0]); | |||||
| accum4 = widemul_rm(b, &a[4]); | |||||
| c[0] = accum0 & mask; accum0 >>= 56; | |||||
| c[4] = accum4 & mask; accum4 >>= 56; | |||||
| mac_rm(&accum0, b, &a[1]); | |||||
| mac_rm(&accum4, b, &a[5]); | |||||
| c[1] = accum0 & mask; accum0 >>= 56; | |||||
| c[5] = accum4 & mask; accum4 >>= 56; | |||||
| mac_rm(&accum0, b, &a[2]); | |||||
| mac_rm(&accum4, b, &a[6]); | |||||
| c[2] = accum0 & mask; accum0 >>= 56; | |||||
| c[6] = accum4 & mask; accum4 >>= 56; | |||||
| mac_rm(&accum0, b, &a[3]); | |||||
| mac_rm(&accum4, b, &a[7]); | |||||
| c[3] = accum0 & mask; accum0 >>= 56; | |||||
| c[7] = accum4 & mask; accum4 >>= 56; | |||||
| c[4] += accum0 + accum4; | |||||
| c[0] += accum4; | |||||
| void | |||||
| p448_mulw ( | |||||
| p448_t *__restrict__ cs, | |||||
| const p448_t *as, | |||||
| uint64_t b | |||||
| ) { | |||||
| const uint64_t *a = as->limb; | |||||
| uint64_t *c = cs->limb; | |||||
| __uint128_t accum0, accum4; | |||||
| uint64_t mask = (1ull<<56) - 1; | |||||
| accum0 = widemul_rm(b, &a[0]); | |||||
| accum4 = widemul_rm(b, &a[4]); | |||||
| c[0] = accum0 & mask; accum0 >>= 56; | |||||
| c[4] = accum4 & mask; accum4 >>= 56; | |||||
| mac_rm(&accum0, b, &a[1]); | |||||
| mac_rm(&accum4, b, &a[5]); | |||||
| c[1] = accum0 & mask; accum0 >>= 56; | |||||
| c[5] = accum4 & mask; accum4 >>= 56; | |||||
| mac_rm(&accum0, b, &a[2]); | |||||
| mac_rm(&accum4, b, &a[6]); | |||||
| c[2] = accum0 & mask; accum0 >>= 56; | |||||
| c[6] = accum4 & mask; accum4 >>= 56; | |||||
| mac_rm(&accum0, b, &a[3]); | |||||
| mac_rm(&accum4, b, &a[7]); | |||||
| c[3] = accum0 & mask; accum0 >>= 56; | |||||
| c[7] = accum4 & mask; accum4 >>= 56; | |||||
| c[4] += accum0 + accum4; | |||||
| c[0] += accum4; | |||||
| } | } | ||||
| void p448_sqr(p448_t *__restrict__ cs, const p448_t *as) { | |||||
| const uint64_t *a = as->limb; | |||||
| uint64_t *c = cs->limb; | |||||
| __uint128_t accum0 = 0, accum1 = 0, accum2; | |||||
| uint64_t mask = (1ull<<56) - 1; | |||||
| uint64_t aa[4]; | |||||
| /* For some reason clang doesn't vectorize this without prompting? */ | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) { | |||||
| void | |||||
| p448_sqr ( | |||||
| p448_t *__restrict__ cs, | |||||
| const p448_t *as | |||||
| ) { | |||||
| const uint64_t *a = as->limb; | |||||
| uint64_t *c = cs->limb; | |||||
| __uint128_t accum0 = 0, accum1 = 0, accum2; | |||||
| uint64_t mask = (1ull<<56) - 1; | |||||
| uint64_t aa[4]; | |||||
| /* For some reason clang doesn't vectorize this without prompting? */ | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) { | |||||
| ((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i]; | ((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i]; | ||||
| } | |||||
| accum2 = widemul(&a[0],&a[3]); | |||||
| accum1 = widemul(&aa[0],&aa[3]); | |||||
| accum0 = widemul(&a[4],&a[7]); | |||||
| mac(&accum2, &a[1], &a[2]); | |||||
| mac(&accum1, &aa[1], &aa[2]); | |||||
| mac(&accum0, &a[5], &a[6]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| c[3] = ((uint64_t)(accum0))<<1 & mask; | |||||
| c[7] = ((uint64_t)(accum1))<<1 & mask; | |||||
| accum0 >>= 55; | |||||
| accum1 >>= 55; | |||||
| { | |||||
| accum2 = accum1; | |||||
| accum1 += accum0; | |||||
| accum0 = accum2; | |||||
| } | |||||
| accum2 = widemul(&a[0],&a[0]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| accum2 = widemul2(&aa[1],&aa[3]); | |||||
| msb2(&accum0, &a[1], &a[3]); | |||||
| mac2(&accum1, &a[5], &a[7]); | |||||
| msb(&accum0, &a[2], &a[2]); | |||||
| mac(&accum2, &aa[2], &aa[2]); | |||||
| mac(&accum1, &a[6], &a[6]); | |||||
| accum0 += accum2; | |||||
| accum1 += accum2; | |||||
| mac(&accum0, &a[4], &a[4]); | |||||
| mac(&accum1, &aa[0], &aa[0]); | |||||
| c[0] = ((uint64_t)(accum0)) & mask; | |||||
| c[4] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| accum2 = widemul2(&aa[2],&aa[3]); | |||||
| msb2(&accum0, &a[2], &a[3]); | |||||
| mac2(&accum1, &a[6], &a[7]); | |||||
| accum1 += accum2; | |||||
| accum0 += accum2; | |||||
| accum2 = widemul2(&a[0],&a[1]); | |||||
| mac2(&accum1, &aa[0], &aa[1]); | |||||
| mac2(&accum0, &a[4], &a[5]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| c[1] = ((uint64_t)(accum0)) & mask; | |||||
| c[5] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| accum2 = widemul(&aa[3],&aa[3]); | |||||
| msb(&accum0, &a[3], &a[3]); | |||||
| mac(&accum1, &a[7], &a[7]); | |||||
| accum1 += accum2; | |||||
| accum0 += accum2; | |||||
| accum2 = widemul2(&a[0],&a[2]); | |||||
| mac2(&accum1, &aa[0], &aa[2]); | |||||
| mac2(&accum0, &a[4], &a[6]); | |||||
| mac(&accum2, &a[1], &a[1]); | |||||
| mac(&accum1, &aa[1], &aa[1]); | |||||
| mac(&accum0, &a[5], &a[5]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| c[2] = ((uint64_t)(accum0)) & mask; | |||||
| c[6] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| accum0 += c[3]; | |||||
| accum1 += c[7]; | |||||
| c[3] = ((uint64_t)(accum0)) & mask; | |||||
| c[7] = ((uint64_t)(accum1)) & mask; | |||||
| /* we could almost stop here, but it wouldn't be stable, so... */ | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); | |||||
| c[0] += ((uint64_t)(accum1)); | |||||
| } | |||||
| accum2 = widemul(&a[0],&a[3]); | |||||
| accum1 = widemul(&aa[0],&aa[3]); | |||||
| accum0 = widemul(&a[4],&a[7]); | |||||
| mac(&accum2, &a[1], &a[2]); | |||||
| mac(&accum1, &aa[1], &aa[2]); | |||||
| mac(&accum0, &a[5], &a[6]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| c[3] = ((uint64_t)(accum0))<<1 & mask; | |||||
| c[7] = ((uint64_t)(accum1))<<1 & mask; | |||||
| accum0 >>= 55; | |||||
| accum1 >>= 55; | |||||
| { | |||||
| accum2 = accum1; | |||||
| accum1 += accum0; | |||||
| accum0 = accum2; | |||||
| } | |||||
| accum2 = widemul(&a[0],&a[0]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| accum2 = widemul2(&aa[1],&aa[3]); | |||||
| msb2(&accum0, &a[1], &a[3]); | |||||
| mac2(&accum1, &a[5], &a[7]); | |||||
| msb(&accum0, &a[2], &a[2]); | |||||
| mac(&accum2, &aa[2], &aa[2]); | |||||
| mac(&accum1, &a[6], &a[6]); | |||||
| accum0 += accum2; | |||||
| accum1 += accum2; | |||||
| mac(&accum0, &a[4], &a[4]); | |||||
| mac(&accum1, &aa[0], &aa[0]); | |||||
| c[0] = ((uint64_t)(accum0)) & mask; | |||||
| c[4] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| accum2 = widemul2(&aa[2],&aa[3]); | |||||
| msb2(&accum0, &a[2], &a[3]); | |||||
| mac2(&accum1, &a[6], &a[7]); | |||||
| accum1 += accum2; | |||||
| accum0 += accum2; | |||||
| accum2 = widemul2(&a[0],&a[1]); | |||||
| mac2(&accum1, &aa[0], &aa[1]); | |||||
| mac2(&accum0, &a[4], &a[5]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| c[1] = ((uint64_t)(accum0)) & mask; | |||||
| c[5] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| accum2 = widemul(&aa[3],&aa[3]); | |||||
| msb(&accum0, &a[3], &a[3]); | |||||
| mac(&accum1, &a[7], &a[7]); | |||||
| accum1 += accum2; | |||||
| accum0 += accum2; | |||||
| accum2 = widemul2(&a[0],&a[2]); | |||||
| mac2(&accum1, &aa[0], &aa[2]); | |||||
| mac2(&accum0, &a[4], &a[6]); | |||||
| mac(&accum2, &a[1], &a[1]); | |||||
| mac(&accum1, &aa[1], &aa[1]); | |||||
| mac(&accum0, &a[5], &a[5]); | |||||
| accum1 -= accum2; | |||||
| accum0 += accum2; | |||||
| c[2] = ((uint64_t)(accum0)) & mask; | |||||
| c[6] = ((uint64_t)(accum1)) & mask; | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| accum0 += c[3]; | |||||
| accum1 += c[7]; | |||||
| c[3] = ((uint64_t)(accum0)) & mask; | |||||
| c[7] = ((uint64_t)(accum1)) & mask; | |||||
| /* we could almost stop here, but it wouldn't be stable, so... */ | |||||
| accum0 >>= 56; | |||||
| accum1 >>= 56; | |||||
| c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); | |||||
| c[0] += ((uint64_t)(accum1)); | |||||
| } | } | ||||
| static __inline__ void p448_sqr_inplace(p448_t *x) { | |||||
| p448_t y; | |||||
| p448_sqr(&y,x); | |||||
| *x = y; | |||||
| void | |||||
| p448_strong_reduce ( | |||||
| p448_t *a | |||||
| ) { | |||||
| uint64_t mask = (1ull<<56)-1; | |||||
| /* first, clear high */ | |||||
| a->limb[4] += a->limb[7]>>56; | |||||
| a->limb[0] += a->limb[7]>>56; | |||||
| a->limb[7] &= mask; | |||||
| /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */ | |||||
| /* compute total_value - p. No need to reduce mod p. */ | |||||
| __int128_t scarry = 0; | |||||
| int i; | |||||
| for (i=0; i<8; i++) { | |||||
| scarry = scarry + a->limb[i] - ((i==4)?mask-1:mask); | |||||
| a->limb[i] = scarry & mask; | |||||
| scarry >>= 56; | |||||
| } | |||||
| /* uncommon case: it was >= p, so now scarry = 0 and this = x - p | |||||
| * common case: it was < p, so now scarry = -1 and this = x - p + 2^448 | |||||
| * so let's add back in p. will carry back off the top for 2^448. | |||||
| */ | |||||
| assert(is_zero(scarry) | is_zero(scarry+1)); | |||||
| uint64_t scarry_mask = scarry & mask; | |||||
| __uint128_t carry = 0; | |||||
| /* add it back */ | |||||
| for (i=0; i<8; i++) { | |||||
| carry = carry + a->limb[i] + ((i==4)?(scarry_mask&~1):scarry_mask); | |||||
| a->limb[i] = carry & mask; | |||||
| carry >>= 56; | |||||
| } | |||||
| assert(is_zero(carry + scarry)); | |||||
| } | } | ||||
| static __inline__ void p448_mul_inplace(p448_t *x, const p448_t *z) { | |||||
| p448_t y; | |||||
| p448_mul(&y,x,z); | |||||
| *x = y; | |||||
| mask_t | |||||
| p448_is_zero ( | |||||
| const struct p448_t *a | |||||
| ) { | |||||
| struct p448_t b; | |||||
| p448_copy(&b,a); | |||||
| p448_strong_reduce(&b); | |||||
| uint64_t any = 0; | |||||
| int i; | |||||
| for (i=0; i<8; i++) { | |||||
| any |= b.limb[i]; | |||||
| } | |||||
| return is_zero(any); | |||||
| } | } | ||||
| static __inline__ void p448_repunit(p448_t *x, int space, int teeth) { | |||||
| int i,j; | |||||
| p448_t working = *x; | |||||
| for (i=0; i<teeth-1; i++) { | |||||
| for (j=0; j<space-(i?0:1); j++) | |||||
| p448_sqr_inplace(&working); | |||||
| if (i==teeth-2) | |||||
| p448_mul_inplace(x,&working); | |||||
| else | |||||
| p448_mul_inplace(&working,x); | |||||
| } | |||||
| void | |||||
| p448_serialize ( | |||||
| uint8_t *serial, | |||||
| const struct p448_t *x | |||||
| ) { | |||||
| int i,j; | |||||
| p448_t red; | |||||
| p448_copy(&red, x); | |||||
| p448_strong_reduce(&red); | |||||
| for (i=0; i<8; i++) { | |||||
| for (j=0; j<7; j++) { | |||||
| serial[7*i+j] = red.limb[i]; | |||||
| red.limb[i] >>= 8; | |||||
| } | |||||
| assert(red.limb[i] == 0); | |||||
| } | |||||
| } | } | ||||
| void | void | ||||
| p448_strong_reduce(p448_t *a) { | |||||
| uint64_t mask = (1ull<<56)-1; | |||||
| /* first, clear high */ | |||||
| a->limb[4] += a->limb[7]>>56; | |||||
| a->limb[0] += a->limb[7]>>56; | |||||
| a->limb[7] &= mask; | |||||
| /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */ | |||||
| /* compute total_value - p. No need to reduce mod p. */ | |||||
| __int128_t scarry = 0; | |||||
| int i; | |||||
| for (i=0; i<8; i++) { | |||||
| scarry = scarry + a->limb[i] - ((i==4)?mask-1:mask); | |||||
| a->limb[i] = scarry & mask; | |||||
| scarry >>= 56; | |||||
| } | |||||
| /* uncommon case: it was >= p, so now scarry = 0 and this = x - p | |||||
| * common case: it was < p, so now scarry = -1 and this = x - p + 2^448 | |||||
| * so let's add back in p. will carry back off the top for 2^448. | |||||
| */ | |||||
| assert(is_zero(scarry) | is_zero(scarry+1)); | |||||
| uint64_t scarry_mask = scarry & mask; | |||||
| __uint128_t carry = 0; | |||||
| /* add it back */ | |||||
| for (i=0; i<8; i++) { | |||||
| carry = carry + a->limb[i] + ((i==4)?(scarry_mask&~1):scarry_mask); | |||||
| a->limb[i] = carry & mask; | |||||
| carry >>= 56; | |||||
| } | |||||
| assert(is_zero(carry + scarry)); | |||||
| q448_serialize ( | |||||
| uint8_t *serial, | |||||
| const word_t x[7] | |||||
| ) { | |||||
| int i,j; | |||||
| for (i=0; i<7; i++) { | |||||
| for (j=0; j<8; j++) { | |||||
| serial[8*i+j] = x[i]>>(8*j); | |||||
| } | |||||
| } | |||||
| } | |||||
| mask_t | |||||
| q448_deserialize ( | |||||
| word_t x[7], | |||||
| const uint8_t serial[56] | |||||
| ) { | |||||
| int i,j; | |||||
| for (i=0; i<7; i++) { | |||||
| word_t out = 0; | |||||
| for (j=0; j<8; j++) { | |||||
| out |= ((word_t)serial[8*i+j])<<(8*j); | |||||
| } | |||||
| x[i] = out; | |||||
| } | |||||
| /* TODO: check for reduction */ | |||||
| return MASK_SUCCESS; | |||||
| } | } | ||||
| mask_t p448_is_zero(const struct p448_t *a) { | |||||
| struct p448_t b; | |||||
| p448_copy(&b,a); | |||||
| p448_strong_reduce(&b); | |||||
| uint64_t any = 0; | |||||
| int i; | |||||
| for (i=0; i<8; i++) { | |||||
| any |= b.limb[i]; | |||||
| } | |||||
| return is_zero(any); | |||||
| mask_t | |||||
| p448_deserialize ( | |||||
| p448_t *x, | |||||
| const uint8_t serial[56] | |||||
| ) { | |||||
| int i,j; | |||||
| for (i=0; i<8; i++) { | |||||
| word_t out = 0; | |||||
| for (j=0; j<7; j++) { | |||||
| out |= ((word_t)serial[7*i+j])<<(8*j); | |||||
| } | |||||
| x->limb[i] = out; | |||||
| } | |||||
| /* TODO: check for reduction */ | |||||
| return MASK_SUCCESS; | |||||
| } | } | ||||
| @@ -18,87 +18,141 @@ extern "C" { | |||||
| #endif | #endif | ||||
| static __inline__ void | static __inline__ void | ||||
| p448_set_ui(p448_t *out, | |||||
| uint64_t x) | |||||
| __attribute__((unused,always_inline)); | |||||
| p448_set_ui ( | |||||
| p448_t *out, | |||||
| uint64_t x | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_cond_swap(p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t do_swap) | |||||
| __attribute__((unused,always_inline)); | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t do_swap | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_add(p448_t *out, | |||||
| const p448_t *a, | |||||
| const p448_t *b) | |||||
| __attribute__((unused,always_inline)); | |||||
| p448_add ( | |||||
| p448_t *out, | |||||
| const p448_t *a, | |||||
| const p448_t *b | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_sub(p448_t *out, | |||||
| const p448_t *a, | |||||
| const p448_t *b) | |||||
| __attribute__((unused,always_inline)); | |||||
| p448_sub ( | |||||
| p448_t *out, | |||||
| const p448_t *a, | |||||
| const p448_t *b | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_neg(p448_t *out, | |||||
| const p448_t *a) | |||||
| __attribute__((unused,always_inline)); | |||||
| p448_neg ( | |||||
| p448_t *out, | |||||
| const p448_t *a | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_cond_neg(p448_t *a, | |||||
| mask_t doNegate) | |||||
| __attribute__((unused,always_inline)); | |||||
| p448_cond_neg ( | |||||
| p448_t *a, | |||||
| mask_t doNegate | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_addw(p448_t *a, | |||||
| uint64_t x) | |||||
| __attribute__((unused,always_inline)); | |||||
| p448_addw ( | |||||
| p448_t *a, | |||||
| uint64_t x | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_subw(p448_t *a, | |||||
| uint64_t x) | |||||
| __attribute__((unused,always_inline)); | |||||
| p448_subw ( | |||||
| p448_t *a, | |||||
| uint64_t x | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_copy(p448_t *out, const p448_t *a) | |||||
| __attribute__((unused,always_inline)); | |||||
| p448_copy ( | |||||
| p448_t *out, | |||||
| const p448_t *a | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_weak_reduce(p448_t *inout) | |||||
| __attribute__((unused,always_inline)); | |||||
| p448_weak_reduce ( | |||||
| p448_t *inout | |||||
| ) __attribute__((unused,always_inline)); | |||||
| void | void | ||||
| p448_strong_reduce(p448_t *inout); | |||||
| p448_strong_reduce ( | |||||
| p448_t *inout | |||||
| ); | |||||
| mask_t | mask_t | ||||
| p448_is_zero(const p448_t *in); | |||||
| p448_is_zero ( | |||||
| const p448_t *in | |||||
| ); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_bias(p448_t *inout, int amount) | |||||
| __attribute__((unused,always_inline)); | |||||
| p448_bias ( | |||||
| p448_t *inout, | |||||
| int amount | |||||
| ) __attribute__((unused,always_inline)); | |||||
| void | void | ||||
| p448_mul(p448_t *__restrict__ out, | |||||
| const p448_t *a, | |||||
| const p448_t *b); | |||||
| p448_mul ( | |||||
| p448_t *__restrict__ out, | |||||
| const p448_t *a, | |||||
| const p448_t *b | |||||
| ); | |||||
| void | void | ||||
| p448_mulw(p448_t *__restrict__ out, | |||||
| const p448_t *a, | |||||
| uint64_t b); | |||||
| p448_mulw ( | |||||
| p448_t *__restrict__ out, | |||||
| const p448_t *a, | |||||
| uint64_t b | |||||
| ); | |||||
| void | void | ||||
| p448_sqr(p448_t *__restrict__ out, | |||||
| const p448_t *a); | |||||
| p448_sqr ( | |||||
| p448_t *__restrict__ out, | |||||
| const p448_t *a | |||||
| ); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_sqrn(p448_t *__restrict__ y, const p448_t *x, int n) | |||||
| __attribute__((unused,always_inline)); | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) __attribute__((unused,always_inline)); | |||||
| void | void | ||||
| p448_set_ui(p448_t *out, | |||||
| uint64_t x) { | |||||
| p448_serialize ( | |||||
| uint8_t *serial, | |||||
| const struct p448_t *x | |||||
| ); | |||||
| void | |||||
| q448_serialize ( | |||||
| uint8_t *serial, | |||||
| const word_t x[7] | |||||
| ); | |||||
| mask_t | |||||
| q448_deserialize ( | |||||
| word_t x[7], | |||||
| const uint8_t serial[56] | |||||
| ); | |||||
| mask_t | |||||
| p448_deserialize ( | |||||
| p448_t *x, | |||||
| const uint8_t serial[56] | |||||
| ); | |||||
| /* -------------- Inline functions begin here -------------- */ | |||||
| void | |||||
| p448_set_ui ( | |||||
| p448_t *out, | |||||
| uint64_t x | |||||
| ) { | |||||
| int i; | int i; | ||||
| out->limb[0] = x; | out->limb[0] = x; | ||||
| for (i=1; i<8; i++) { | for (i=1; i<8; i++) { | ||||
| @@ -107,21 +161,29 @@ p448_set_ui(p448_t *out, | |||||
| } | } | ||||
| void | void | ||||
| p448_cond_swap(p448_t *a, p448_t *b, mask_t doswap) { | |||||
| big_register_t *aa = (big_register_t*)a; | |||||
| big_register_t *bb = (big_register_t*)b; | |||||
| big_register_t m = doswap; | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||||
| big_register_t x = m & (aa[i]^bb[i]); | |||||
| aa[i] ^= x; | |||||
| bb[i] ^= x; | |||||
| } | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t doswap | |||||
| ) { | |||||
| big_register_t *aa = (big_register_t*)a; | |||||
| big_register_t *bb = (big_register_t*)b; | |||||
| big_register_t m = doswap; | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||||
| big_register_t x = m & (aa[i]^bb[i]); | |||||
| aa[i] ^= x; | |||||
| bb[i] ^= x; | |||||
| } | |||||
| } | } | ||||
| void | void | ||||
| p448_add(p448_t *out, const p448_t *a, const p448_t *b) { | |||||
| p448_add ( | |||||
| p448_t *out, | |||||
| const p448_t *a, | |||||
| const p448_t *b | |||||
| ) { | |||||
| unsigned int i; | unsigned int i; | ||||
| for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | ||||
| ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | ||||
| @@ -135,7 +197,11 @@ p448_add(p448_t *out, const p448_t *a, const p448_t *b) { | |||||
| } | } | ||||
| void | void | ||||
| p448_sub(p448_t *out, const p448_t *a, const p448_t *b) { | |||||
| p448_sub ( | |||||
| p448_t *out, | |||||
| const p448_t *a, | |||||
| const p448_t *b | |||||
| ) { | |||||
| unsigned int i; | unsigned int i; | ||||
| for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | ||||
| ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; | ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; | ||||
| @@ -149,7 +215,10 @@ p448_sub(p448_t *out, const p448_t *a, const p448_t *b) { | |||||
| } | } | ||||
| void | void | ||||
| p448_neg(p448_t *out, const p448_t *a) { | |||||
| p448_neg ( | |||||
| p448_t *out, | |||||
| const p448_t *a | |||||
| ) { | |||||
| unsigned int i; | unsigned int i; | ||||
| for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | ||||
| ((uint64xn_t*)out)[i] = -((const uint64xn_t*)a)[i]; | ((uint64xn_t*)out)[i] = -((const uint64xn_t*)a)[i]; | ||||
| @@ -182,22 +251,34 @@ p448_cond_neg( | |||||
| } | } | ||||
| void | void | ||||
| p448_addw(p448_t *a, uint64_t x) { | |||||
| p448_addw ( | |||||
| p448_t *a, | |||||
| uint64_t x | |||||
| ) { | |||||
| a->limb[0] += x; | a->limb[0] += x; | ||||
| } | } | ||||
| void | void | ||||
| p448_subw(p448_t *a, uint64_t x) { | |||||
| p448_subw ( | |||||
| p448_t *a, | |||||
| uint64_t x | |||||
| ) { | |||||
| a->limb[0] -= x; | a->limb[0] -= x; | ||||
| } | } | ||||
| void | void | ||||
| p448_copy(p448_t *out, const p448_t *a) { | |||||
| p448_copy ( | |||||
| p448_t *out, | |||||
| const p448_t *a | |||||
| ) { | |||||
| *out = *a; | *out = *a; | ||||
| } | } | ||||
| void | void | ||||
| p448_bias(p448_t *a, int amt) { | |||||
| p448_bias ( | |||||
| p448_t *a, | |||||
| int amt | |||||
| ) { | |||||
| uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt; | uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt; | ||||
| uint64x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; | uint64x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; | ||||
| uint64x4_t *aa = (uint64x4_t*) a; | uint64x4_t *aa = (uint64x4_t*) a; | ||||
| @@ -206,8 +287,10 @@ p448_bias(p448_t *a, int amt) { | |||||
| } | } | ||||
| void | void | ||||
| p448_weak_reduce(p448_t *a) { | |||||
| /* TODO: use pshufb/palignr if anyone cares about speed of this */ | |||||
| p448_weak_reduce ( | |||||
| p448_t *a | |||||
| ) { | |||||
| /* PERF: use pshufb/palignr if anyone cares about speed of this */ | |||||
| uint64_t mask = (1ull<<56) - 1; | uint64_t mask = (1ull<<56) - 1; | ||||
| uint64_t tmp = a->limb[7] >> 56; | uint64_t tmp = a->limb[7] >> 56; | ||||
| int i; | int i; | ||||
| @@ -218,7 +301,12 @@ p448_weak_reduce(p448_t *a) { | |||||
| a->limb[0] = (a->limb[0] & mask) + tmp; | a->limb[0] = (a->limb[0] & mask) + tmp; | ||||
| } | } | ||||
| void p448_sqrn(p448_t *__restrict__ y, const p448_t *x, int n) { | |||||
| void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) { | |||||
| p448_t tmp; | p448_t tmp; | ||||
| assert(n>0); | assert(n>0); | ||||
| if (n&1) { | if (n&1) { | ||||
| @@ -30,7 +30,7 @@ p448_montgomery_ladder( | |||||
| mask_t flip = -((w>>i)&1); | mask_t flip = -((w>>i)&1); | ||||
| p448_cond_swap(&mont.xa,&mont.xd,flip^pflip); | p448_cond_swap(&mont.xa,&mont.xd,flip^pflip); | ||||
| p448_cond_swap(&mont.za,&mont.zd,flip^pflip); | p448_cond_swap(&mont.za,&mont.zd,flip^pflip); | ||||
| p448_montgomery_step(&mont); | |||||
| montgomery_step(&mont); | |||||
| pflip = flip; | pflip = flip; | ||||
| } | } | ||||
| n = 63; | n = 63; | ||||
| @@ -39,35 +39,35 @@ p448_montgomery_ladder( | |||||
| p448_cond_swap(&mont.za,&mont.zd,pflip); | p448_cond_swap(&mont.za,&mont.zd,pflip); | ||||
| for (j=0; j<n_extra_doubles; j++) { | for (j=0; j<n_extra_doubles; j++) { | ||||
| p448_montgomery_step(&mont); | |||||
| montgomery_step(&mont); | |||||
| } | } | ||||
| struct p448_t sign; | struct p448_t sign; | ||||
| p448_montgomery_serialize(&sign, out, &mont, in); | |||||
| serialize_montgomery(&sign, out, &mont, in); | |||||
| p448_addw(&sign,1); | p448_addw(&sign,1); | ||||
| return ~p448_is_zero(&sign); | return ~p448_is_zero(&sign); | ||||
| } | } | ||||
| static __inline__ void | static __inline__ void | ||||
| niels_cond_negate( | |||||
| cond_negate_tw_niels( | |||||
| struct tw_niels_t *n, | struct tw_niels_t *n, | ||||
| mask_t doNegate | mask_t doNegate | ||||
| ) { | ) { | ||||
| p448_cond_swap(&n->a, &n->b, doNegate); | p448_cond_swap(&n->a, &n->b, doNegate); | ||||
| p448_cond_neg(&n->c, doNegate); /* TODO: bias amt? */ | |||||
| p448_cond_neg(&n->c, doNegate); | |||||
| } | } | ||||
| static __inline__ void | static __inline__ void | ||||
| pniels_cond_negate( | |||||
| cond_negate_tw_pniels( | |||||
| struct tw_pniels_t *n, | struct tw_pniels_t *n, | ||||
| mask_t doNegate | mask_t doNegate | ||||
| ) { | ) { | ||||
| niels_cond_negate(&n->n, doNegate); | |||||
| cond_negate_tw_niels(&n->n, doNegate); | |||||
| } | } | ||||
| void | void | ||||
| constant_time_lookup_pniels( | |||||
| constant_time_lookup_tw_pniels( | |||||
| struct tw_pniels_t *out, | struct tw_pniels_t *out, | ||||
| const struct tw_pniels_t *in, | const struct tw_pniels_t *in, | ||||
| int nin, | int nin, | ||||
| @@ -89,7 +89,7 @@ constant_time_lookup_pniels( | |||||
| } | } | ||||
| static __inline__ void | static __inline__ void | ||||
| constant_time_lookup_niels( | |||||
| constant_time_lookup_tw_niels( | |||||
| struct tw_niels_t *out, | struct tw_niels_t *out, | ||||
| const struct tw_niels_t *in, | const struct tw_niels_t *in, | ||||
| int nin, | int nin, | ||||
| @@ -165,7 +165,7 @@ edwards_scalar_multiply( | |||||
| struct tw_extensible_t tabulator; | struct tw_extensible_t tabulator; | ||||
| copy_tw_extensible(&tabulator, working); | copy_tw_extensible(&tabulator, working); | ||||
| p448_tw_extensible_double(&tabulator); | |||||
| double_tw_extensible(&tabulator); | |||||
| struct tw_pniels_t pn, multiples[8]; | struct tw_pniels_t pn, multiples[8]; | ||||
| convert_tw_extensible_to_tw_pniels(&pn, &tabulator); | convert_tw_extensible_to_tw_pniels(&pn, &tabulator); | ||||
| @@ -173,7 +173,7 @@ edwards_scalar_multiply( | |||||
| int i; | int i; | ||||
| for (i=1; i<8; i++) { | for (i=1; i<8; i++) { | ||||
| p448_tw_extensible_add_pniels(working, &pn); | |||||
| add_tw_pniels_to_tw_extensible(working, &pn); | |||||
| convert_tw_extensible_to_tw_pniels(&multiples[i], working); | convert_tw_extensible_to_tw_pniels(&multiples[i], working); | ||||
| } | } | ||||
| @@ -182,24 +182,92 @@ edwards_scalar_multiply( | |||||
| inv = (bits>>3)-1; | inv = (bits>>3)-1; | ||||
| bits ^= inv; | bits ^= inv; | ||||
| constant_time_lookup_pniels(&pn, multiples, 8, bits&7); | |||||
| pniels_cond_negate(&pn, inv); | |||||
| constant_time_lookup_tw_pniels(&pn, multiples, 8, bits&7); | |||||
| cond_negate_tw_pniels(&pn, inv); | |||||
| convert_tw_pniels_to_tw_extensible(working, &pn); | convert_tw_pniels_to_tw_extensible(working, &pn); | ||||
| for (i-=4; i>=0; i-=4) { | for (i-=4; i>=0; i-=4) { | ||||
| p448_tw_extensible_double(working); | |||||
| p448_tw_extensible_double(working); | |||||
| p448_tw_extensible_double(working); | |||||
| p448_tw_extensible_double(working); | |||||
| double_tw_extensible(working); | |||||
| double_tw_extensible(working); | |||||
| double_tw_extensible(working); | |||||
| double_tw_extensible(working); | |||||
| bits = scalar2[i/64] >> (i%64) & 0xF; | bits = scalar2[i/64] >> (i%64) & 0xF; | ||||
| inv = (bits>>3)-1; | inv = (bits>>3)-1; | ||||
| bits ^= inv; | bits ^= inv; | ||||
| constant_time_lookup_pniels(&pn, multiples, 8, bits&7); | |||||
| pniels_cond_negate(&pn, inv); | |||||
| p448_tw_extensible_add_pniels(working, &pn); | |||||
| constant_time_lookup_tw_pniels(&pn, multiples, 8, bits&7); | |||||
| cond_negate_tw_pniels(&pn, inv); | |||||
| add_tw_pniels_to_tw_extensible(working, &pn); | |||||
| } | |||||
| } | |||||
| void | |||||
| edwards_scalar_multiply_vlook( | |||||
| struct tw_extensible_t *working, | |||||
| const uint64_t scalar[7] | |||||
| ) { | |||||
| const int nbits=448; /* HACK? */ | |||||
| word_t prepared_data[14] = { | |||||
| 0x9595b847fdf73126ull, | |||||
| 0x9bb9b8a856af5200ull, | |||||
| 0xb3136e22f37d5c4full, | |||||
| 0x0000000189a19442ull, | |||||
| 0x0000000000000000ull, | |||||
| 0x0000000000000000ull, | |||||
| 0x4000000000000000ull, | |||||
| 0x721cf5b5529eec33ull, | |||||
| 0x7a4cf635c8e9c2abull, | |||||
| 0xeec492d944a725bfull, | |||||
| 0x000000020cd77058ull, | |||||
| 0x0000000000000000ull, | |||||
| 0x0000000000000000ull, | |||||
| 0x0000000000000000ull | |||||
| }; /* TODO: split off */ | |||||
| uint64_t scalar2[7]; | |||||
| convert_to_signed_window_form(scalar2,scalar,prepared_data,7); | |||||
| struct tw_extensible_t tabulator; | |||||
| copy_tw_extensible(&tabulator, working); | |||||
| double_tw_extensible(&tabulator); | |||||
| struct tw_pniels_t pn, multiples[8]; | |||||
| convert_tw_extensible_to_tw_pniels(&pn, &tabulator); | |||||
| convert_tw_extensible_to_tw_pniels(&multiples[0], working); | |||||
| int i; | |||||
| for (i=1; i<8; i++) { | |||||
| add_tw_pniels_to_tw_extensible(working, &pn); | |||||
| convert_tw_extensible_to_tw_pniels(&multiples[i], working); | |||||
| } | |||||
| i = nbits - 4; | |||||
| int bits = scalar2[i/64] >> (i%64) & 0xF, | |||||
| inv = (bits>>3)-1; | |||||
| bits ^= inv; | |||||
| copy_tw_pniels(&pn, &multiples[bits&7]); | |||||
| cond_negate_tw_pniels(&pn, inv); | |||||
| convert_tw_pniels_to_tw_extensible(working, &pn); | |||||
| for (i-=4; i>=0; i-=4) { | |||||
| double_tw_extensible(working); | |||||
| double_tw_extensible(working); | |||||
| double_tw_extensible(working); | |||||
| double_tw_extensible(working); | |||||
| bits = scalar2[i/64] >> (i%64) & 0xF; | |||||
| inv = (bits>>3)-1; | |||||
| bits ^= inv; | |||||
| copy_tw_pniels(&pn, &multiples[bits&7]); | |||||
| cond_negate_tw_pniels(&pn, inv); | |||||
| add_tw_pniels_to_tw_extensible(working, &pn); | |||||
| } | } | ||||
| } | } | ||||
| @@ -240,7 +308,7 @@ edwards_comb( | |||||
| struct tw_niels_t ni; | struct tw_niels_t ni; | ||||
| for (i=0; i<s; i++) { | for (i=0; i<s; i++) { | ||||
| if (i) p448_tw_extensible_double(working); | |||||
| if (i) double_tw_extensible(working); | |||||
| for (j=0; j<n; j++) { | for (j=0; j<n; j++) { | ||||
| int tab = 0; | int tab = 0; | ||||
| @@ -260,10 +328,10 @@ edwards_comb( | |||||
| tab ^= invert; | tab ^= invert; | ||||
| tab &= (1<<(t-1)) - 1; | tab &= (1<<(t-1)) - 1; | ||||
| constant_time_lookup_niels(&ni, table + (j<<(t-1)), 1<<(t-1), tab); | |||||
| niels_cond_negate(&ni, invert); | |||||
| constant_time_lookup_tw_niels(&ni, table + (j<<(t-1)), 1<<(t-1), tab); | |||||
| cond_negate_tw_niels(&ni, invert); | |||||
| if (i||j) { | if (i||j) { | ||||
| p448_tw_extensible_add_niels(working, &ni); | |||||
| add_tw_niels_to_tw_extensible(working, &ni); | |||||
| } else { | } else { | ||||
| convert_tw_niels_to_tw_extensible(working, &ni); | convert_tw_niels_to_tw_extensible(working, &ni); | ||||
| } | } | ||||
| @@ -334,7 +402,7 @@ precompute_for_combs( | |||||
| for (j=0; j<t; j++) { | for (j=0; j<t; j++) { | ||||
| if (j) { | if (j) { | ||||
| convert_tw_extensible_to_tw_pniels(&pn_tmp, &working); | convert_tw_extensible_to_tw_pniels(&pn_tmp, &working); | ||||
| p448_tw_extensible_add_pniels(&start, &pn_tmp); | |||||
| add_tw_pniels_to_tw_extensible(&start, &pn_tmp); | |||||
| } else { | } else { | ||||
| copy_tw_extensible(&start, &working); | copy_tw_extensible(&start, &working); | ||||
| } | } | ||||
| @@ -343,13 +411,13 @@ precompute_for_combs( | |||||
| break; | break; | ||||
| } | } | ||||
| p448_tw_extensible_double(&working); | |||||
| double_tw_extensible(&working); | |||||
| if (j<t-1) { | if (j<t-1) { | ||||
| convert_tw_extensible_to_tw_pniels(&doubles[j], &working); | convert_tw_extensible_to_tw_pniels(&doubles[j], &working); | ||||
| } | } | ||||
| for (k=0; k<s-1; k++) { | for (k=0; k<s-1; k++) { | ||||
| p448_tw_extensible_double(&working); | |||||
| double_tw_extensible(&working); | |||||
| } | } | ||||
| } | } | ||||
| @@ -370,13 +438,10 @@ precompute_for_combs( | |||||
| if (gray & (1<<k)) { | if (gray & (1<<k)) { | ||||
| /* start += doubles[k] */ | /* start += doubles[k] */ | ||||
| p448_tw_extensible_add_pniels(&start, &doubles[k]); | |||||
| add_tw_pniels_to_tw_extensible(&start, &doubles[k]); | |||||
| } else { | } else { | ||||
| /* start -= doubles[k] */ | /* start -= doubles[k] */ | ||||
| /* PERF: uncond negate */ | |||||
| copy_tw_pniels(&pn_tmp, &doubles[k]); | |||||
| pniels_cond_negate(&pn_tmp, -1); | |||||
| p448_tw_extensible_add_pniels(&start, &pn_tmp); | |||||
| sub_tw_pniels_from_tw_extensible(&start, &doubles[k]); | |||||
| } | } | ||||
| @@ -435,16 +500,16 @@ precompute_for_wnaf( | |||||
| copy_tw_niels(&out[0], &tmp.n); | copy_tw_niels(&out[0], &tmp.n); | ||||
| if (tbits > 0) { | if (tbits > 0) { | ||||
| p448_tw_extensible_double(&base); | |||||
| double_tw_extensible(&base); | |||||
| convert_tw_extensible_to_tw_pniels(&twop, &base); | convert_tw_extensible_to_tw_pniels(&twop, &base); | ||||
| p448_tw_extensible_add_pniels(&base, &tmp); | |||||
| add_tw_pniels_to_tw_extensible(&base, &tmp); | |||||
| convert_tw_extensible_to_tw_pniels(&tmp, &base); | convert_tw_extensible_to_tw_pniels(&tmp, &base); | ||||
| p448_copy(&zs[1], &tmp.z); | p448_copy(&zs[1], &tmp.z); | ||||
| copy_tw_niels(&out[1], &tmp.n); | copy_tw_niels(&out[1], &tmp.n); | ||||
| for (i=2; i < 1<<tbits; i++) { | for (i=2; i < 1<<tbits; i++) { | ||||
| p448_tw_extensible_add_pniels(&base, &twop); | |||||
| add_tw_pniels_to_tw_extensible(&base, &twop); | |||||
| convert_tw_extensible_to_tw_pniels(&tmp, &base); | convert_tw_extensible_to_tw_pniels(&tmp, &base); | ||||
| p448_copy(&zs[i], &tmp.z); | p448_copy(&zs[i], &tmp.z); | ||||
| copy_tw_niels(&out[i], &tmp.n); | copy_tw_niels(&out[i], &tmp.n); | ||||
| @@ -474,6 +539,10 @@ precompute_for_wnaf( | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| /** | |||||
| * @cond internal | |||||
| * Control for variable-time scalar multiply algorithms. | |||||
| */ | |||||
| struct smvt_control { | struct smvt_control { | ||||
| int power, addend; | int power, addend; | ||||
| }; | }; | ||||
| @@ -537,20 +606,20 @@ prepare_wnaf_table( | |||||
| if (tbits == 0) return; | if (tbits == 0) return; | ||||
| p448_tw_extensible_double(working); | |||||
| double_tw_extensible(working); | |||||
| struct tw_pniels_t twop; | struct tw_pniels_t twop; | ||||
| convert_tw_extensible_to_tw_pniels(&twop, working); | convert_tw_extensible_to_tw_pniels(&twop, working); | ||||
| p448_tw_extensible_add_pniels(working, &output[0]); | |||||
| add_tw_pniels_to_tw_extensible(working, &output[0]); | |||||
| convert_tw_extensible_to_tw_pniels(&output[1], working); | convert_tw_extensible_to_tw_pniels(&output[1], working); | ||||
| for (int i=2; i < 1<<tbits; i++) { | for (int i=2; i < 1<<tbits; i++) { | ||||
| p448_tw_extensible_add_pniels(working, &twop); | |||||
| add_tw_pniels_to_tw_extensible(working, &twop); | |||||
| convert_tw_extensible_to_tw_pniels(&output[i], working); | convert_tw_extensible_to_tw_pniels(&output[i], working); | ||||
| } | } | ||||
| } | } | ||||
| int | |||||
| void | |||||
| edwards_scalar_multiply_vt( | edwards_scalar_multiply_vt( | ||||
| struct tw_extensible_t *working, | struct tw_extensible_t *working, | ||||
| const uint64_t scalar[7] | const uint64_t scalar[7] | ||||
| @@ -570,31 +639,25 @@ edwards_scalar_multiply_vt( | |||||
| convert_tw_pniels_to_tw_extensible(working, &precmp[control[0].addend >> 1]); | convert_tw_pniels_to_tw_extensible(working, &precmp[control[0].addend >> 1]); | ||||
| } else { | } else { | ||||
| set_identity_tw_extensible(working); | set_identity_tw_extensible(working); | ||||
| return control_bits; | |||||
| return; | |||||
| } | } | ||||
| int conti = 1, i; | int conti = 1, i; | ||||
| struct tw_pniels_t neg; | |||||
| for (i = control[0].power - 1; i >= 0; i--) { | for (i = control[0].power - 1; i >= 0; i--) { | ||||
| p448_tw_extensible_double(working); | |||||
| double_tw_extensible(working); | |||||
| if (i == control[conti].power) { | if (i == control[conti].power) { | ||||
| assert(control[conti].addend); | assert(control[conti].addend); | ||||
| if (control[conti].addend > 0) { | if (control[conti].addend > 0) { | ||||
| p448_tw_extensible_add_pniels(working, &precmp[control[conti].addend >> 1]); | |||||
| add_tw_pniels_to_tw_extensible(working, &precmp[control[conti].addend >> 1]); | |||||
| } else { | } else { | ||||
| /* PERF: uncond negate */ | |||||
| copy_tw_pniels(&neg, &precmp[(-control[conti].addend) >> 1]); | |||||
| pniels_cond_negate(&neg, -1); | |||||
| p448_tw_extensible_add_pniels(working, &neg); | |||||
| sub_tw_pniels_from_tw_extensible(working, &precmp[(-control[conti].addend) >> 1]); | |||||
| } | } | ||||
| conti++; | conti++; | ||||
| assert(conti <= control_bits); | assert(conti <= control_bits); | ||||
| } | } | ||||
| } | } | ||||
| return control_bits; /* TODO: don't return anything, this is just for testing */ | |||||
| } | } | ||||
| void | void | ||||
| @@ -620,21 +683,16 @@ edwards_scalar_multiply_vt_pre( | |||||
| } | } | ||||
| int conti = 1, i; | int conti = 1, i; | ||||
| struct tw_niels_t neg; | |||||
| for (i = control[0].power - 1; i >= 0; i--) { | for (i = control[0].power - 1; i >= 0; i--) { | ||||
| p448_tw_extensible_double(working); | |||||
| double_tw_extensible(working); | |||||
| if (i == control[conti].power) { | if (i == control[conti].power) { | ||||
| assert(control[conti].addend); | assert(control[conti].addend); | ||||
| if (control[conti].addend > 0) { | if (control[conti].addend > 0) { | ||||
| p448_tw_extensible_add_niels(working, &precmp[control[conti].addend >> 1]); | |||||
| add_tw_niels_to_tw_extensible(working, &precmp[control[conti].addend >> 1]); | |||||
| } else { | } else { | ||||
| /* PERF: uncond negate */ | |||||
| copy_tw_niels(&neg, &precmp[(-control[conti].addend) >> 1]); | |||||
| niels_cond_negate(&neg, -1); | |||||
| p448_tw_extensible_add_niels(working, &neg); | |||||
| sub_tw_niels_from_tw_extensible(working, &precmp[(-control[conti].addend) >> 1]); | |||||
| } | } | ||||
| conti++; | conti++; | ||||
| assert(conti <= control_bits); | assert(conti <= control_bits); | ||||
| @@ -642,7 +700,7 @@ edwards_scalar_multiply_vt_pre( | |||||
| } | } | ||||
| } | } | ||||
| int | |||||
| void | |||||
| edwards_combo_var_fixed_vt( | edwards_combo_var_fixed_vt( | ||||
| struct tw_extensible_t *working, | struct tw_extensible_t *working, | ||||
| const uint64_t scalar_var[7], | const uint64_t scalar_var[7], | ||||
| @@ -671,7 +729,7 @@ edwards_combo_var_fixed_vt( | |||||
| contv++; | contv++; | ||||
| } else if (i == control_pre[0].power && i >=0 ) { | } else if (i == control_pre[0].power && i >=0 ) { | ||||
| convert_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[0].addend >> 1]); | convert_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[0].addend >> 1]); | ||||
| p448_tw_extensible_add_niels(working, &precmp[control_pre[0].addend >> 1]); | |||||
| add_tw_niels_to_tw_extensible(working, &precmp[control_pre[0].addend >> 1]); | |||||
| contv++; contp++; | contv++; contp++; | ||||
| } else { | } else { | ||||
| i = control_pre[0].power; | i = control_pre[0].power; | ||||
| @@ -681,24 +739,19 @@ edwards_combo_var_fixed_vt( | |||||
| if (i < 0) { | if (i < 0) { | ||||
| set_identity_tw_extensible(working); | set_identity_tw_extensible(working); | ||||
| return ncb_pre; | |||||
| return; | |||||
| } | } | ||||
| struct tw_pniels_t pneg; | |||||
| struct tw_niels_t neg; | |||||
| for (i--; i >= 0; i--) { | for (i--; i >= 0; i--) { | ||||
| p448_tw_extensible_double(working); | |||||
| double_tw_extensible(working); | |||||
| if (i == control_var[contv].power) { | if (i == control_var[contv].power) { | ||||
| assert(control_var[contv].addend); | assert(control_var[contv].addend); | ||||
| if (control_var[contv].addend > 0) { | if (control_var[contv].addend > 0) { | ||||
| p448_tw_extensible_add_pniels(working, &precmp_var[control_var[contv].addend >> 1]); | |||||
| add_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[contv].addend >> 1]); | |||||
| } else { | } else { | ||||
| /* PERF: uncond negate */ | |||||
| copy_tw_pniels(&pneg, &precmp_var[(-control_var[contv].addend) >> 1]); | |||||
| pniels_cond_negate(&pneg, -1); | |||||
| p448_tw_extensible_add_pniels(working, &pneg); | |||||
| sub_tw_pniels_from_tw_extensible(working, &precmp_var[(-control_var[contv].addend) >> 1]); | |||||
| } | } | ||||
| contv++; | contv++; | ||||
| } | } | ||||
| @@ -707,12 +760,9 @@ edwards_combo_var_fixed_vt( | |||||
| assert(control_pre[contp].addend); | assert(control_pre[contp].addend); | ||||
| if (control_pre[contp].addend > 0) { | if (control_pre[contp].addend > 0) { | ||||
| p448_tw_extensible_add_niels(working, &precmp[control_pre[contp].addend >> 1]); | |||||
| add_tw_niels_to_tw_extensible(working, &precmp[control_pre[contp].addend >> 1]); | |||||
| } else { | } else { | ||||
| /* PERF: uncond negate */ | |||||
| copy_tw_niels(&neg, &precmp[(-control_pre[contp].addend) >> 1]); | |||||
| niels_cond_negate(&neg, -1); | |||||
| p448_tw_extensible_add_niels(working, &neg); | |||||
| sub_tw_niels_from_tw_extensible(working, &precmp[(-control_pre[contp].addend) >> 1]); | |||||
| } | } | ||||
| contp++; | contp++; | ||||
| } | } | ||||
| @@ -720,8 +770,6 @@ edwards_combo_var_fixed_vt( | |||||
| assert(contv == ncb_var); | assert(contv == ncb_var); | ||||
| assert(contp == ncb_pre); | assert(contp == ncb_pre); | ||||
| return ncb_pre; | |||||
| } | } | ||||
| @@ -53,6 +53,13 @@ edwards_scalar_multiply( | |||||
| const uint64_t scalar[7] | const uint64_t scalar[7] | ||||
| /* TODO? int nbits */ | /* TODO? int nbits */ | ||||
| ); | ); | ||||
| void | |||||
| edwards_scalar_multiply_vlook( | |||||
| struct tw_extensible_t *working, | |||||
| const uint64_t scalar[7] | |||||
| /* TODO? int nbits */ | |||||
| ); | |||||
| mask_t | mask_t | ||||
| precompute_for_combs( | precompute_for_combs( | ||||
| @@ -73,8 +80,7 @@ edwards_comb( | |||||
| int s | int s | ||||
| ); | ); | ||||
| /* TODO: void. int is just for diagnostic purposes. */ | |||||
| int | |||||
| void | |||||
| edwards_scalar_multiply_vt( | edwards_scalar_multiply_vt( | ||||
| struct tw_extensible_t *working, | struct tw_extensible_t *working, | ||||
| const uint64_t scalar[7] | const uint64_t scalar[7] | ||||
| @@ -95,8 +101,7 @@ precompute_for_wnaf( | |||||
| int tbits | int tbits | ||||
| ); /* TODO: attr don't ignore... */ | ); /* TODO: attr don't ignore... */ | ||||
| /* TODO: void. int is just for diagnostic purposes. */ | |||||
| int | |||||
| void | |||||
| edwards_combo_var_fixed_vt( | edwards_combo_var_fixed_vt( | ||||
| struct tw_extensible_t *working, | struct tw_extensible_t *working, | ||||
| const uint64_t scalar_var[7], | const uint64_t scalar_var[7], | ||||
| @@ -0,0 +1,182 @@ | |||||
| /* Copyright (c) 2011 Stanford University. | |||||
| * Copyright (c) 2014 Cryptography Research, Inc. | |||||
| * Released under the MIT License. See LICENSE.txt for license information. | |||||
| */ | |||||
| #include "sha512.h" | |||||
| #include <string.h> | |||||
| #include <assert.h> | |||||
/**
 * Rotate a 64-bit word right.
 *
 * Both shift counts are reduced mod 64, so a rotation by 0 (or by any
 * multiple of 64) returns x unchanged instead of performing an undefined
 * 64-bit shift.
 *
 * @param [in] x The word to rotate.
 * @param [in] d The rotation distance in bits.
 * @return x rotated right by d bits.
 */
static inline uint64_t
rotate_r (
    uint64_t x,
    int d
) {
    const unsigned int right = (unsigned int)d & 63u;
    const unsigned int left  = (64u - right) & 63u;
    return (x >> right) | (x << left);
}
/* TODO: get from headers */
/**
 * Convert a 64-bit word from host byte order to big-endian byte order.
 *
 * The bytes are laid out explicitly, most significant first, so the result
 * is correct on any host endianness and does not depend on x86-64 inline
 * assembly.  On a little-endian host this is a plain byte swap.
 *
 * @param [in] x Value in host byte order.
 * @return A word whose in-memory bytes are x in big-endian order.
 */
static inline uint64_t
htobe64 (uint64_t x) {
    uint8_t be[8];
    uint64_t out;
    int i;

    for (i=0; i<8; i++) {
        be[i] = (uint8_t)(x >> (56 - 8*i));
    }
    memcpy(&out, be, sizeof(out));
    return out;
}
/** Initial chaining value loaded into a context by sha512_init(). */
static const uint64_t
sha512_init_state[8] = {
    0x6a09e667f3bcc908ull,
    0xbb67ae8584caa73bull,
    0x3c6ef372fe94f82bull,
    0xa54ff53a5f1d36f1ull,
    0x510e527fade682d1ull,
    0x9b05688c2b3e6c1full,
    0x1f83d9abfb41bd6bull,
    0x5be0cd19137e2179ull
};
/** Per-round additive constants; entry i is added in round i. */
static const uint64_t
sha512_k[80] = {
    /*  0 */ 0x428a2f98d728ae22ull, 0x7137449123ef65cdull,
    /*  2 */ 0xb5c0fbcfec4d3b2full, 0xe9b5dba58189dbbcull,
    /*  4 */ 0x3956c25bf348b538ull, 0x59f111f1b605d019ull,
    /*  6 */ 0x923f82a4af194f9bull, 0xab1c5ed5da6d8118ull,
    /*  8 */ 0xd807aa98a3030242ull, 0x12835b0145706fbeull,
    /* 10 */ 0x243185be4ee4b28cull, 0x550c7dc3d5ffb4e2ull,
    /* 12 */ 0x72be5d74f27b896full, 0x80deb1fe3b1696b1ull,
    /* 14 */ 0x9bdc06a725c71235ull, 0xc19bf174cf692694ull,
    /* 16 */ 0xe49b69c19ef14ad2ull, 0xefbe4786384f25e3ull,
    /* 18 */ 0x0fc19dc68b8cd5b5ull, 0x240ca1cc77ac9c65ull,
    /* 20 */ 0x2de92c6f592b0275ull, 0x4a7484aa6ea6e483ull,
    /* 22 */ 0x5cb0a9dcbd41fbd4ull, 0x76f988da831153b5ull,
    /* 24 */ 0x983e5152ee66dfabull, 0xa831c66d2db43210ull,
    /* 26 */ 0xb00327c898fb213full, 0xbf597fc7beef0ee4ull,
    /* 28 */ 0xc6e00bf33da88fc2ull, 0xd5a79147930aa725ull,
    /* 30 */ 0x06ca6351e003826full, 0x142929670a0e6e70ull,
    /* 32 */ 0x27b70a8546d22ffcull, 0x2e1b21385c26c926ull,
    /* 34 */ 0x4d2c6dfc5ac42aedull, 0x53380d139d95b3dfull,
    /* 36 */ 0x650a73548baf63deull, 0x766a0abb3c77b2a8ull,
    /* 38 */ 0x81c2c92e47edaee6ull, 0x92722c851482353bull,
    /* 40 */ 0xa2bfe8a14cf10364ull, 0xa81a664bbc423001ull,
    /* 42 */ 0xc24b8b70d0f89791ull, 0xc76c51a30654be30ull,
    /* 44 */ 0xd192e819d6ef5218ull, 0xd69906245565a910ull,
    /* 46 */ 0xf40e35855771202aull, 0x106aa07032bbd1b8ull,
    /* 48 */ 0x19a4c116b8d2d0c8ull, 0x1e376c085141ab53ull,
    /* 50 */ 0x2748774cdf8eeb99ull, 0x34b0bcb5e19b48a8ull,
    /* 52 */ 0x391c0cb3c5c95a63ull, 0x4ed8aa4ae3418acbull,
    /* 54 */ 0x5b9cca4f7763e373ull, 0x682e6ff3d6b2b8a3ull,
    /* 56 */ 0x748f82ee5defb2fcull, 0x78a5636f43172f60ull,
    /* 58 */ 0x84c87814a1f0ab72ull, 0x8cc702081a6439ecull,
    /* 60 */ 0x90befffa23631e28ull, 0xa4506cebde82bde9ull,
    /* 62 */ 0xbef9a3f7b2c67915ull, 0xc67178f2e372532bull,
    /* 64 */ 0xca273eceea26619cull, 0xd186b8c721c0c207ull,
    /* 66 */ 0xeada7dd6cde0eb1eull, 0xf57d4f7fee6ed178ull,
    /* 68 */ 0x06f067aa72176fbaull, 0x0a637dc5a2c898a6ull,
    /* 70 */ 0x113f9804bef90daeull, 0x1b710b35131c471bull,
    /* 72 */ 0x28db77f523047d84ull, 0x32caab7b40c72493ull,
    /* 74 */ 0x3c9ebe0a15c9bebcull, 0x431d67c49c100d4cull,
    /* 76 */ 0x4cc5d4becb3e42b6ull, 0x597f299cfc657e2aull,
    /* 78 */ 0x5fcb6fab3ad6faecull, 0x6c44198c4a475817ull
};
/**
 * Mixing function used when computing the new first working word of a
 * round: XOR of the input rotated right by 28, 34 and 39 bits.
 */
static inline uint64_t S0 (uint64_t h1) {
    const uint64_t r28 = rotate_r(h1, 28);
    const uint64_t r34 = rotate_r(h1, 34);
    const uint64_t r39 = rotate_r(h1, 39);
    return r28 ^ r34 ^ r39;
}
/**
 * Mixing function applied to the fifth working word in every round: XOR
 * of the input rotated right by 14, 18 and 41 bits.
 */
static inline uint64_t S1 (uint64_t h4) {
    const uint64_t r14 = rotate_r(h4, 14);
    const uint64_t r18 = rotate_r(h4, 18);
    const uint64_t r41 = rotate_r(h4, 41);
    return r14 ^ r18 ^ r41;
}
/**
 * Message-schedule mixing function: rotations right by 1 and 8 bits XORed
 * with a plain (non-rotating) right shift by 7 bits.
 */
static inline uint64_t s0 (uint64_t a) {
    const uint64_t r1   = rotate_r(a, 1);
    const uint64_t r8   = rotate_r(a, 8);
    const uint64_t shr7 = a >> 7;
    return r1 ^ r8 ^ shr7;
}
/**
 * Message-schedule mixing function: rotations right by 19 and 61 bits
 * XORed with a plain (non-rotating) right shift by 6 bits.
 */
static inline uint64_t s1 (uint64_t b) {
    const uint64_t r19  = rotate_r(b, 19);
    const uint64_t r61  = rotate_r(b, 61);
    const uint64_t shr6 = b >> 6;
    return r19 ^ r61 ^ shr6;
}
/**
 * Bitwise choose: each output bit is taken from h5 where the matching bit
 * of h4 is set, and from h6 where it is clear.
 */
static inline uint64_t ch (uint64_t h4, uint64_t h5, uint64_t h6) {
    const uint64_t from_h5 = h4 & h5;
    const uint64_t from_h6 = ~h4 & h6;
    return from_h5 ^ from_h6;
}
/**
 * Bitwise majority: each output bit is set when at least two of the three
 * corresponding input bits are set.
 */
static inline uint64_t maj(uint64_t h1, uint64_t h2, uint64_t h3) {
    const uint64_t both   = h1 & h2;
    const uint64_t either = h1 | h2;
    return both | (h3 & either);
}
| static void | |||||
| sha512_process_block ( | |||||
| struct sha512_ctx_t *ctx | |||||
| ) { | |||||
| uint64_t i, tmp, a, b, | |||||
| *w = (uint64_t *) ctx->block, | |||||
| *state = ctx->chain, | |||||
| h0 = state[0], h1 = state[1], h2 = state[2], h3 = state[3], | |||||
| h4 = state[4], h5 = state[5], h6 = state[6], h7 = state[7]; | |||||
| /* Clang doesn't unswitch this automatically */ | |||||
| for (i=0; i<16; i++) { | |||||
| /* load up the input word for this round */ | |||||
| tmp = w[i] = htobe64(w[i]); | |||||
| tmp = tmp + h7 + S1(h4) + ch(h4,h5,h6) + sha512_k[i]; | |||||
| /* shift register */ | |||||
| h7 = h6; h6 = h5; h5 = h4; | |||||
| h4 = h3 + tmp; | |||||
| h3 = h2; h2 = h1; h1 = h0; | |||||
| h0 = tmp + maj(h1,h2,h3) + S0(h1); | |||||
| } | |||||
| for (; i<80; i++) { | |||||
| /* load up the input word for this round */ | |||||
| a = w[(i+1 ) & 15]; | |||||
| b = w[(i+14) & 15]; | |||||
| tmp = w[i&15] = s0(a) + s1(b) + w[i&15] + w[(i+9) & 15]; | |||||
| tmp = tmp + h7 + S1(h4) + ch(h4,h5,h6) + sha512_k[i]; | |||||
| /* shift register */ | |||||
| h7 = h6; h6 = h5; h5 = h4; | |||||
| h4 = h3 + tmp; | |||||
| h3 = h2; h2 = h1; h1 = h0; | |||||
| h0 = tmp + maj(h1,h2,h3) + S0(h1); | |||||
| } | |||||
| state[0] += h0; | |||||
| state[1] += h1; | |||||
| state[2] += h2; | |||||
| state[3] += h3; | |||||
| state[4] += h4; | |||||
| state[5] += h5; | |||||
| state[6] += h6; | |||||
| state[7] += h7; | |||||
| } | |||||
| void | |||||
| sha512_init ( | |||||
| struct sha512_ctx_t *ctx | |||||
| ) { | |||||
| ctx->nbytes = 0; | |||||
| memcpy(ctx->chain, sha512_init_state, sizeof(sha512_init_state)); | |||||
| memset(ctx->block, 0, sizeof(ctx->block)); | |||||
| } | |||||
| void | |||||
| sha512_update ( | |||||
| struct sha512_ctx_t *ctx, | |||||
| const unsigned char *data, | |||||
| uint64_t bytes | |||||
| ) { | |||||
| assert(ctx->nbytes < 1ull<<56); | |||||
| assert(bytes < 1ull<<56); | |||||
| while (bytes) { | |||||
| uint64_t fill = ctx->nbytes % 128, accept = 128 - fill; | |||||
| if (accept > bytes) accept = bytes; | |||||
| ctx->nbytes += accept; | |||||
| memcpy(ctx->block + fill, data, accept); | |||||
| if (fill+accept == 128) | |||||
| sha512_process_block(ctx); | |||||
| bytes -= accept; | |||||
| data += accept; | |||||
| } | |||||
| assert(ctx->nbytes < 1ull<<56); | |||||
| } | |||||
| void | |||||
| sha512_final ( | |||||
| struct sha512_ctx_t *ctx, | |||||
| uint8_t result[64] | |||||
| ) { | |||||
| uint64_t fill = ctx->nbytes % 128, i; | |||||
| ctx->block[fill++] = 0x80; | |||||
| if (fill > 112) { | |||||
| memset(ctx->block + fill, 0, 128-fill); | |||||
| sha512_process_block(ctx); | |||||
| fill = 0; | |||||
| } | |||||
| memset(ctx->block + fill, 0, 112-fill); | |||||
| *((uint64_t *)&ctx->block[112]) = 0; | |||||
| *((uint64_t *)&ctx->block[120]) = htobe64((ctx->nbytes * 8)); | |||||
| sha512_process_block(ctx); | |||||
| for (i=0; i<8; i++) { | |||||
| ctx->chain[i] = htobe64(ctx->chain[i]); | |||||
| } | |||||
| memcpy(result, ctx->chain, sizeof(ctx->chain)); | |||||
| sha512_init(ctx); | |||||
| } | |||||
| @@ -0,0 +1,49 @@ | |||||
| /* Copyright (c) 2014 Cryptography Research, Inc. | |||||
| * Released under the MIT License. See LICENSE.txt for license information. | |||||
| */ | |||||
#ifndef __GOLDI_SHA512_H__
#define __GOLDI_SHA512_H__ 1

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/* TODO: KAT */

/**
 * SHA512 hashing context.
 *
 * This structure is opaque.
 */
struct sha512_ctx_t {
    /** @privatesection */
    uint64_t chain[8];   /* running chaining value */
    uint8_t block[128];  /* input buffered until a full block is available */
    uint64_t nbytes;     /* total number of bytes absorbed so far */
};

/**
 * Initialize (or reset) a hashing context.
 *
 * @param [out] ctx The context to initialize.
 */
void
sha512_init (
    struct sha512_ctx_t *ctx
);

/**
 * Absorb more input into a hashing context.
 *
 * @param [inout] ctx An initialized context.
 * @param [in] data The bytes to hash.
 * @param [in] bytes The number of bytes at data.
 */
void
sha512_update (
    struct sha512_ctx_t *ctx,
    const unsigned char *data,
    uint64_t bytes
);

/**
 * Finish hashing and write the digest.  The context is reset afterwards
 * and can be reused for a new message.
 *
 * @param [inout] ctx The context to finalize.
 * @param [out] result Receives the 64-byte digest.
 */
void
sha512_final (
    struct sha512_ctx_t *ctx,
    uint8_t result[64]
);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* __GOLDI_SHA512_H__ */
| @@ -49,7 +49,7 @@ br_is_zero(big_register_t x) { | |||||
| return (big_register_t)(x == (big_register_t)0); | return (big_register_t)(x == (big_register_t)0); | ||||
| } | } | ||||
| #else | #else | ||||
| #error "Todo: constant-time equality on vectorless platforms" | |||||
| #error "TODO: constant-time equality on vectorless platforms" | |||||
| #endif | #endif | ||||
| #endif /* __WORD_H__ */ | #endif /* __WORD_H__ */ | ||||