diff --git a/barrett_field.c b/barrett_field.c deleted file mode 100644 index a27095a..0000000 --- a/barrett_field.c +++ /dev/null @@ -1,269 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -#include "barrett_field.h" -#include - -word_t -add_nr_ext_packed( - word_t *out, - const word_t *a, - int nwords_a, - const word_t *c, - int nwords_c, - word_t mask -) { - int i; - dword_t carry = 0; - for (i=0; i>= WORD_BITS; - } - for (; i>= WORD_BITS; - } - return carry; -} - -static __inline__ word_t -add_nr_packed( - word_t *a, - const word_t *c, - int nwords -) { - int i; - dword_t carry = 0; - for (i=0; i>= WORD_BITS; - } - return carry; -} - -static __inline__ word_t -sub_nr_packed( - word_t *a, - const word_t *c, - int nwords -) { - int i; - dsword_t carry = 0; - for (i=0; i>= WORD_BITS; - } - return carry; -} - -word_t -sub_nr_ext_packed( - word_t *out, - const word_t *a, - int nwords_a, - const word_t *c, - int nwords_c, - word_t mask -) { - int i; - dsword_t carry = 0; - for (i=0; i>= WORD_BITS; - } - for (; i>= WORD_BITS; - } - return carry; -} - -static word_t -widemac( - word_t *accum, - int nwords_accum, - const word_t *mier, - int nwords_mier, - word_t mand, - word_t carry -) { - int i; - assert(nwords_accum >= nwords_mier); - - for (i=0; i> WORD_BITS; - } - - for (; i> WORD_BITS; - } - - return carry; -} - -void -barrett_negate ( - word_t *a, - int nwords_a, - const word_t *p_lo, - int nwords_p, - int nwords_lo, - int p_shift -) { - int i; - dsword_t carry = 0; - - barrett_reduce(a,nwords_a,0,p_lo,nwords_p,nwords_lo,p_shift); - - /* Have p = 2^big - p_lo. Want p - a = 2^big - p_lo - a */ - - for (i=0; i>= WORD_BITS; - } - for (; i>= WORD_BITS; - } - } - - a[nwords_p-1] = carry = carry + (((word_t)1) << p_shift); - - for (; i>64)); -} - -void -barrett_reduce( - word_t *a, - int nwords_a, - word_t a_carry, - const word_t *p_lo, - int nwords_p, - int nwords_lo, - int p_shift -) { - /* TODO: non 2^k-c primes. */ - int repeat, nwords_left_in_a=nwords_a; - - /* TODO: is there a point to this a_carry business? */ - assert(a_carry < ((word_t)1)<= nwords_p); - - for (; nwords_left_in_a >= nwords_p; nwords_left_in_a--) { - for (repeat=0; repeat<2; repeat++) { - /* PERF: surely a more careful implementation could - * avoid this double round - */ - word_t mand = a[nwords_left_in_a-1] >> p_shift; - a[nwords_left_in_a-1] &= (((word_t)1)<>p_shift); - a[nwords_p-1] &= (((word_t)1)<= nwords_p); - - /* nwords_tmp = max(nwords_a + 1, nwords_p + 1, nwords_accum if doMac); */ - int nwords_tmp = (nwords_a > nwords_p) ? nwords_a : nwords_p; - nwords_tmp++; - if (nwords_tmp < nwords_accum && doMac) - nwords_tmp = nwords_accum; - - word_t tmp[nwords_tmp]; - int bpos, i; - - for (i=0; i= 0; bpos--) { - /* Invariant at the beginning of the loop: the high word is unused. */ - assert(tmp[nwords_tmp-1] == 0); - - /* shift up */ - for (i=nwords_tmp-2; i>=0; i--) { - tmp[i+1] = tmp[i]; - } - tmp[0] = 0; - - /* mac and reduce */ - word_t carry = widemac(tmp, nwords_tmp, a, nwords_a, b[bpos], 0); - - /* the mac can't carry, because nwords_tmp >= nwords_a+1 and its high word is clear */ - assert(!carry); - barrett_reduce(tmp, nwords_tmp, carry, p_lo, nwords_p, nwords_lo, p_shift); - - /* at this point, the number of words used is nwords_p <= nwords_tmp-1, - * so the high word is again clear */ - } - - if (doMac) { - word_t cout = add_nr_packed(tmp, accum, nwords_accum); - barrett_reduce(tmp, nwords_tmp, cout, p_lo, nwords_p, nwords_lo, p_shift); - } - - for (i=0; i -#include -#include -#include -#include - -#include "p448.h" -#include "ec_point.h" -#include "scalarmul.h" -#include "barrett_field.h" -#include "crandom.h" -#include "goldilocks.h" -#include "sha512.h" - -word_t q448_lo[4] = { - 0xdc873d6d54a7bb0dull, - 0xde933d8d723a70aaull, - 0x3bb124b65129c96full, - 0x000000008335dc16ull -}; - -double now() { - struct timeval tv; - gettimeofday(&tv, NULL); - - return tv.tv_sec + tv.tv_usec/1000000.0; -} - -void p448_randomize( struct crandom_state_t *crand, struct p448_t *a ) { - crandom_generate(crand, (unsigned char *)a, sizeof(*a)); - p448_strong_reduce(a); -} - -void q448_randomize( struct crandom_state_t *crand, uint64_t sk[7] ) { - crandom_generate(crand, (unsigned char *)sk, sizeof(uint64_t)*7); -} - -void p448_print( const char *descr, const struct p448_t *a ) { - p448_t b; - p448_copy(&b, a); - p448_strong_reduce(&b); - int j; - printf("%s = 0x", descr); - for (j=7; j>=0; j--) { - printf("%014llx", (unsigned long long)b.limb[j]); - } - printf("\n"); -} - -void p448_print_full( const char *descr, const struct p448_t *a ) { - int j; - printf("%s = 0x", descr); - for (j=7; j>=0; j--) { - printf("%02llx_%014llx ", a->limb[j]>>56, (unsigned long long)a->limb[j]&(1ull<<56)-1); - } - printf("\n"); -} - -void q448_print( const char *descr, const uint64_t secret[7] ) { - int j; - printf("%s = 0x", descr); - for (j=6; j>=0; j--) { - printf("%016llx", (unsigned long long)secret[j]); - } - printf("\n"); -} - -int main(int argc, char **argv) { - (void)argc; - (void)argv; - - struct tw_extensible_t ext; - struct extensible_t exta; - struct tw_niels_t niels; - struct tw_pniels_t pniels; - struct affine_t affine; - struct montgomery_t mb; - struct p448_t a,b,c,d; - - - double when; - int i,j; - - /* Bad randomness so we can debug. */ - char initial_seed[32]; - for (i=0; i<32; i++) initial_seed[i] = i; - struct crandom_state_t crand; - crandom_init_from_buffer(&crand, initial_seed); - - uint64_t sk[7],tk[7]; - q448_randomize(&crand, sk); - - when = now(); - for (i=0; i<10000000; i++) { - p448_mul(&c, &b, &a); - } - when = now() - when; - printf("mul: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<10000000; i++) { - p448_sqr(&c, &a); - } - when = now() - when; - printf("sqr: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<5000000; i++) { - p448_mul(&c, &b, &a); - p448_mul(&a, &b, &c); - } - when = now() - when; - printf("mul dep: %5.1fns\n", when * 1e9 / i / 2); - - when = now(); - for (i=0; i<10000000; i++) { - p448_mulw(&c, &b, 1234562); - } - when = now() - when; - printf("mulw: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<100000; i++) { - p448_randomize(&crand, &a); - } - when = now() - when; - printf("rand448: %5.1fns\n", when * 1e9 / i); - - struct sha512_ctx_t sha; - uint8_t hashout[128]; - when = now(); - for (i=0; i<10000; i++) { - sha512_init(&sha); - sha512_final(&sha, hashout); - } - when = now() - when; - printf("sha512 1blk: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<10000; i++) { - sha512_update(&sha, hashout, 128); - } - when = now() - when; - printf("sha512 blk: %5.1fns (%0.2f MB/s)\n", when * 1e9 / i, 128*i/when/1e6); - - when = now(); - for (i=0; i<10000; i++) { - p448_isr(&c, &a); - } - when = now() - when; - printf("isr auto: %5.1fµs\n", when * 1e6 / i); - - for (i=0; i<100; i++) { - p448_randomize(&crand, &a); - p448_isr(&d,&a); - p448_sqr(&b,&d); - p448_mul(&c,&b,&a); - p448_sqr(&b,&c); - p448_subw(&b,1); - p448_bias(&b,1); - if (!p448_is_zero(&b)) { - printf("ISR validation failure!\n"); - p448_print("a", &a); - p448_print("s", &d); - } - } - - when = now(); - for (i=0; i<10000; i++) { - elligator_2s_inject(&affine, &a); - } - when = now() - when; - printf("elligator: %5.1fµs\n", when * 1e6 / i); - - for (i=0; i<100; i++) { - p448_randomize(&crand, &a); - elligator_2s_inject(&affine, &a); - if (!validate_affine(&affine)) { - printf("Elligator validation failure!\n"); - p448_print("a", &a); - p448_print("x", &affine.x); - p448_print("y", &affine.y); - } - } - - when = now(); - for (i=0; i<10000; i++) { - deserialize_affine(&affine, &a); - } - when = now() - when; - printf("decompress: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<10000; i++) { - serialize_extensible(&a, &exta); - } - when = now() - when; - printf("compress: %5.1fµs\n", when * 1e6 / i); - - int goods = 0; - for (i=0; i<100; i++) { - p448_randomize(&crand, &a); - mask_t good = deserialize_affine(&affine, &a); - if (good & !validate_affine(&affine)) { - printf("Deserialize validation failure!\n"); - p448_print("a", &a); - p448_print("x", &affine.x); - p448_print("y", &affine.y); - } else if (good) { - goods++; - convert_affine_to_extensible(&exta,&affine); - serialize_extensible(&b, &exta); - p448_sub(&c,&b,&a); - p448_bias(&c,2); - if (!p448_is_zero(&c)) { - printf("Reserialize validation failure!\n"); - p448_print("a", &a); - p448_print("x", &affine.x); - p448_print("y", &affine.y); - deserialize_affine(&affine, &b); - p448_print("b", &b); - p448_print("x", &affine.x); - p448_print("y", &affine.y); - printf("\n"); - } - } - } - if (goods=0; j--) { - lsk[j] = random(); - lsk[j] = lsk[j]<<22 ^ random(); - lsk[j] = lsk[j]<<22 ^ random(); - } - } - - when = now(); - for (i=0; i<1000000; i++) { - barrett_reduce(lsk,12,0,q448_lo,7,4,62); - } - when = now() - when; - printf("barrett red: %5.1fns\n", when * 1e9 / i); - // - // when = now(); - // for (i=0; i<100000; i++) { - // barrett_mac(lsk,7,lsk,7,lsk,7,q448_lo,7,4,62); - // } - // when = now() - when; - // printf("barrett mac: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000000; i++) { - add_tw_niels_to_tw_extensible(&ext, &niels); - } - when = now() - when; - printf("exti+niels: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000000; i++) { - add_tw_pniels_to_tw_extensible(&ext, &pniels); - } - when = now() - when; - printf("exti+pniels: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000000; i++) { - double_tw_extensible(&ext); - } - when = now() - when; - printf("exti dbl: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000000; i++) { - untwist_and_double(&exta, &ext); - } - when = now() - when; - printf("i->a isog: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000000; i++) { - twist_and_double(&ext, &exta); - } - when = now() - when; - printf("a->i isog: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000000; i++) { - montgomery_step(&mb); - } - when = now() - when; - printf("monty step: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000; i++) { - p448_montgomery_ladder(&a,&b,sk,448,0); - } - when = now() - when; - printf("full ladder: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - edwards_scalar_multiply(&ext,sk); - } - when = now() - when; - printf("edwards smz: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - edwards_scalar_multiply_vlook(&ext,sk); - untwist_and_double_and_serialize(&a,&ext); - } - when = now() - when; - printf("edwards svl: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - q448_randomize(&crand, sk); - edwards_scalar_multiply_vt(&ext,sk); - } - when = now() - when; - printf("edwards vtm: %5.1fµs\n", when * 1e6 / i); - - struct tw_niels_t wnaft[1<<6]; - when = now(); - for (i=0; i<1000; i++) { - precompute_for_wnaf(wnaft,&ext,6); - } - when = now() - when; - printf("wnaf6 pre: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - q448_randomize(&crand, sk); - edwards_scalar_multiply_vt_pre(&ext,sk,wnaft,6); - } - when = now() - when; - printf("edwards vt6: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - precompute_for_wnaf(wnaft,&ext,4); - } - when = now() - when; - printf("wnaf4 pre: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - q448_randomize(&crand, sk); - edwards_scalar_multiply_vt_pre(&ext,sk,wnaft,4); - } - when = now() - when; - printf("edwards vt4: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - precompute_for_wnaf(wnaft,&ext,5); - } - when = now() - when; - printf("wnaf5 pre: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - q448_randomize(&crand, sk); - edwards_scalar_multiply_vt_pre(&ext,sk,wnaft,5); - } - when = now() - when; - printf("edwards vt5: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - q448_randomize(&crand, sk); - q448_randomize(&crand, tk); - edwards_combo_var_fixed_vt(&ext,sk,tk,wnaft,5); - } - when = now() - when; - printf("vt vf combo: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - deserialize_affine(&affine, &a); - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - edwards_scalar_multiply(&ext,sk); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - } - when = now() - when; - printf("edwards sm: %5.1fµs\n", when * 1e6 / i); - - struct tw_niels_t table[80] __attribute__((aligned(32))); - - while (1) { - p448_randomize(&crand, &a); - if (deserialize_affine(&affine, &a)) break; - } - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - when = now(); - for (i=0; i<1000; i++) { - precompute_for_combs(table, &ext, 5, 5, 18); - } - when = now() - when; - printf("pre(5,5,18): %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<10000; i++) { - edwards_comb(&ext, sk, table, 5, 5, 18); - } - when = now() - when; - printf("com(5,5,18): %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<10000; i++) { - edwards_comb(&ext, sk, table, 3, 5, 30); - } - when = now() - when; - printf("com(3,5,30): %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<10000; i++) { - edwards_comb(&ext, sk, table, 8, 4, 14); - } - when = now() - when; - printf("com(4,4,28): %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<10000; i++) { - q448_randomize(&crand, sk); - edwards_comb(&ext, sk, table, 5, 5, 18); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - } - when = now() - when; - printf("keygen: %5.1fµs\n", when * 1e6 / i); - - printf("\nGoldilocks:\n"); - - int res = goldilocks_init(); - assert(!res); - - struct goldilocks_public_key_t gpk,hpk; - struct goldilocks_private_key_t gsk,hsk; - - when = now(); - for (i=0; i<10000; i++) { - if (i&1) { - res = goldilocks_keygen(&gsk,&gpk); - } else { - res = goldilocks_keygen(&hsk,&hpk); - } - assert(!res); - } - when = now() - when; - printf("keygen: %5.1fµs\n", when * 1e6 / i); - - uint8_t ss1[64],ss2[64]; - int gres1,gres2; - when = now(); - for (i=0; i<10000; i++) { - if (i&1) { - gres1 = goldilocks_shared_secret(ss1,&gsk,&hpk); - } else { - gres2 = goldilocks_shared_secret(ss2,&hsk,&gpk); - } - } - when = now() - when; - printf("ecdh: %5.1fµs\n", when * 1e6 / i); - if (gres1 || gres2 || memcmp(ss1,ss2,64)) { - printf("[FAIL] %d %d\n",gres1,gres2); - - printf("ss1 = "); - for (i=0; i<56; i++) { - printf("%02x", ss1[i]); - } - printf("\nss2 = "); - for (i=0; i<56; i++) { - printf("%02x", ss2[i]); - } - printf("\n"); - } - - uint8_t sout[56*2]; - const char *message = "hello world"; - uint64_t message_len = strlen(message); - when = now(); - for (i=0; i<10000; i++) { - res = goldilocks_sign(sout,(const unsigned char *)message,message_len,&gsk); - assert(!res); - } - when = now() - when; - printf("sign: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<10000; i++) { - res = goldilocks_verify(sout,(const unsigned char *)message,message_len,&gpk); - } - when = now() - when; - printf("verify: %5.1fµs\n", when * 1e6 / i); - - printf("\nTesting...\n"); - - - int failures=0, successes = 0; - for (i=0; i<1000; i++) { - (void)goldilocks_keygen(&gsk,&gpk); - goldilocks_sign(sout,(const unsigned char *)message,message_len,&gsk); - res = goldilocks_verify(sout,(const unsigned char *)message,message_len,&gpk); - if (res) failures++; - } - if (failures) { - printf("FAIL %d/%d signature checks!\n", failures, i); - } - - failures=0; successes = 0; - for (i=0; i<1000; i++) { - p448_randomize(&crand, &a); - uint64_t two = 2; - mask_t good = p448_montgomery_ladder(&b,&a,&two,2,0); - if (!good) continue; - - uint64_t x = rand(), y=rand(), z=x*y; - p448_montgomery_ladder(&b,&a,&x,64,0); - p448_montgomery_ladder(&c,&b,&y,64,0); - p448_montgomery_ladder(&b,&a,&z,64,0); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - if (!p448_is_zero(&d)) { - printf("Odd ladder validation failure %d!\n", ++failures); - p448_print("a", &a); - printf("x=%llx, y=%llx, z=%llx\n", x,y,z); - p448_print("c", &c); - p448_print("b", &b); - printf("\n"); - } - } - - failures = 0; - for (i=0; i<1000; i++) { - mask_t good; - do { - p448_randomize(&crand, &a); - good = deserialize_affine(&affine, &a); - } while (!good); - - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - untwist_and_double_and_serialize(&c, &ext); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - - if (good && !p448_is_zero(&d)){ - printf("Iso+serial validation failure %d!\n", ++failures); - p448_print("a", &a); - p448_print("b", &b); - p448_print("c", &c); - printf("\n"); - } else if (good) { - successes ++; - } - } - if (successes < i/3) { - printf("Iso+serial variation: only %d/%d successful.\n", successes, i); - } - - failures = 0; - uint64_t four = 4; - for (i=0; i<1000; i++) { - p448_randomize(&crand, &a); - q448_randomize(&crand, sk); - - mask_t good = p448_montgomery_ladder(&b,&a,&four,3,0); - good &= p448_montgomery_ladder(&c,&b,sk,448,0); - - mask_t goodb = deserialize_affine(&affine, &a); - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - edwards_scalar_multiply(&ext,sk); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - - if (good != goodb) { - printf("Compatibility validation failure %d: good: %d != %d\n", ++failures, (int)(-good), (int)(-goodb)); - } else if (good && !p448_is_zero(&d)){ - printf("Compatibility validation failure %d!\n", ++failures); - p448_print("a", &a); - q448_print("s", sk); - p448_print("c", &c); - p448_print("b", &b); - printf("\n"); - } else if (good) { - successes ++; - } - } - if (successes < i/3) { - printf("Compatibility variation: only %d/%d successful.\n", successes, i); - } - - successes = failures = 0; - for (i=0; i<1000; i++) { - p448_randomize(&crand, &a); - q448_randomize(&crand, sk); - if (!i) bzero(&sk, sizeof(sk)); - - mask_t good = p448_montgomery_ladder(&b,&a,&four,3,0); - good &= p448_montgomery_ladder(&c,&b,sk,448,0); - if (!good) continue; - - deserialize_affine(&affine, &a); - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - - precompute_for_combs(table, &ext, 5, 5, 18); - edwards_comb(&ext, sk, table, 5, 5, 18); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - - if (!p448_is_zero(&d)){ - printf("Comb validation failure %d!\n", ++failures); - p448_print("a", &a); - q448_print("s", sk); - p448_print("c", &c); - p448_print("b", &b); - printf("\n"); - } else if (good) { - successes ++; - } - } - if (successes < i/3) { - printf("Comb variation: only %d/%d successful.\n", successes, i); - } - - successes = failures = 0; - for (i=0; i<1000; i++) { - p448_randomize(&crand, &a); - q448_randomize(&crand, sk); - if (!i) bzero(&sk, sizeof(sk)); - - mask_t good = deserialize_affine(&affine, &a); - if (!good) continue; - - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - struct tw_extensible_t exu; - copy_tw_extensible(&exu, &ext); - - edwards_scalar_multiply(&ext,sk); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - - edwards_scalar_multiply_vt(&exu,sk); - untwist_and_double(&exta,&exu); - serialize_extensible(&c, &exta); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - - if (!p448_is_zero(&d)){ - printf("WNAF validation failure %d!\n", ++failures); - p448_print("a", &a); - q448_print("s", sk); - p448_print("c", &c); - p448_print("b", &b); - printf("\n"); - } else if (good) { - successes ++; - } - } - if (successes < i/3) { - printf("WNAF variation: only %d/%d successful.\n", successes, i); - } - - successes = failures = 0; - for (i=0; i<1000; i++) { - p448_randomize(&crand, &a); - q448_randomize(&crand, sk); - if (!i) bzero(&sk, sizeof(sk)); - - mask_t good = deserialize_affine(&affine, &a); - if (!good) continue; - - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - struct tw_extensible_t exu; - copy_tw_extensible(&exu, &ext); - - edwards_scalar_multiply(&ext,sk); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - - precompute_for_wnaf(wnaft,&exu,5); - edwards_scalar_multiply_vt_pre(&exu,sk,wnaft,5); - untwist_and_double(&exta,&exu); - serialize_extensible(&c, &exta); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - - if (!p448_is_zero(&d)){ - printf("PreWNAF validation failure %d!\n", ++failures); - p448_print("a", &a); - q448_print("s", sk); - p448_print("c", &c); - p448_print("b", &b); - for (j=0; j<1<<5; j++) { - printf("WNAFT %d\n", j); - p448_print(" a",&wnaft[j].a); - p448_print(" b",&wnaft[j].b); - p448_print(" c",&wnaft[j].c); - } - printf("\n\n"); - } else if (good) { - successes ++; - } - } - if (successes < i/3) { - printf("PreWNAF variation: only %d/%d successful.\n", successes, i); - } - - successes = failures = 0; - for (i=0; i<1000; i++) { - struct p448_t aa; - struct tw_extensible_t exu,exv,exw; - - mask_t good; - do { - p448_randomize(&crand, &a); - good = deserialize_affine(&affine, &a); - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - } while (!good); - do { - p448_randomize(&crand, &aa); - good = deserialize_affine(&affine, &aa); - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&exu,&exta); - } while (!good); - p448_randomize(&crand, &aa); - - q448_randomize(&crand, sk); - if (i==0 || i==2) bzero(&sk, sizeof(sk)); - q448_randomize(&crand, tk); - if (i==0 || i==1) bzero(&tk, sizeof(tk)); - - copy_tw_extensible(&exv, &ext); - copy_tw_extensible(&exw, &exu); - edwards_scalar_multiply(&exv,sk); - edwards_scalar_multiply(&exw,tk); - convert_tw_extensible_to_tw_pniels(&pniels, &exw); - add_tw_pniels_to_tw_extensible(&exv,&pniels); - untwist_and_double(&exta,&exv); - serialize_extensible(&b, &exta); - - precompute_for_wnaf(wnaft,&exu,5); - edwards_combo_var_fixed_vt(&ext,sk,tk,wnaft,5); - untwist_and_double(&exta,&exv); - serialize_extensible(&c, &exta); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - - if (!p448_is_zero(&d)){ - printf("PreWNAF combo validation failure %d!\n", ++failures); - p448_print("a", &a); - p448_print("A", &aa); - q448_print("s", sk); - q448_print("t", tk); - p448_print("c", &c); - p448_print("b", &b); - printf("\n\n"); - } else if (good) { - successes ++; - } - } - if (successes < i) { - printf("PreWNAF combo variation: only %d/%d successful.\n", successes, i); - } - - successes = failures = 0; - for (i=0; i<1000; i++) { - p448_randomize(&crand, &a); - - q448_randomize(&crand, sk); - q448_randomize(&crand, tk); - - uint64_t two = 2; - mask_t good = p448_montgomery_ladder(&b,&a,&two,2,0); - p448_montgomery_ladder(&b,&a,sk,448,0); - p448_montgomery_ladder(&d,&b,tk,448,0); - p448_montgomery_ladder(&b,&a,tk,448,0); - p448_montgomery_ladder(&c,&b,sk,448,0); - - p448_sub(&b,&c,&d); - p448_bias(&b,2); - - mask_t success = p448_is_zero(&b) | ~good; - - if (!success) { - printf("Ladder validation failure %d!\n", ++failures); - p448_print("a", &a); - q448_print("s", sk); - q448_print("t", tk); - p448_print("c", &c); - p448_print("d", &d); - printf("\n"); - } - } - - return 0; -} diff --git a/crandom.c b/crandom.c deleted file mode 100644 index 468b226..0000000 --- a/crandom.c +++ /dev/null @@ -1,442 +0,0 @@ -/* Copyright (c) 2011 Stanford University. - * Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -/* Chacha random number generator code copied from crandom */ - -#include "intrinsics.h" -#include "crandom.h" -#include - -volatile unsigned int crandom_features = 0; - -unsigned int crandom_detect_features() { - unsigned int out = GEN; - -# if (defined(__i386__) || defined(__x86_64__)) - u_int32_t a,b,c,d; - - a=1; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d)); - out |= GEN; - if (d & 1<<26) out |= SSE2; - if (d & 1<< 9) out |= SSSE3; - if (c & 1<<25) out |= AESNI; - if (c & 1<<28) out |= AVX; - if (b & 1<<5) out |= AVX2; - - a=0x80000001; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d)); - if (c & 1<<11) out |= XOP; - if (c & 1<<30) out |= RDRAND; -# endif - - return out; -} - - - -INTRINSIC u_int64_t rdrand(int abort_on_fail) { - uint64_t out = 0; - int tries = 1000; - - if (HAVE(RDRAND)) { - # if defined(__x86_64__) - u_int64_t out, a=0; - for (; tries && !a; tries--) { - __asm__ __volatile__ ( - "rdrand %0\n\tsetc %%al" - : "=r"(out), "+a"(a) :: "cc" - ); - } - # elif (defined(__i386__)) - u_int32_t reg, a=0; - uint64_t out; - for (; tries && !a; tries--) { - __asm__ __volatile__ ( - "rdrand %0\n\tsetc %%al" - : "=r"(reg), "+a"(a) :: "cc" - ); - } - out = reg; a = 0; - for (; tries && !a; tries--) { - __asm__ __volatile__ ( - "rdrand %0\n\tsetc %%al" - : "=r"(reg), "+a"(a) :: "cc" - ); - } - out = out << 32 | reg; - return out; - # else - abort(); // whut - # endif - } else { - tries = 0; - } - - if (abort_on_fail && !tries) { - abort(); - } - - return out; -} - - -/* ------------------------------- Vectorized code ------------------------------- */ -#define shuffle(x,i) _mm_shuffle_epi32(x, \ - i + ((i+1)&3)*4 + ((i+2)&3)*16 + ((i+3)&3)*64) - -#define add _mm_add_epi32 -#define add64 _mm_add_epi64 - -#define NEED_XOP (MIGHT_HAVE(XOP)) -#define NEED_SSSE3 (MIGHT_HAVE(SSSE3) && !MUST_HAVE(XOP)) -#define NEED_SSE2 (MIGHT_HAVE(SSE2) && !MUST_HAVE(SSSE3)) -#define NEED_CONV (!MUST_HAVE(SSE2)) - -#if NEED_XOP -static __inline__ void -quarter_round_xop( - ssereg *a, - ssereg *b, - ssereg *c, - ssereg *d -) { - *a = add(*a,*b); *d = xop_rotate(16, *d ^ *a); - *c = add(*c,*d); *b = xop_rotate(12, *b ^ *c); - *a = add(*a,*b); *d = xop_rotate(8, *d ^ *a); - *c = add(*c,*d); *b = xop_rotate(7, *b ^ *c); -} -#endif - -#if NEED_SSSE3 -static const ssereg shuffle8 = { 0x0605040702010003ull, 0x0E0D0C0F0A09080Bull }; -static const ssereg shuffle16 = { 0x0504070601000302ull, 0x0D0C0F0E09080B0Aull }; - -INTRINSIC ssereg ssse3_rotate_8(ssereg a) { - return _mm_shuffle_epi8(a, shuffle8); -} - -INTRINSIC ssereg ssse3_rotate_16(ssereg a) { - return _mm_shuffle_epi8(a, shuffle16); -} - -static __inline__ void -quarter_round_ssse3( - ssereg *a, - ssereg *b, - ssereg *c, - ssereg *d -) { - *a = add(*a,*b); *d = ssse3_rotate_16(*d ^ *a); - *c = add(*c,*d); *b = sse2_rotate(12, *b ^ *c); - *a = add(*a,*b); *d = ssse3_rotate_8( *d ^ *a); - *c = add(*c,*d); *b = sse2_rotate(7, *b ^ *c); -} -#endif /* MIGHT_HAVE(SSSE3) && !MUST_HAVE(XOP) */ - -#if NEED_SSE2 -static __inline__ void -quarter_round_sse2( - ssereg *a, - ssereg *b, - ssereg *c, - ssereg *d -) { - *a = add(*a,*b); *d = sse2_rotate(16, *d ^ *a); - *c = add(*c,*d); *b = sse2_rotate(12, *b ^ *c); - *a = add(*a,*b); *d = sse2_rotate(8, *d ^ *a); - *c = add(*c,*d); *b = sse2_rotate(7, *b ^ *c); -} -#endif - -#define DOUBLE_ROUND(qrf) { \ - qrf(&a1,&b1,&c1,&d1); \ - qrf(&a2,&b2,&c2,&d2); \ - b1 = shuffle(b1,1); \ - c1 = shuffle(c1,2); \ - d1 = shuffle(d1,3); \ - b2 = shuffle(b2,1); \ - c2 = shuffle(c2,2); \ - d2 = shuffle(d2,3); \ - \ - qrf(&a1,&b1,&c1,&d1); \ - qrf(&a2,&b2,&c2,&d2); \ - b1 = shuffle(b1,3); \ - c1 = shuffle(c1,2); \ - d1 = shuffle(d1,1); \ - b2 = shuffle(b2,3); \ - c2 = shuffle(c2,2); \ - d2 = shuffle(d2,1); \ - } - -#define OUTPUT_FUNCTION { \ - output[0] = add(a1,aa); \ - output[1] = add(b1,bb); \ - output[2] = add(c1,cc); \ - output[3] = add(d1,dd); \ - output[4] = add(a2,aa); \ - output[5] = add(b2,bb); \ - output[6] = add(c2,add(cc,p)); \ - output[7] = add(d2,dd); \ - \ - output += 8; \ - \ - cc = add64(add64(cc,p), p); \ - a1 = a2 = aa; \ - b1 = b2 = bb; \ - c1 = cc; c2 = add64(cc,p);\ - d1 = d2 = dd; \ - } -/* ------------------------------------------------------------------------------- */ - -INTRINSIC u_int32_t rotate(int r, u_int32_t a) { - return a<>(32-r); -} - -static __inline__ void -quarter_round(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d) { - *a = *a + *b; *d = rotate(16, *d^*a); - *c = *c + *d; *b = rotate(12, *b^*c); - *a = *a + *b; *d = rotate(8, *d^*a); - *c = *c + *d; *b = rotate(7, *b^*c); -} - -static void -crandom_chacha_expand(u_int64_t iv, - u_int64_t ctr, - int nr, - int output_size, - const unsigned char *key_, - unsigned char *output_) { -# if MIGHT_HAVE_SSE2 - if (HAVE(SSE2)) { - ssereg *key = (ssereg *)key_; - ssereg *output = (ssereg *)output_; - - ssereg a1 = key[0], a2 = a1, aa = a1, - b1 = key[1], b2 = b1, bb = b1, - c1 = {iv, ctr}, c2 = {iv, ctr+1}, cc = c1, - d1 = {0x3320646e61707865ull, 0x6b20657479622d32ull}, - d2 = d1, dd = d1, - p = {0, 1}; - - int i,r; -# if (NEED_XOP) - if (HAVE(XOP)) { - for (i=0; i0; r-=2) - DOUBLE_ROUND(quarter_round_xop); - OUTPUT_FUNCTION; - } - return; - } -# endif -# if (NEED_SSSE3) - if (HAVE(SSSE3)) { - for (i=0; i0; r-=2) - DOUBLE_ROUND(quarter_round_ssse3); - OUTPUT_FUNCTION; - } - return; - } -# endif -# if (NEED_SSE2) - if (HAVE(SSE2)) { - for (i=0; i0; r-=2) - DOUBLE_ROUND(quarter_round_sse2); - OUTPUT_FUNCTION; - } - return; - } -# endif - } -# endif - -# if NEED_CONV - { - const u_int32_t *key = (const u_int32_t *)key_; - u_int32_t - x[16], - input[16] = { - key[0], key[1], key[2], key[3], - key[4], key[5], key[6], key[7], - iv, iv>>32, ctr, ctr>>32, - 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 - }, - *output = (u_int32_t *)output_; - int i, r; - - for (i=0; i0; r-=2) { - quarter_round(&x[0], &x[4], &x[8], &x[12]); - quarter_round(&x[1], &x[5], &x[9], &x[13]); - quarter_round(&x[2], &x[6], &x[10], &x[14]); - quarter_round(&x[3], &x[7], &x[11], &x[15]); - - quarter_round(&x[0], &x[5], &x[10], &x[15]); - quarter_round(&x[1], &x[6], &x[11], &x[12]); - quarter_round(&x[2], &x[7], &x[8], &x[13]); - quarter_round(&x[3], &x[4], &x[9], &x[14]); - } - for (r=0; r<16; r++) { - output[r] = x[r] + input[r]; - } - - output += 16; - input[11] ++; - if (!input[11]) input[12]++; - } - } - -#endif /* NEED_CONV */ -} - -/* "return 4", cf xkcd #221 */ -#define CRANDOM_MAGIC 0x72657475726e2034ull - -int -crandom_init_from_file( - struct crandom_state_t *state, - const char *filename, - int reseed_interval, - int reseeds_mandatory -) { - state->fill = 0; - state->reseed_countdown = reseed_interval; - state->reseed_interval = reseed_interval; - state->ctr = 0; - - state->randomfd = open(filename, O_RDONLY); - if (state->randomfd == -1) { - int err = errno; - return err ? err : -1; - } - - ssize_t offset = 0, red; - do { - red = read(state->randomfd, state->seed + offset, 32 - offset); - if (red > 0) offset += red; - } while (red > 0 && offset < 32); - - if (offset < 32) { - int err = errno; - return err ? err : -1; - } - - memset(state->buffer, 0, 96); - - state->magic = CRANDOM_MAGIC; - state->reseeds_mandatory = reseeds_mandatory; - - return 0; -} - -void -crandom_init_from_buffer( - struct crandom_state_t *state, - const char initial_seed[32] -) { - memcpy(state->seed, initial_seed, 32); - memset(state->buffer, 0, 96); - state->reseed_countdown = state->reseed_interval = state->fill = state->ctr = state->reseeds_mandatory = 0; - state->randomfd = -1; - state->magic = CRANDOM_MAGIC; -} - -int -crandom_generate( - struct crandom_state_t *state, - unsigned char *output, - unsigned long long length -) { - /* the generator isn't seeded; maybe they ignored the return value of init_from_file */ - if (unlikely(state->magic != CRANDOM_MAGIC)) { - abort(); - } - - int ret = 0; - - while (length) { - if (unlikely(state->fill <= 0)) { - uint64_t iv = 0; - if (state->reseed_interval) { - /* it's nondeterministic, stir in some rdrand() or rdtsc() */ - if (HAVE(RDRAND)) { - iv = rdrand(0); - if (!iv) iv = rdtsc(); - } else { - iv = rdtsc(); - } - - state->reseed_countdown--; - if (unlikely(state->reseed_countdown <= 0)) { - /* reseed by xoring in random state */ - state->reseed_countdown = state->reseed_interval; - ssize_t offset = 0, red; - do { - red = read(state->randomfd, state->buffer + offset, 32 - offset); - if (red > 0) offset += red; - } while (red > 0 && offset < 32); - - if (offset < 32) { - /* The read failed. Signal an error with the return code. - * - * If reseeds are mandatory, crash. - * - * If not, the generator is still probably safe to use, because reseeding - * is basically over-engineering for caution. Also, the user might ignore - * the return code, so we still need to fill the request. - * - * Set reseed_countdown = 1 so we'll try again later. If the user's - * performance sucks as a result of ignoring the error code while calling - * us in a loop, well, that's life. - */ - if (state->reseeds_mandatory) { - abort(); - } - - ret = errno; - if (ret == 0) ret = -1; - state->reseed_countdown = 1; - } - - int i; - for (i=0; i<32; i++) { - /* Stir in the buffer. If somehow the read failed, it'll be zeros. */ - state->seed[i] ^= state->buffer[i]; - } - } - } - crandom_chacha_expand(iv,state->ctr,20,128,state->seed,state->seed); - state->ctr++; - state->fill = sizeof(state->buffer); - } - - unsigned long long copy = (length > state->fill) ? state->fill : length; - state->fill -= copy; - memcpy(output, state->buffer + state->fill, copy); - memset(state->buffer + state->fill, 0, copy); - output += copy; length -= copy; - } - - return ret; -} - -void -crandom_destroy( - struct crandom_state_t *state -) { - if (state->magic == CRANDOM_MAGIC && state->randomfd) { - (void) close(state->randomfd); - /* Ignore the return value from close(), because what would it mean? - * "Your random device, which you were reading over NFS, lost some data"? - */ - } - - memset(state, 0, sizeof(*state)); -} diff --git a/crandom.h b/crandom.h deleted file mode 100644 index f603f13..0000000 --- a/crandom.h +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright (c) 2011 Stanford University. - * Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -/** - * @file crandom.h - * @author Mike Hamburg - * @brief A miniature version of the (as of yet incomplete) crandom project. - */ - -#ifndef __GOLDI_CRANDOM_H__ -#define __GOLDI_CRANDOM_H__ 1 - -#include /* for uint64_t */ -#include /* for open */ -#include /* for returning errors after open */ -#include /* for abort */ -#include /* for memcpy */ -#include /* for bzero */ -#include /* for read */ - -/** - * @brief The state of a crandom generator. - * - * This object is opaque. It is not protected by a lock, and so must - * not be accessed by multiple threads at the same time. - */ -struct crandom_state_t { - /** @privatesection */ - unsigned char seed[32]; - unsigned char buffer[96]; - uint64_t ctr; - uint64_t magic; - unsigned int fill; - int reseed_countdown; - int reseed_interval; - int reseeds_mandatory; - int randomfd; -} __attribute__((aligned(16))) ; - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Initialize a crandom state from the chosen file. - * - * This function initializes a state from a given state file, or - * from a random device (eg. /dev/random or /dev/urandom). - * - * You must check the return value of this function. - * - * @param [out] state The crandom state variable to initalize. - * @param [in] filename The name of the seed file or random device. - * @param [in] reseed_interval The number of 96-byte blocks which can be - * generated without reseeding. Suggest 10000. - * @param [in] reseeds_mandatory If nonzero, call abort() if a reseed fails. - * Suggest 1. - * - * @retval 0 Success. - * @retval Nonzero An error to be interpreted by strerror(). - */ -int -crandom_init_from_file ( - struct crandom_state_t *state, - const char *filename, - int reseed_interval, - int reseeds_mandatory -) __attribute__((warn_unused_result)); - - -/** - * Initialize a crandom state from a buffer, for deterministic operation. - * - * This function is used to initialize a crandom state deterministically, - * mainly for testing purposes. It can also be used to expand a secret - * random value deterministically. - * - * @warning The crandom implementation is not guaranteed to be stable. - * That is, a later release might produce a different random stream from - * the same seed. - * - * @param [out] state The crandom state variable to initalize. - * @param [in] initial_seed The seed value. - */ -void -crandom_init_from_buffer ( - struct crandom_state_t *state, - const char initial_seed[32] -); - -/** - * Fill the output buffer with random data. - * - * This function uses the given crandom state to produce pseudorandom data - * in the output buffer. - * - * This function may perform reads from the state's random device if it needs - * to reseed. This could block if that file is a blocking source, such as - * a pipe or /dev/random on Linux. If reseeding fails and the state has - * reseeds_mandatory set, this function will call abort(). Otherwise, it will - * return an error code, but it will still randomize the buffer. - * - * If called on a corrupted, uninitialized or destroyed state, this function - * will abort(). - * - * @warning This function is not thread-safe with respect to the state. Don't - * call it from multiple threads with the same state at the same time. - * - * @param [inout] state The crandom state to use for generation. - * @param [out] output The buffer to fill with random data. - * @param [in] length The length of the buffer. - * - * @retval 0 Success. - * @retval Nonezero A non-mandatory reseed operation failed. - */ -int -crandom_generate ( - struct crandom_state_t *state, - unsigned char *output, - unsigned long long length -); - -/** - * Destroy the random state. Further calls to crandom_generate() on that state - * will abort(). - * - * @param [inout] state The state to be destroyed. - */ -void -crandom_destroy ( - struct crandom_state_t *state -); - -#ifdef __cplusplus -}; /* extern "C" */ -#endif - -#endif /* __GOLDI_CRANDOM_H__ */ diff --git a/ec_point.c b/ec_point.c deleted file mode 100644 index 5c2d3ab..0000000 --- a/ec_point.c +++ /dev/null @@ -1,745 +0,0 @@ -/** - * @cond internal - * @file ec_point.c - * @copyright - * Copyright (c) 2014 Cryptography Research, Inc. \n - * Released under the MIT License. See LICENSE.txt for license information. - * @author Mike Hamburg - * @warning This file was automatically generated. - */ - -#include "ec_point.h" - - -void -p448_isr ( - struct p448_t* a, - const struct p448_t* x -) { - struct p448_t L0, L1, L2; - p448_sqr ( &L1, x ); - p448_mul ( &L2, x, &L1 ); - p448_sqr ( &L1, &L2 ); - p448_mul ( &L2, x, &L1 ); - p448_sqrn ( &L1, &L2, 3 ); - p448_mul ( &L0, &L2, &L1 ); - p448_sqrn ( &L1, &L0, 3 ); - p448_mul ( &L0, &L2, &L1 ); - p448_sqrn ( &L2, &L0, 9 ); - p448_mul ( &L1, &L0, &L2 ); - p448_sqr ( &L0, &L1 ); - p448_mul ( &L2, x, &L0 ); - p448_sqrn ( &L0, &L2, 18 ); - p448_mul ( &L2, &L1, &L0 ); - p448_sqrn ( &L0, &L2, 37 ); - p448_mul ( &L1, &L2, &L0 ); - p448_sqrn ( &L0, &L1, 37 ); - p448_mul ( &L1, &L2, &L0 ); - p448_sqrn ( &L0, &L1, 111 ); - p448_mul ( &L2, &L1, &L0 ); - p448_sqr ( &L0, &L2 ); - p448_mul ( &L1, x, &L0 ); - p448_sqrn ( &L0, &L1, 223 ); - p448_mul ( a, &L2, &L0 ); -} - -void -p448_inverse ( - struct p448_t* a, - const struct p448_t* x -) { - struct p448_t L0, L1; - p448_isr ( &L0, x ); - p448_sqr ( &L1, &L0 ); - p448_sqr ( &L0, &L1 ); - p448_mul ( a, x, &L0 ); -} - -void -add_tw_niels_to_tw_extensible ( - struct tw_extensible_t* d, - const struct tw_niels_t* e -) { - struct p448_t L0, L1; - p448_bias ( &d->y, 2 ); - p448_bias ( &d->z, 2 ); - p448_sub ( &L1, &d->y, &d->x ); - p448_mul ( &L0, &e->a, &L1 ); - p448_add ( &L1, &d->x, &d->y ); - p448_mul ( &d->y, &e->b, &L1 ); - p448_bias ( &d->y, 2 ); - p448_mul ( &L1, &d->u, &d->t ); - p448_mul ( &d->x, &e->c, &L1 ); - p448_add ( &d->u, &L0, &d->y ); - p448_sub ( &d->t, &d->y, &L0 ); - p448_sub ( &d->y, &d->z, &d->x ); - p448_add ( &L0, &d->x, &d->z ); - p448_mul ( &d->z, &L0, &d->y ); - p448_mul ( &d->x, &d->y, &d->t ); - p448_mul ( &d->y, &L0, &d->u ); -} - -void -sub_tw_niels_from_tw_extensible ( - struct tw_extensible_t* d, - const struct tw_niels_t* e -) { - struct p448_t L0, L1; - p448_bias ( &d->y, 2 ); - p448_bias ( &d->z, 2 ); - p448_sub ( &L1, &d->y, &d->x ); - p448_mul ( &L0, &e->b, &L1 ); - p448_add ( &L1, &d->x, &d->y ); - p448_mul ( &d->y, &e->a, &L1 ); - p448_bias ( &d->y, 2 ); - p448_mul ( &L1, &d->u, &d->t ); - p448_mul ( &d->x, &e->c, &L1 ); - p448_add ( &d->u, &L0, &d->y ); - p448_sub ( &d->t, &d->y, &L0 ); - p448_add ( &d->y, &d->x, &d->z ); - p448_sub ( &L0, &d->z, &d->x ); - p448_mul ( &d->z, &L0, &d->y ); - p448_mul ( &d->x, &d->y, &d->t ); - p448_mul ( &d->y, &L0, &d->u ); -} - -void -add_tw_pniels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* a -) { - struct p448_t L0; - p448_mul ( &L0, &e->z, &a->z ); - p448_copy ( &e->z, &L0 ); - add_tw_niels_to_tw_extensible( e, &a->n ); -} - -void -sub_tw_pniels_from_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* a -) { - struct p448_t L0; - p448_mul ( &L0, &e->z, &a->z ); - p448_copy ( &e->z, &L0 ); - sub_tw_niels_from_tw_extensible( e, &a->n ); -} - -void -double_tw_extensible ( - struct tw_extensible_t* a -) { - struct p448_t L0, L1, L2; - p448_sqr ( &L2, &a->x ); - p448_sqr ( &L0, &a->y ); - p448_add ( &a->u, &L2, &L0 ); - p448_add ( &a->t, &a->y, &a->x ); - p448_sqr ( &L1, &a->t ); - p448_bias ( &L1, 3 ); - p448_sub ( &a->t, &L1, &a->u ); - p448_sub ( &L1, &L0, &L2 ); - p448_bias ( &L1, 2 ); - p448_sqr ( &a->x, &a->z ); - p448_bias ( &a->x, 2 ); - p448_add ( &a->z, &a->x, &a->x ); - p448_sub ( &L0, &a->z, &L1 ); - p448_mul ( &a->z, &L1, &L0 ); - p448_mul ( &a->x, &L0, &a->t ); - p448_mul ( &a->y, &L1, &a->u ); -} - -void -double_extensible ( - struct extensible_t* a -) { - struct p448_t L0, L1, L2; - p448_sqr ( &L2, &a->x ); - p448_sqr ( &L0, &a->y ); - p448_add ( &L1, &L2, &L0 ); - p448_add ( &a->t, &a->y, &a->x ); - p448_sqr ( &a->u, &a->t ); - p448_bias ( &a->u, 3 ); - p448_sub ( &a->t, &a->u, &L1 ); - p448_sub ( &a->u, &L0, &L2 ); - p448_bias ( &a->u, 2 ); - p448_sqr ( &a->x, &a->z ); - p448_bias ( &a->x, 2 ); - p448_add ( &a->z, &a->x, &a->x ); - p448_sub ( &L0, &a->z, &L1 ); - p448_mul ( &a->z, &L1, &L0 ); - p448_mul ( &a->x, &L0, &a->t ); - p448_mul ( &a->y, &L1, &a->u ); -} - -void -twist_and_double ( - struct tw_extensible_t* b, - const struct extensible_t* a -) { - struct p448_t L0; - p448_sqr ( &b->x, &a->x ); - p448_sqr ( &b->z, &a->y ); - p448_add ( &b->u, &b->x, &b->z ); - p448_add ( &b->t, &a->y, &a->x ); - p448_sqr ( &L0, &b->t ); - p448_bias ( &L0, 3 ); - p448_sub ( &b->t, &L0, &b->u ); - p448_sub ( &L0, &b->z, &b->x ); - p448_bias ( &L0, 2 ); - p448_sqr ( &b->x, &a->z ); - p448_bias ( &b->x, 2 ); - p448_add ( &b->z, &b->x, &b->x ); - p448_sub ( &b->y, &b->z, &b->u ); - p448_mul ( &b->z, &L0, &b->y ); - p448_mul ( &b->x, &b->y, &b->t ); - p448_mul ( &b->y, &L0, &b->u ); -} - -void -untwist_and_double ( - struct extensible_t* b, - const struct tw_extensible_t* a -) { - struct p448_t L0; - p448_sqr ( &b->x, &a->x ); - p448_sqr ( &b->z, &a->y ); - p448_add ( &L0, &b->x, &b->z ); - p448_add ( &b->t, &a->y, &a->x ); - p448_sqr ( &b->u, &b->t ); - p448_bias ( &b->u, 3 ); - p448_sub ( &b->t, &b->u, &L0 ); - p448_sub ( &b->u, &b->z, &b->x ); - p448_bias ( &b->u, 2 ); - p448_sqr ( &b->x, &a->z ); - p448_bias ( &b->x, 2 ); - p448_add ( &b->z, &b->x, &b->x ); - p448_sub ( &b->y, &b->z, &b->u ); - p448_mul ( &b->z, &L0, &b->y ); - p448_mul ( &b->x, &b->y, &b->t ); - p448_mul ( &b->y, &L0, &b->u ); -} - -void -convert_tw_affine_to_tw_pniels ( - struct tw_pniels_t* b, - const struct tw_affine_t* a -) { - p448_sub ( &b->n.a, &a->y, &a->x ); - p448_bias ( &b->n.a, 2 ); - p448_weak_reduce( &b->n.a ); - p448_add ( &b->n.b, &a->x, &a->y ); - p448_weak_reduce( &b->n.b ); - p448_mul ( &b->n.c, &a->y, &a->x ); - p448_mulw ( &b->z, &b->n.c, 78164 ); - p448_neg ( &b->n.c, &b->z ); - p448_bias ( &b->n.c, 2 ); - p448_weak_reduce( &b->n.c ); - p448_set_ui( &b->z, 2 ); -} - -void -convert_tw_affine_to_tw_extensible ( - struct tw_extensible_t* b, - const struct tw_affine_t* a -) { - p448_copy ( &b->x, &a->x ); - p448_copy ( &b->y, &a->y ); - p448_set_ui( &b->z, 1 ); - p448_copy ( &b->t, &a->x ); - p448_copy ( &b->u, &a->y ); -} - -void -convert_affine_to_extensible ( - struct extensible_t* b, - const struct affine_t* a -) { - p448_copy ( &b->x, &a->x ); - p448_copy ( &b->y, &a->y ); - p448_set_ui( &b->z, 1 ); - p448_copy ( &b->t, &a->x ); - p448_copy ( &b->u, &a->y ); -} - -void -convert_tw_extensible_to_tw_pniels ( - struct tw_pniels_t* b, - const struct tw_extensible_t* a -) { - p448_sub ( &b->n.a, &a->y, &a->x ); - p448_bias ( &b->n.a, 2 ); - p448_weak_reduce( &b->n.a ); - p448_add ( &b->n.b, &a->x, &a->y ); - p448_weak_reduce( &b->n.b ); - p448_mul ( &b->n.c, &a->u, &a->t ); - p448_mulw ( &b->z, &b->n.c, 78164 ); - p448_neg ( &b->n.c, &b->z ); - p448_bias ( &b->n.c, 2 ); - p448_weak_reduce( &b->n.c ); - p448_add ( &b->z, &a->z, &a->z ); - p448_weak_reduce( &b->z ); -} - -void -convert_tw_pniels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* d -) { - p448_add ( &e->u, &d->n.b, &d->n.a ); - p448_sub ( &e->t, &d->n.b, &d->n.a ); - p448_bias ( &e->t, 2 ); - p448_mul ( &e->x, &d->z, &e->t ); - p448_mul ( &e->y, &d->z, &e->u ); - p448_sqr ( &e->z, &d->z ); -} - -void -convert_tw_niels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_niels_t* d -) { - p448_add ( &e->y, &d->b, &d->a ); - p448_weak_reduce( &e->y ); - p448_sub ( &e->x, &d->b, &d->a ); - p448_bias ( &e->x, 2 ); - p448_weak_reduce( &e->x ); - p448_set_ui( &e->z, 1 ); - p448_copy ( &e->t, &e->x ); - p448_copy ( &e->u, &e->y ); -} - -void -montgomery_step ( - struct montgomery_t* a -) { - struct p448_t L0, L1; - p448_bias ( &a->xd, 2 ); - p448_bias ( &a->xa, 2 ); - p448_add ( &L0, &a->zd, &a->xd ); - p448_sub ( &L1, &a->xd, &a->zd ); - p448_sub ( &a->zd, &a->xa, &a->za ); - p448_mul ( &a->xd, &L0, &a->zd ); - p448_bias ( &a->xd, 2 ); - p448_add ( &a->zd, &a->za, &a->xa ); - p448_mul ( &a->za, &L1, &a->zd ); - p448_add ( &a->xa, &a->za, &a->xd ); - p448_sqr ( &a->zd, &a->xa ); - p448_mul ( &a->xa, &a->z0, &a->zd ); - p448_sub ( &a->zd, &a->xd, &a->za ); - p448_sqr ( &a->za, &a->zd ); - p448_sqr ( &a->xd, &L0 ); - p448_bias ( &a->xd, 2 ); - p448_sqr ( &L0, &L1 ); - p448_mulw ( &a->zd, &a->xd, 39082 ); - p448_bias ( &a->zd, 4 ); - p448_sub ( &L1, &a->xd, &L0 ); - p448_mul ( &a->xd, &L0, &a->zd ); - p448_sub ( &L0, &a->zd, &L1 ); - p448_mul ( &a->zd, &L0, &L1 ); -} - -void -serialize_montgomery ( - struct p448_t* sign, - struct p448_t* ser, - const struct montgomery_t* a, - const struct p448_t* sbz -) { - struct p448_t L0, L1, L2, L3; - p448_mul ( &L2, &a->z0, &a->zd ); - p448_bias ( &L2, 2 ); - p448_sub ( &L0, &L2, &a->xd ); - p448_mul ( &L2, &a->za, &L0 ); - p448_bias ( &L2, 2 ); - p448_mul ( &L1, &a->z0, &a->xd ); - p448_bias ( &L1, 2 ); - p448_sub ( &L0, &L1, &a->zd ); - p448_mul ( &L3, &a->xa, &L0 ); - p448_add ( &L1, &L3, &L2 ); - p448_sub ( &L0, &L2, &L3 ); - p448_mul ( &L2, &L0, &L1 ); - p448_mul ( &L0, sbz, &L2 ); - p448_mul ( &L2, &a->zd, &L0 ); - p448_mul ( sign, &L2, &a->zd ); - p448_mul ( ser, &L2, &a->xd ); - p448_mul ( &L2, sign, ser ); - p448_isr ( &L1, &L2 ); - p448_mul ( ser, sign, &L1 ); - p448_sqr ( &L0, &L1 ); - p448_mul ( sign, &L2, &L0 ); -} - -void -serialize_extensible ( - struct p448_t* b, - const struct extensible_t* a -) { - struct p448_t L0, L1, L2; - p448_sub ( &L0, &a->y, &a->z ); - p448_bias ( &L0, 2 ); - p448_add ( b, &a->z, &a->y ); - p448_mul ( &L1, &a->z, &a->x ); - p448_mul ( &L2, &L0, &L1 ); - p448_mul ( &L1, &L2, &L0 ); - p448_mul ( &L0, &L2, b ); - p448_mul ( &L2, &L1, &L0 ); - p448_isr ( &L0, &L2 ); - p448_mul ( b, &L1, &L0 ); - p448_sqr ( &L1, &L0 ); - p448_mul ( &L0, &L2, &L1 ); -} - -void -untwist_and_double_and_serialize ( - struct p448_t* b, - const struct tw_extensible_t* a -) { - struct p448_t L0, L1, L2, L3; - p448_mul ( &L3, &a->y, &a->x ); - p448_add ( b, &a->y, &a->x ); - p448_sqr ( &L1, b ); - p448_add ( &L2, &L3, &L3 ); - p448_sub ( b, &L1, &L2 ); - p448_bias ( b, 3 ); - p448_sqr ( &L2, &a->z ); - p448_sqr ( &L1, &L2 ); - p448_add ( &L2, b, b ); - p448_mulw ( b, &L2, 39082 ); - p448_neg ( &L2, b ); - p448_bias ( &L2, 2 ); - p448_mulw ( &L0, &L2, 39082 ); - p448_neg ( b, &L0 ); - p448_bias ( b, 2 ); - p448_mul ( &L0, &L2, &L1 ); - p448_mul ( &L2, b, &L0 ); - p448_isr ( &L0, &L2 ); - p448_mul ( &L1, b, &L0 ); - p448_sqr ( b, &L0 ); - p448_mul ( &L0, &L2, b ); - p448_mul ( b, &L1, &L3 ); -} - -void -twist ( - struct tw_extensible_t* b, - const struct extensible_t* a -) { - mask_t L0, L1; - p448_sqr ( &b->y, &a->z ); - p448_sqr ( &b->z, &a->x ); - p448_sub ( &b->u, &b->y, &b->z ); - p448_bias ( &b->u, 2 ); - p448_sub ( &b->z, &a->z, &a->x ); - p448_bias ( &b->z, 2 ); - p448_mul ( &b->y, &b->z, &a->y ); - p448_sub ( &b->z, &a->z, &a->y ); - p448_bias ( &b->z, 2 ); - p448_mul ( &b->x, &b->z, &b->y ); - p448_mul ( &b->t, &b->x, &b->u ); - p448_mul ( &b->y, &b->x, &b->t ); - p448_isr ( &b->t, &b->y ); - p448_mul ( &b->u, &b->x, &b->t ); - p448_sqr ( &b->x, &b->t ); - p448_mul ( &b->t, &b->y, &b->x ); - p448_mul ( &b->x, &a->x, &b->u ); - p448_mul ( &b->y, &a->y, &b->u ); - L1 = p448_is_zero( &b->z ); - L0 = - L1; - p448_addw ( &b->y, L0 ); - p448_weak_reduce( &b->y ); - p448_set_ui( &b->z, 1 ); - p448_copy ( &b->t, &b->x ); - p448_copy ( &b->u, &b->y ); -} - -mask_t -deserialize_affine ( - struct affine_t* a, - const struct p448_t* sz -) { - struct p448_t L0, L1, L2, L3; - p448_sqr ( &L1, sz ); - p448_copy ( &L3, &L1 ); - p448_addw ( &L3, 1 ); - p448_sqr ( &a->x, &L3 ); - p448_mulw ( &L3, &a->x, 39082 ); - p448_neg ( &a->x, &L3 ); - p448_add ( &L3, &L1, &L1 ); - p448_bias ( &L3, 1 ); - p448_add ( &a->y, &L3, &L3 ); - p448_add ( &L3, &a->y, &a->x ); - p448_copy ( &a->y, &L1 ); - p448_subw ( &a->y, 1 ); - p448_neg ( &a->x, &a->y ); - p448_bias ( &a->x, 2 ); - p448_mul ( &a->y, &a->x, &L3 ); - p448_sqr ( &L2, &a->x ); - p448_mul ( &L0, &L2, &a->y ); - p448_mul ( &a->y, &a->x, &L0 ); - p448_isr ( &L3, &a->y ); - p448_mul ( &a->y, &L2, &L3 ); - p448_sqr ( &L2, &L3 ); - p448_mul ( &L3, &L0, &L2 ); - p448_mul ( &L0, &a->x, &L3 ); - p448_bias ( &L0, 1 ); - p448_add ( &L2, &a->y, &a->y ); - p448_mul ( &a->x, sz, &L2 ); - p448_addw ( &L1, 1 ); - p448_mul ( &a->y, &L1, &L3 ); - p448_subw ( &L0, 1 ); - return p448_is_zero( &L0 ); -} - -mask_t -deserialize_and_twist_approx ( - struct tw_extensible_t* a, - const struct p448_t* sdm1, - const struct p448_t* sz -) { - struct p448_t L0, L1; - p448_sqr ( &a->z, sz ); - p448_copy ( &a->y, &a->z ); - p448_addw ( &a->y, 1 ); - p448_sqr ( &a->x, &a->y ); - p448_mulw ( &a->y, &a->x, 39082 ); - p448_neg ( &a->x, &a->y ); - p448_add ( &a->y, &a->z, &a->z ); - p448_bias ( &a->y, 1 ); - p448_add ( &a->u, &a->y, &a->y ); - p448_add ( &a->y, &a->u, &a->x ); - p448_sqr ( &a->x, &a->z ); - p448_subw ( &a->x, 1 ); - p448_neg ( &a->u, &a->x ); - p448_bias ( &a->u, 2 ); - p448_mul ( &a->x, sdm1, &a->u ); - p448_mul ( &L0, &a->x, &a->y ); - p448_mul ( &a->t, &L0, &a->y ); - p448_mul ( &a->u, &a->x, &a->t ); - p448_mul ( &a->t, &a->u, &L0 ); - p448_mul ( &a->y, &a->x, &a->t ); - p448_isr ( &L0, &a->y ); - p448_mul ( &a->y, &a->u, &L0 ); - p448_sqr ( &L1, &L0 ); - p448_mul ( &a->u, &a->t, &L1 ); - p448_mul ( &a->t, &a->x, &a->u ); - p448_bias ( &a->t, 1 ); - p448_add ( &a->x, sz, sz ); - p448_mul ( &L0, &a->u, &a->x ); - p448_copy ( &a->x, &a->z ); - p448_subw ( &a->x, 1 ); - p448_neg ( &L1, &a->x ); - p448_bias ( &L1, 2 ); - p448_mul ( &a->x, &L1, &L0 ); - p448_mul ( &L0, &a->u, &a->y ); - p448_addw ( &a->z, 1 ); - p448_mul ( &a->y, &a->z, &L0 ); - p448_subw ( &a->t, 1 ); - mask_t ret = p448_is_zero( &a->t ); - p448_set_ui( &a->z, 1 ); - p448_copy ( &a->t, &a->x ); - p448_copy ( &a->u, &a->y ); - return ret; -} - -void -set_identity_extensible ( - struct extensible_t* a -) { - p448_set_ui( &a->x, 0 ); - p448_set_ui( &a->y, 1 ); - p448_set_ui( &a->z, 1 ); - p448_set_ui( &a->t, 0 ); - p448_set_ui( &a->u, 0 ); -} - -void -set_identity_tw_extensible ( - struct tw_extensible_t* a -) { - p448_set_ui( &a->x, 0 ); - p448_set_ui( &a->y, 1 ); - p448_set_ui( &a->z, 1 ); - p448_set_ui( &a->t, 0 ); - p448_set_ui( &a->u, 0 ); -} - -void -set_identity_affine ( - struct affine_t* a -) { - p448_set_ui( &a->x, 0 ); - p448_set_ui( &a->y, 1 ); -} - -mask_t -eq_affine ( - const struct affine_t* a, - const struct affine_t* b -) { - mask_t L1, L2; - struct p448_t L0; - p448_sub ( &L0, &a->x, &b->x ); - p448_bias ( &L0, 2 ); - L2 = p448_is_zero( &L0 ); - p448_sub ( &L0, &a->y, &b->y ); - p448_bias ( &L0, 2 ); - L1 = p448_is_zero( &L0 ); - return L2 & L1; -} - -mask_t -eq_extensible ( - const struct extensible_t* a, - const struct extensible_t* b -) { - mask_t L3, L4; - struct p448_t L0, L1, L2; - p448_mul ( &L2, &b->z, &a->x ); - p448_mul ( &L1, &a->z, &b->x ); - p448_sub ( &L0, &L2, &L1 ); - p448_bias ( &L0, 2 ); - L4 = p448_is_zero( &L0 ); - p448_mul ( &L2, &b->z, &a->y ); - p448_mul ( &L1, &a->z, &b->y ); - p448_sub ( &L0, &L2, &L1 ); - p448_bias ( &L0, 2 ); - L3 = p448_is_zero( &L0 ); - return L4 & L3; -} - -mask_t -eq_tw_extensible ( - const struct tw_extensible_t* a, - const struct tw_extensible_t* b -) { - mask_t L3, L4; - struct p448_t L0, L1, L2; - p448_mul ( &L2, &b->z, &a->x ); - p448_mul ( &L1, &a->z, &b->x ); - p448_sub ( &L0, &L2, &L1 ); - p448_bias ( &L0, 2 ); - L4 = p448_is_zero( &L0 ); - p448_mul ( &L2, &b->z, &a->y ); - p448_mul ( &L1, &a->z, &b->y ); - p448_sub ( &L0, &L2, &L1 ); - p448_bias ( &L0, 2 ); - L3 = p448_is_zero( &L0 ); - return L4 & L3; -} - -void -elligator_2s_inject ( - struct affine_t* a, - const struct p448_t* r -) { - mask_t L0, L1; - struct p448_t L2, L3, L4, L5, L6, L7, L8, L9; - p448_sqr ( &a->x, r ); - p448_sqr ( &L3, &a->x ); - p448_copy ( &a->y, &L3 ); - p448_subw ( &a->y, 1 ); - p448_neg ( &L9, &a->y ); - p448_bias ( &L9, 2 ); - p448_sqr ( &L2, &L9 ); - p448_bias ( &L2, 1 ); - p448_mulw ( &L7, &L2, 1527402724 ); - p448_bias ( &L7, 2 ); - p448_mulw ( &L8, &L3, 6108985600 ); - p448_add ( &a->y, &L8, &L7 ); - p448_mulw ( &L8, &L2, 6109454568 ); - p448_sub ( &L7, &a->y, &L8 ); - p448_mulw ( &L4, &a->y, 78160 ); - p448_mul ( &L6, &L7, &L9 ); - p448_mul ( &L8, &L6, &L4 ); - p448_mul ( &L4, &L7, &L8 ); - p448_isr ( &L5, &L4 ); - p448_mul ( &L4, &L6, &L5 ); - p448_sqr ( &L6, &L5 ); - p448_mul ( &L5, &L8, &L6 ); - p448_mul ( &L8, &L7, &L5 ); - p448_mul ( &L7, &L8, &L5 ); - p448_copy ( &L6, &a->x ); - p448_subw ( &L6, 1 ); - p448_addw ( &a->x, 1 ); - p448_mul ( &L5, &a->x, &L8 ); - p448_sub ( &a->x, &L6, &L5 ); - p448_bias ( &a->x, 3 ); - p448_mul ( &L5, &L4, &a->x ); - p448_mulw ( &L4, &L5, 78160 ); - p448_neg ( &a->x, &L4 ); - p448_bias ( &a->x, 2 ); - p448_weak_reduce( &a->x ); - p448_add ( &L4, &L3, &L3 ); - p448_add ( &L3, &L4, &L2 ); - p448_subw ( &L3, 2 ); - p448_mul ( &L2, &L3, &L8 ); - p448_mulw ( &L3, &L2, 3054649120 ); - p448_add ( &L2, &L3, &a->y ); - p448_mul ( &a->y, &L7, &L2 ); - L1 = p448_is_zero( &L9 ); - L0 = - L1; - p448_addw ( &a->y, L0 ); - p448_weak_reduce( &a->y ); -} - -mask_t -validate_affine ( - const struct affine_t* a -) { - struct p448_t L0, L1, L2, L3; - p448_sqr ( &L0, &a->y ); - p448_sqr ( &L2, &a->x ); - p448_add ( &L3, &L2, &L0 ); - p448_subw ( &L3, 1 ); - p448_mulw ( &L1, &L2, 39081 ); - p448_neg ( &L2, &L1 ); - p448_bias ( &L2, 2 ); - p448_mul ( &L1, &L0, &L2 ); - p448_sub ( &L0, &L3, &L1 ); - p448_bias ( &L0, 3 ); - return p448_is_zero( &L0 ); -} - -mask_t -validate_tw_extensible ( - const struct tw_extensible_t* ext -) { - mask_t L4, L5; - struct p448_t L0, L1, L2, L3; - /* - * Check invariant: - * 0 = -x*y + z*t*u - */ - p448_mul ( &L0, &ext->t, &ext->u ); - p448_mul ( &L2, &ext->z, &L0 ); - p448_addw ( &L2, 0 ); - p448_mul ( &L1, &ext->x, &ext->y ); - p448_neg ( &L0, &L1 ); - p448_add ( &L1, &L0, &L2 ); - p448_bias ( &L1, 2 ); - L5 = p448_is_zero( &L1 ); - /* - * Check invariant: - * 0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2 - */ - p448_sqr ( &L2, &ext->y ); - p448_neg ( &L0, &L2 ); - p448_addw ( &L0, 0 ); - p448_sqr ( &L1, &ext->x ); - p448_bias ( &L1, 4 ); - p448_add ( &L2, &L1, &L0 ); - p448_sqr ( &L3, &ext->u ); - p448_sqr ( &L1, &ext->t ); - p448_mul ( &L0, &L1, &L3 ); - p448_mulw ( &L1, &L0, 39081 ); - p448_neg ( &L3, &L1 ); - p448_add ( &L1, &L3, &L2 ); - p448_neg ( &L3, &L0 ); - p448_add ( &L2, &L3, &L1 ); - p448_sqr ( &L1, &ext->z ); - p448_add ( &L0, &L1, &L2 ); - L4 = p448_is_zero( &L0 ); - return L5 & L4; -} - - diff --git a/ec_point.h b/ec_point.h deleted file mode 100644 index 4e28152..0000000 --- a/ec_point.h +++ /dev/null @@ -1,503 +0,0 @@ -/** - * @file ec_point.h - * @copyright - * Copyright (c) 2014 Cryptography Research, Inc. \n - * Released under the MIT License. See LICENSE.txt for license information. - * @author Mike Hamburg - * @warning This file was automatically generated. - */ - -#ifndef __CC_INCLUDED_EC_POINT_H__ -#define __CC_INCLUDED_EC_POINT_H__ - -#include "p448.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Affine point on an Edwards curve. - */ -struct affine_t { - struct p448_t x, y; -}; - -/** - * Affine point on a twisted Edwards curve. - */ -struct tw_affine_t { - struct p448_t x, y; -}; - -/** - * Montgomery buffer. - */ -struct montgomery_t { - struct p448_t z0, xd, zd, xa, za; -}; - -/** - * Extensible coordinates for Edwards curves, suitable for - * accumulators. - * - * Represents the point (x/z, y/z). The extra coordinates - * t,u satisfy xy = tuz, allowing for conversion to Extended - * form by multiplying t and u. - * - * The idea is that you don't have to do this multiplication - * when doubling the accumulator, because the t-coordinate - * isn't used there. At the same time, as long as you only - * have one point in extensible form, additions don't cost - * extra. - * - * This is essentially a lazier version of Hisil et al's - * lookahead trick. It might be worth considering that trick - * instead. - */ -struct extensible_t { - struct p448_t x, y, z, t, u; -}; - -/** - * Extensible coordinates for twisted Edwards curves, - * suitable for accumulators. - */ -struct tw_extensible_t { - struct p448_t x, y, z, t, u; -}; - -/** - * Niels coordinates for twisted Edwards curves. - * - * Good for mixed readdition; suitable for fixed tables. - */ -struct tw_niels_t { - struct p448_t a, b, c; -}; - -/** - * Projective niels coordinates for twisted Edwards curves. - * - * Good for readdition; suitable for temporary tables. - */ -struct tw_pniels_t { - struct tw_niels_t n; - struct p448_t z; -}; - - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_affine ( - struct affine_t* a, - const struct affine_t* ds -) __attribute__((unused,always_inline)); - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_tw_affine ( - struct tw_affine_t* a, - const struct tw_affine_t* ds -) __attribute__((unused,always_inline)); - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_montgomery ( - struct montgomery_t* a, - const struct montgomery_t* ds -) __attribute__((unused,always_inline)); - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_extensible ( - struct extensible_t* a, - const struct extensible_t* ds -) __attribute__((unused,always_inline)); - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_tw_extensible ( - struct tw_extensible_t* a, - const struct tw_extensible_t* ds -) __attribute__((unused,always_inline)); - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_tw_niels ( - struct tw_niels_t* a, - const struct tw_niels_t* ds -) __attribute__((unused,always_inline)); - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_tw_pniels ( - struct tw_pniels_t* a, - const struct tw_pniels_t* ds -) __attribute__((unused,always_inline)); - -/** - * Returns 1/sqrt(+- x). - * - * The Legendre symbol of the result is the same as that of the - * input. - * - * If x=0, returns 0. - */ -void -p448_isr ( - struct p448_t* a, - const struct p448_t* x -); - -/** - * Returns 1/x. - * - * If x=0, returns 0. - */ -void -p448_inverse ( - struct p448_t* a, - const struct p448_t* x -); - -/** - * Add two points on a twisted Edwards curve, one in Extensible form - * and the other in half-Niels form. - */ -void -add_tw_niels_to_tw_extensible ( - struct tw_extensible_t* d, - const struct tw_niels_t* e -); - -/** - * Add two points on a twisted Edwards curve, one in Extensible form - * and the other in half-Niels form. - */ -void -sub_tw_niels_from_tw_extensible ( - struct tw_extensible_t* d, - const struct tw_niels_t* e -); - -/** - * Add two points on a twisted Edwards curve, one in Extensible form - * and the other in projective Niels form. - */ -void -add_tw_pniels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* a -); - -/** - * Add two points on a twisted Edwards curve, one in Extensible form - * and the other in projective Niels form. - */ -void -sub_tw_pniels_from_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* a -); - -/** - * Double a point on a twisted Edwards curve, in "extensible" coordinates. - */ -void -double_tw_extensible ( - struct tw_extensible_t* a -); - -/** - * Double a point on an Edwards curve, in "extensible" coordinates. - */ -void -double_extensible ( - struct extensible_t* a -); - -/** - * Double a point, and transfer it to the twisted curve. - * - * That is, apply the 4-isogeny. - */ -void -twist_and_double ( - struct tw_extensible_t* b, - const struct extensible_t* a -); - -/** - * Double a point, and transfer it to the untwisted curve. - * - * That is, apply the dual isogeny. - */ -void -untwist_and_double ( - struct extensible_t* b, - const struct tw_extensible_t* a -); - -void -convert_tw_affine_to_tw_pniels ( - struct tw_pniels_t* b, - const struct tw_affine_t* a -); - -void -convert_tw_affine_to_tw_extensible ( - struct tw_extensible_t* b, - const struct tw_affine_t* a -); - -void -convert_affine_to_extensible ( - struct extensible_t* b, - const struct affine_t* a -); - -void -convert_tw_extensible_to_tw_pniels ( - struct tw_pniels_t* b, - const struct tw_extensible_t* a -); - -void -convert_tw_pniels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* d -); - -void -convert_tw_niels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_niels_t* d -); - -void -montgomery_step ( - struct montgomery_t* a -); - -void -serialize_montgomery ( - struct p448_t* sign, - struct p448_t* ser, - const struct montgomery_t* a, - const struct p448_t* sbz -); - -/** - * Serialize a point on an Edwards curve. - * - * The serialized form would be sqrt((z-y)/(z+y)) with sign of xz. - * - * It would be on 4y^2/(1-d) = x^3 + 2(1+d)/(1-d) * x^2 + x. - * - * But 4/(1-d) isn't square, so we need to twist it: - * - * -x is on 4y^2/(d-1) = x^3 + 2(d+1)/(d-1) * x^2 + x - */ -void -serialize_extensible ( - struct p448_t* b, - const struct extensible_t* a -); - -/** - * - */ -void -untwist_and_double_and_serialize ( - struct p448_t* b, - const struct tw_extensible_t* a -); - -/** - * Expensive transfer from untwisted to twisted. Roughly equivalent to halve and isogeny. - * Correctly transfers point of order 2. - * - * Can't have x=+1 (it's not even). There is code to fix the exception that would otherwise - * occur at (0,1). - * - * Input point must be even. - */ -void -twist ( - struct tw_extensible_t* b, - const struct extensible_t* a -); - -/** - * Deserialize a point to an untwisted affine curve. - */ -mask_t -deserialize_affine ( - struct affine_t* a, - const struct p448_t* sz -); - -/** - * Deserialize a point and transfer it to the twist. - * - * Not guaranteed to preserve the 4-torsion component. - * - * Refuses to deserialize +-1, which are the points of order 2. - */ -mask_t -deserialize_and_twist_approx ( - struct tw_extensible_t* a, - const struct p448_t* sdm1, - const struct p448_t* sz -); - -void -set_identity_extensible ( - struct extensible_t* a -); - -void -set_identity_tw_extensible ( - struct tw_extensible_t* a -); - -void -set_identity_affine ( - struct affine_t* a -); - -mask_t -eq_affine ( - const struct affine_t* a, - const struct affine_t* b -); - -mask_t -eq_extensible ( - const struct extensible_t* a, - const struct extensible_t* b -); - -mask_t -eq_tw_extensible ( - const struct tw_extensible_t* a, - const struct tw_extensible_t* b -); - -void -elligator_2s_inject ( - struct affine_t* a, - const struct p448_t* r -); - -mask_t -validate_affine ( - const struct affine_t* a -); - -/** - * Check the invariants for struct tw_extensible_t. - * PERF: This function was automatically generated - * with no regard for speed. - */ -mask_t -validate_tw_extensible ( - const struct tw_extensible_t* ext -); - - -void -copy_affine ( - struct affine_t* a, - const struct affine_t* ds -) { - p448_copy ( &a->x, &ds->x ); - p448_copy ( &a->y, &ds->y ); -} - -void -copy_tw_affine ( - struct tw_affine_t* a, - const struct tw_affine_t* ds -) { - p448_copy ( &a->x, &ds->x ); - p448_copy ( &a->y, &ds->y ); -} - -void -copy_montgomery ( - struct montgomery_t* a, - const struct montgomery_t* ds -) { - p448_copy ( &a->z0, &ds->z0 ); - p448_copy ( &a->xd, &ds->xd ); - p448_copy ( &a->zd, &ds->zd ); - p448_copy ( &a->xa, &ds->xa ); - p448_copy ( &a->za, &ds->za ); -} - -void -copy_extensible ( - struct extensible_t* a, - const struct extensible_t* ds -) { - p448_copy ( &a->x, &ds->x ); - p448_copy ( &a->y, &ds->y ); - p448_copy ( &a->z, &ds->z ); - p448_copy ( &a->t, &ds->t ); - p448_copy ( &a->u, &ds->u ); -} - -void -copy_tw_extensible ( - struct tw_extensible_t* a, - const struct tw_extensible_t* ds -) { - p448_copy ( &a->x, &ds->x ); - p448_copy ( &a->y, &ds->y ); - p448_copy ( &a->z, &ds->z ); - p448_copy ( &a->t, &ds->t ); - p448_copy ( &a->u, &ds->u ); -} - -void -copy_tw_niels ( - struct tw_niels_t* a, - const struct tw_niels_t* ds -) { - p448_copy ( &a->a, &ds->a ); - p448_copy ( &a->b, &ds->b ); - p448_copy ( &a->c, &ds->c ); -} - -void -copy_tw_pniels ( - struct tw_pniels_t* a, - const struct tw_pniels_t* ds -) { - copy_tw_niels( &a->n, &ds->n ); - p448_copy ( &a->z, &ds->z ); -} - - - -#ifdef __cplusplus -}; /* extern "C" */ -#endif - -#endif /* __CC_INCLUDED_EC_POINT_H__ */ diff --git a/exported.sym b/exported.sym deleted file mode 100644 index 424cb0a..0000000 --- a/exported.sym +++ /dev/null @@ -1,5 +0,0 @@ -_goldilocks_init -_goldilocks_keygen -_goldilocks_shared_secret -_goldilocks_sign -_goldilocks_verify diff --git a/goldilocks.c b/goldilocks.c deleted file mode 100644 index ca82294..0000000 --- a/goldilocks.c +++ /dev/null @@ -1,299 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#include - -#include "goldilocks.h" -#include "ec_point.h" -#include "scalarmul.h" -#include "barrett_field.h" -#include "crandom.h" -#include "sha512.h" - -#ifndef GOLDILOCKS_RANDOM_INIT_FILE -#define GOLDILOCKS_RANDOM_INIT_FILE "/dev/urandom" -#endif - -#ifndef GOLDILOCKS_RANDOM_RESEED_INTERVAL -#define GOLDILOCKS_RANDOM_RESEED_INTERVAL 10000 -#endif - -/* We'll check it ourselves */ -#ifndef GOLDILOCKS_RANDOM_RESEEDS_MANDATORY -#define GOLDILOCKS_RANDOM_RESEEDS_MANDATORY 0 -#endif - -/* TODO: word size; precompute */ -const struct affine_t goldilocks_base_point = { - {{ 0xf0de840aed939full, 0xc170033f4ba0c7ull, 0xf3932d94c63d96ull, 0x9cecfa96147eaaull, - 0x5f065c3c59d070ull, 0x3a6a26adf73324ull, 0x1b4faff4609845ull, 0x297ea0ea2692ffull - }}, - {{ 19, 0, 0, 0, 0, 0, 0, 0 }} -}; - -// /* TODO: direct */ -// void -// transfer_and_serialize(struct p448_t *out, const struct tw_extensible_t *twext) { -// struct extensible_t ext; -// transfer_tw_to_un(&ext, twext); -// serialize_extensible(out, &ext); -// } - -// FIXME: threading -// TODO: autogen instead of init -struct { - struct tw_niels_t combs[80]; - struct tw_niels_t wnafs[32]; - struct crandom_state_t rand; -} goldilocks_global; - -int -goldilocks_init () { - struct extensible_t ext; - struct tw_extensible_t text; - - /* Sanity check: the base point is on the curve. */ - assert(validate_affine(&goldilocks_base_point)); - - /* Convert it to twisted Edwards. */ - convert_affine_to_extensible(&ext, &goldilocks_base_point); - twist(&text, &ext); - //p448_transfer_un_to_tw(&text, &ext); - - /* Precompute the tables. */ - precompute_for_combs(goldilocks_global.combs, &text, 5, 5, 18); - precompute_for_wnaf(goldilocks_global.wnafs, &text, 5); - - return crandom_init_from_file(&goldilocks_global.rand, - GOLDILOCKS_RANDOM_INIT_FILE, - GOLDILOCKS_RANDOM_RESEED_INTERVAL, - GOLDILOCKS_RANDOM_RESEEDS_MANDATORY); -} - -static word_t -q448_lo[4] = { - 0xdc873d6d54a7bb0dull, - 0xde933d8d723a70aaull, - 0x3bb124b65129c96full, - 0x000000008335dc16ull -}; - -static const struct p448_t -sqrt_d_minus_1 = {{ - 0xd2e21836749f46ull, - 0x888db42b4f0179ull, - 0x5a189aabdeea38ull, - 0x51e65ca6f14c06ull, - 0xa49f7b424d9770ull, - 0xdcac4628c5f656ull, - 0x49443b8748734aull, - 0x12fec0c0b25b7aull -}}; - -int -goldilocks_keygen ( - struct goldilocks_private_key_t *privkey, - struct goldilocks_public_key_t *pubkey -) { - // TODO: check for init. Also maybe take CRANDOM object? API... - word_t sk[448*2/WORD_BITS]; - - struct tw_extensible_t exta; - struct p448_t pk; - - int ret = crandom_generate(&goldilocks_global.rand, (unsigned char *)sk, sizeof(sk)); - barrett_reduce(sk,sizeof(sk)/sizeof(sk[0]),0,q448_lo,7,4,62); // TODO word size - q448_serialize(privkey->opaque, sk); - - edwards_comb(&exta, sk, goldilocks_global.combs, 5, 5, 18); - //transfer_and_serialize_qtor(&pk, &sqrt_d_minus_1, &exta); - untwist_and_double_and_serialize(&pk, &exta); - - p448_serialize(pubkey->opaque, &pk); - memcpy(&privkey->opaque[56], pubkey->opaque, 56); - - int ret2 = crandom_generate(&goldilocks_global.rand, &privkey->opaque[112], 32); - if (!ret) ret = ret2; - - return ret ? GOLDI_ENODICE : GOLDI_EOK; -} - -int -goldilocks_shared_secret ( - uint8_t shared[64], - const struct goldilocks_private_key_t *my_privkey, - const struct goldilocks_public_key_t *your_pubkey -) { - word_t sk[448/WORD_BITS]; - struct p448_t pk; - - mask_t succ = p448_deserialize(&pk,your_pubkey->opaque), msucc = -1; - -#ifdef EXPERIMENT_ECDH_STIR_IN_PUBKEYS - struct p448_t sum, prod; - msucc &= p448_deserialize(&sum,&my_privkey->opaque[56]); - p448_mul(&prod,&pk,&sum); - p448_add(&sum,&pk,&sum); -#endif - - msucc &= q448_deserialize(sk,my_privkey->opaque); - succ &= p448_montgomery_ladder(&pk,&pk,sk,446,2); - - p448_serialize(shared,&pk); - - /* obliterate records of our failure by adjusting with obliteration key */ - struct sha512_ctx_t ctx; - sha512_init(&ctx); - -#ifdef EXPERIMENT_ECDH_OBLITERATE_CT - uint8_t oblit[40]; - unsigned i; - for (i=0; i<8; i++) { - oblit[i] = "noshared"[i] & ~(succ&msucc); - } - for (i=0; i<32; i++) { - oblit[8+i] = my_privkey->opaque[112+i] & ~(succ&msucc); - } - sha512_update(&ctx, oblit, 40); -#endif - -#ifdef EXPERIMENT_ECDH_STIR_IN_PUBKEYS - /* stir in the sum and product of the pubkeys. */ - uint8_t a_pk[56]; - p448_serialize(a_pk, &sum); - sha512_update(&ctx, a_pk, 56); - p448_serialize(a_pk, &prod); - sha512_update(&ctx, a_pk, 56); -#endif - - /* stir in the shared key and finish */ - sha512_update(&ctx, shared, 56); - sha512_final(&ctx, shared); - - return (GOLDI_ECORRUPT & ~msucc) - | (GOLDI_EINVAL & msucc &~ succ) - | (GOLDI_EOK & msucc & succ); -} - -int -goldilocks_sign ( - uint8_t signature_out[56*2], - const uint8_t *message, - uint64_t message_len, - const struct goldilocks_private_key_t *privkey -) { - - /* challenge = H(pk, [nonceG], message). FIXME: endian. */ - word_t skw[448/WORD_BITS]; - mask_t succ = q448_deserialize(skw,privkey->opaque); - if (!succ) { - memset(skw,0,sizeof(skw)); - return GOLDI_ECORRUPT; - } - - /* Derive a nonce. TODO: use HMAC. FIXME: endian. FUTURE: factor. */ - word_t tk[512/WORD_BITS]; - struct sha512_ctx_t ctx; - sha512_init(&ctx); - sha512_update(&ctx, (const unsigned char *)"signonce", 8); - sha512_update(&ctx, &privkey->opaque[112], 32); - sha512_update(&ctx, message, message_len); - sha512_update(&ctx, &privkey->opaque[112], 32); - sha512_final(&ctx, (unsigned char *)tk); - barrett_reduce(tk,512/WORD_BITS,0,q448_lo,7,4,62); // TODO word size - - /* 4[nonce]G */ - uint8_t signature_tmp[56]; - struct tw_extensible_t exta; - struct p448_t gsk; - edwards_comb(&exta, tk, goldilocks_global.combs, 5, 5, 18); - double_tw_extensible(&exta); - untwist_and_double_and_serialize(&gsk, &exta); - p448_serialize(signature_tmp, &gsk); - - word_t challenge[512/WORD_BITS]; - sha512_update(&ctx, &privkey->opaque[56], 56); - sha512_update(&ctx, signature_tmp, 56); - sha512_update(&ctx, message, message_len); - sha512_final(&ctx, (unsigned char *)challenge); - - // reduce challenge and sub. - barrett_negate(challenge,512/WORD_BITS,q448_lo,7,4,62); - - barrett_mac( - tk,512/WORD_BITS, - challenge,512/WORD_BITS, - skw,448/WORD_BITS, - q448_lo,7,4,62 - ); - - word_t carry = add_nr_ext_packed(tk,tk,512/WORD_BITS,tk,512/WORD_BITS,-1); - barrett_reduce(tk,512/WORD_BITS,carry,q448_lo,7,4,62); - - memcpy(signature_out, signature_tmp, 56); - q448_serialize(signature_out+56, tk); - memset((unsigned char *)tk,0,sizeof(tk)); - memset((unsigned char *)skw,0,sizeof(skw)); - memset((unsigned char *)challenge,0,sizeof(challenge)); - - /* response = 2(nonce_secret - sk*challenge) - * Nonce = 8[nonce_secret]*G - * PK = 2[sk]*G, except doubled (TODO) - * so [2] ( [response]G + 2[challenge]PK ) = Nonce - */ - - return 0; -} - -int -goldilocks_verify ( - const uint8_t signature[56*2], - const uint8_t *message, - uint64_t message_len, - const struct goldilocks_public_key_t *pubkey -) { - struct p448_t pk; - word_t s[448/WORD_BITS]; - - mask_t succ = p448_deserialize(&pk,pubkey->opaque); - if (!succ) return GOLDI_EINVAL; - - succ = q448_deserialize(s, &signature[56]); - if (!succ) return GOLDI_EINVAL; - - /* challenge = H(pk, [nonceG], message). FIXME: endian. */ - word_t challenge[512/WORD_BITS]; - struct sha512_ctx_t ctx; - sha512_init(&ctx); - sha512_update(&ctx, pubkey->opaque, 56); - sha512_update(&ctx, signature, 56); - sha512_update(&ctx, message, message_len); - sha512_final(&ctx, (unsigned char *)challenge); - barrett_reduce(challenge,512/WORD_BITS,0,q448_lo,7,4,62); - - struct p448_t eph; - struct tw_extensible_t pk_text; - - /* deserialize [nonce]G */ - succ = p448_deserialize(&eph, signature); - if (!succ) return GOLDI_EINVAL; - - - // succ = affine_deserialize(&pk_aff,&pk); - // if (!succ) return EINVAL; - // - // convert_affine_to_extensible(&pk_ext,&pk_aff); - // transfer_un_to_tw(&pk_text,&pk_ext); - succ = deserialize_and_twist_approx(&pk_text, &sqrt_d_minus_1, &pk); - if (!succ) return GOLDI_EINVAL; - - edwards_combo_var_fixed_vt( &pk_text, challenge, s, goldilocks_global.wnafs, 5 ); - - untwist_and_double_and_serialize( &pk, &pk_text ); - p448_sub(&eph, &eph, &pk); - p448_bias(&eph, 2); - - succ = p448_is_zero(&eph); - - return succ ? 0 : GOLDI_EINVAL; -} diff --git a/goldilocks.h b/goldilocks.h deleted file mode 100644 index feba5e9..0000000 --- a/goldilocks.h +++ /dev/null @@ -1,171 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -/** - * @file goldilocks.h - * @author Mike Hamburg - * @brief Goldilocks high-level functions. - */ -#ifndef __GOLDILOCKS_H__ -#define __GOLDILOCKS_H__ 1 - -#include - -/** - * @brief Serialized form of a Goldilocks public key. - * - * @warning This isn't even my final form! - */ -struct goldilocks_public_key_t { - uint8_t opaque[56]; /**< Serialized data. */ -}; - -/** - * @brief Serialized form of a Goldilocks private key. - * - * Contains 56 bytes of actual private key, 56 bytes of - * public key, and 32 bytes of symmetric key for randomization. - * - * @warning This isn't even my final form! - */ -struct goldilocks_private_key_t { - uint8_t opaque[144]; /**< Serialized data. */ -}; - -#ifdef __cplusplus -extern "C" { -#endif - -/** @brief No error. */ -static const int GOLDI_EOK = 0; - -/** @brief Error: your key is corrupt. */ -static const int GOLDI_ECORRUPT = 44801; - -/** @brief Error: other party's key is corrupt. */ -static const int GOLDI_EINVAL = 44802; - -/** @brief Error: not enough entropy. */ -static const int GOLDI_ENODICE = 44804; - -/** - * @brief Initialize Goldilocks' precomputed tables and - * random number generator. - * @retval GOLDI_EOK Success. - * @retval Nonzero An error occurred. - */ -int -goldilocks_init(); - -/** - * @brief Generate a new random keypair. - * @param [out] privkey The generated private key. - * @param [out] pubkey The generated public key. - * - * @warning This isn't even my final form! - * - * @retval GOLDI_EOK Success. - * @retval GOLDI_ENODICE Insufficient entropy. - */ -int -goldilocks_keygen ( - struct goldilocks_private_key_t *privkey, - struct goldilocks_public_key_t *pubkey -) __attribute__((warn_unused_result)); - -/** - * @brief Generate a Diffie-Hellman shared secret in constant time. - * - * This function uses some compile-time flags whose merit remains to - * be decided. - * - * If the flag EXPERIMENT_ECDH_OBLITERATE_CT is set, prepend 40 bytes - * of zeros to the secret before hashing. In the case that the other - * party's key is detectably corrupt, instead the symmetric part - * of the secret key is used to produce a pseudorandom value. - * - * If EXPERIMENT_ECDH_STIR_IN_PUBKEYS is set, the sum and product of - * the two parties' public keys is prepended to the hash. - * - * @warning This isn't even my final form! - * - * @param [out] shared The shared secret established with the other party. - * @param [in] my_privkey My private key. - * @param [in] your_pubkey The other party's public key. - * - * @retval GOLDI_EOK Success. - * @retval GOLDI_ECORRUPT My key is corrupt. - * @retval GOLDI_EINVAL The other party's key is corrupt. - */ -int -goldilocks_shared_secret ( - uint8_t shared[64], - const struct goldilocks_private_key_t *my_privkey, - const struct goldilocks_public_key_t *your_pubkey -) __attribute__((warn_unused_result)); - -/** - * @brief Sign a message. - * - * The signature is deterministic, using the symmetric secret found in the - * secret key to form a nonce. - * - * The technique used in signing is a modified Schnorr system, like EdDSA. - * - * @warning This isn't even my final form! - * @warning This function contains endian bugs. (TODO) - * - * @param [out] signature_out Space for the output signature. - * @param [in] message The message to be signed. - * @param [in] message_len The length of the message to be signed. - * @param [in] privkey My private key. - * - * @retval GOLDI_EOK Success. - * @retval GOLDI_ECORRUPT My key is corrupt. - */ -int -goldilocks_sign ( - uint8_t signature_out[56*2], - const uint8_t *message, - uint64_t message_len, - const struct goldilocks_private_key_t *privkey -); - -/** - * @brief Verify a signature. - * - * This function is fairly strict. It will correctly detect when - * the signature has the wrong cofactor companent. Once deserialization - * of numbers is strictified (TODO) it will limit the response to being - * less than q as well. - * - * Currently this function does not detect when the public key is weird, - * eg 0, has cofactor, etc. As a result, a party with a bogus public - * key could create signatures that succeed on some systems and fail on - * others. - * - * @warning This isn't even my final form! - * @warning This function contains endian bugs. (TODO) - * - * @param [out] signature_out The signature. - * @param [in] message The message to be verified. - * @param [in] message_len The length of the message to be verified. - * @param [in] pubkey The signer's public key. - * - * @retval GOLDI_EOK Success. - * @retval GOLDI_EINVAL The public key or signature is corrupt. - */ -int -goldilocks_verify ( - const uint8_t signature[56*2], - const uint8_t *message, - uint64_t message_len, - const struct goldilocks_public_key_t *pubkey -) __attribute__((warn_unused_result)); - -#ifdef __cplusplus -}; /* extern "C" */ -#endif - -#endif /* __GOLDILOCKS_H__ */ diff --git a/intrinsics.h b/intrinsics.h deleted file mode 100644 index 93a0974..0000000 --- a/intrinsics.h +++ /dev/null @@ -1,199 +0,0 @@ -/* Copyright (c) 2011 Stanford University. - * Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -/** @file crandom.h - * @brief cRandom intrinsics header. - */ - -#ifndef __CRANDOM_INTRINSICS_H__ -#define __CRANDOM_INTRINSICS_H__ 1 - -#include - -#include - -#define INTRINSIC \ - static __inline__ __attribute__((__gnu_inline__, __always_inline__)) - -#define GEN 1 -#define SSE2 2 -#define SSSE3 4 -#define AESNI 8 -#define XOP 16 -#define AVX 32 -#define AVX2 64 -#define RDRAND 128 - -INTRINSIC u_int64_t rdtsc() { - u_int64_t out = 0; -# if (defined(__i386__) || defined(__x86_64__)) - __asm__ __volatile__ ("rdtsc" : "=A"(out)); -# endif - return out; -} - -/** - * Return x unchanged, but confuse the compiler. - * - * This is mainly for use in test scripts, to prevent the value from - * being constant-folded or removed by dead code elimination. - * - * @param x A 64-bit number. - * @return The same number in a register. - */ -INTRINSIC u_int64_t opacify(u_int64_t x) { - __asm__ volatile("mov %0, %0" : "+r"(x)); - return x; -} - -#ifdef __AVX2__ -# define MIGHT_HAVE_AVX2 1 -# ifndef MUST_HAVE_AVX2 -# define MUST_HAVE_AVX2 0 -# endif -#else -# define MIGHT_HAVE_AVX2 0 -# define MUST_HAVE_AVX2 0 -#endif - -#ifdef __AVX__ -# define MIGHT_HAVE_AVX 1 -# ifndef MUST_HAVE_AVX -# define MUST_HAVE_AVX MUST_HAVE_AVX2 -# endif -#else -# define MIGHT_HAVE_AVX 0 -# define MUST_HAVE_AVX 0 -#endif - -#ifdef __SSSE3__ -# define MIGHT_HAVE_SSSE3 1 -# ifndef MUST_HAVE_SSSE3 -# define MUST_HAVE_SSSE3 MUST_HAVE_AVX -# endif -#else -# define MIGHT_HAVE_SSSE3 0 -# define MUST_HAVE_SSSE3 0 -#endif - -#ifdef __SSE2__ -# define MIGHT_HAVE_SSE2 1 -# ifndef MUST_HAVE_SSE2 -# define MUST_HAVE_SSE2 MUST_HAVE_SSSE3 -# endif - typedef __m128i ssereg; -# define pslldq _mm_slli_epi32 -# define pshufd _mm_shuffle_epi32 - -INTRINSIC ssereg sse2_rotate(int r, ssereg a) { - return _mm_slli_epi32(a, r) ^ _mm_srli_epi32(a, 32-r); -} - -#else -# define MIGHT_HAVE_SSE2 0 -# define MUST_HAVE_SSE2 0 -#endif - -#ifdef __AES__ -/* don't include intrinsics file, because not all platforms have it */ -# define MIGHT_HAVE_AESNI 1 -# ifndef MIGHT_HAVE_RDRAND -# define MIGHT_HAVE_RDRAND 1 -# endif -# ifndef MUST_HAVE_RDRAND -# define MUST_HAVE_RDRAND 0 -# endif -# ifndef MUST_HAVE_AESNI -# define MUST_HAVE_AESNI 0 -# endif - -INTRINSIC ssereg aeskeygenassist(int rc, ssereg x) { - ssereg out; - __asm__("aeskeygenassist %2, %1, %0" : "=x"(out) : "x"(x), "g"(rc)); - return out; -} - -INTRINSIC ssereg aesenc(ssereg subkey, ssereg block) { - ssereg out = block; - __asm__("aesenc %1, %0" : "+x"(out) : "x"(subkey)); - return out; -} - -INTRINSIC ssereg aesenclast(ssereg subkey, ssereg block) { - ssereg out = block; - __asm__("aesenclast %1, %0" : "+x"(out) : "x"(subkey)); - return out; -} - -#else -# define MIGHT_HAVE_AESNI 0 -# define MUST_HAVE_AESNI 0 -# define MIGHT_HAVE_RDRAND 0 -# define MUST_HAVE_RDRAND 0 -#endif - -#ifdef __XOP__ -/* don't include intrinsics file, because not all platforms have it */ -# define MIGHT_HAVE_XOP 1 -# ifndef MUST_HAVE_XOP -# define MUST_HAVE_XOP 0 -# endif -INTRINSIC ssereg xop_rotate(int amount, ssereg x) { - ssereg out; - __asm__ ("vprotd %1, %2, %0" : "=x"(out) : "x"(x), "g"(amount)); - return out; -} -#else -# define MIGHT_HAVE_XOP 0 -# define MUST_HAVE_XOP 0 -#endif - -#define MIGHT_MASK \ - ( SSE2 * MIGHT_HAVE_SSE2 \ - | SSSE3 * MIGHT_HAVE_SSSE3 \ - | AESNI * MIGHT_HAVE_AESNI \ - | XOP * MIGHT_HAVE_XOP \ - | AVX * MIGHT_HAVE_AVX \ - | RDRAND * MIGHT_HAVE_RDRAND \ - | AVX2 * MIGHT_HAVE_AVX2) - -#define MUST_MASK \ - ( SSE2 * MUST_HAVE_SSE2 \ - | SSSE3 * MUST_HAVE_SSSE3 \ - | AESNI * MUST_HAVE_AESNI \ - | XOP * MUST_HAVE_XOP \ - | AVX * MUST_HAVE_AVX \ - | RDRAND * MUST_HAVE_RDRAND \ - | AVX2 * MUST_HAVE_AVX2 ) - -#define MIGHT_HAVE(feature) ((MIGHT_MASK & feature) == feature) -#define MUST_HAVE(feature) ((MUST_MASK & feature) == feature) - -#ifdef __cplusplus -# define extern_c extern "C" -#else -# define extern_c -#endif - -extern_c -unsigned int crandom_detect_features(); - -#ifndef likely -# define likely(x) __builtin_expect((x),1) -# define unlikely(x) __builtin_expect((x),0) -#endif - -extern volatile unsigned int crandom_features; -INTRINSIC int HAVE(unsigned int feature) { - unsigned int features; - if (!MIGHT_HAVE(feature)) return 0; - if (MUST_HAVE(feature)) return 1; - features = crandom_features; - if (unlikely(!features)) - crandom_features = features = crandom_detect_features(); - return likely((features & feature) == feature); -} - -#endif /* __CRANDOM_INTRINSICS_H__ */ diff --git a/p448.c b/p448.c deleted file mode 100644 index 8327370..0000000 --- a/p448.c +++ /dev/null @@ -1,446 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -#include "p448.h" -#include "x86-64-arith.h" - -void -p448_mul ( - p448_t *__restrict__ cs, - const p448_t *as, - const p448_t *bs -) { - const uint64_t *a = as->limb, *b = bs->limb; - uint64_t *c = cs->limb; - - __uint128_t accum0 = 0, accum1 = 0, accum2; - uint64_t mask = (1ull<<56) - 1; - - uint64_t aa[4], bb[4]; - - /* For some reason clang doesn't vectorize this without prompting? */ - unsigned int i; - for (i=0; i>= 56; - accum1 >>= 56; - - { - accum2 = accum1; - accum1 += accum0; - accum0 = accum2; - } - - accum2 = widemul(&a[0],&b[0]); - accum1 -= accum2; - accum0 += accum2; - - accum2 = widemul(&aa[1],&bb[3]); - msb(&accum0, &a[1], &b[3]); - mac(&accum1, &a[5], &b[7]); - - msb(&accum0, &a[2], &b[2]); - mac(&accum2, &aa[2], &bb[2]); - mac(&accum1, &a[6], &b[6]); - - msb(&accum0, &a[3], &b[1]); - mac(&accum1, &a[7], &b[5]); - mac(&accum2, &aa[3], &bb[1]); - - accum0 += accum2; - accum1 += accum2; - mac(&accum0, &a[4], &b[4]); - mac(&accum1, &aa[0], &bb[0]); - - c[0] = ((uint64_t)(accum0)) & mask; - c[4] = ((uint64_t)(accum1)) & mask; - - accum0 >>= 56; - accum1 >>= 56; - - accum2 = widemul(&aa[2],&bb[3]); - msb(&accum0, &a[2], &b[3]); - mac(&accum1, &a[6], &b[7]); - - mac(&accum2, &aa[3], &bb[2]); - msb(&accum0, &a[3], &b[2]); - mac(&accum1, &a[7], &b[6]); - - accum1 += accum2; - accum0 += accum2; - - accum2 = widemul(&a[0],&b[1]); - mac(&accum1, &aa[0], &bb[1]); - mac(&accum0, &a[4], &b[5]); - - mac(&accum2, &a[1], &b[0]); - mac(&accum1, &aa[1], &bb[0]); - mac(&accum0, &a[5], &b[4]); - - accum1 -= accum2; - accum0 += accum2; - - c[1] = ((uint64_t)(accum0)) & mask; - c[5] = ((uint64_t)(accum1)) & mask; - - accum0 >>= 56; - accum1 >>= 56; - - accum2 = widemul(&aa[3],&bb[3]); - msb(&accum0, &a[3], &b[3]); - mac(&accum1, &a[7], &b[7]); - - accum1 += accum2; - accum0 += accum2; - - accum2 = widemul(&a[0],&b[2]); - mac(&accum1, &aa[0], &bb[2]); - mac(&accum0, &a[4], &b[6]); - - mac(&accum2, &a[1], &b[1]); - mac(&accum1, &aa[1], &bb[1]); - mac(&accum0, &a[5], &b[5]); - - mac(&accum2, &a[2], &b[0]); - mac(&accum1, &aa[2], &bb[0]); - mac(&accum0, &a[6], &b[4]); - - accum1 -= accum2; - accum0 += accum2; - - c[2] = ((uint64_t)(accum0)) & mask; - c[6] = ((uint64_t)(accum1)) & mask; - - accum0 >>= 56; - accum1 >>= 56; - - accum0 += c[3]; - accum1 += c[7]; - c[3] = ((uint64_t)(accum0)) & mask; - c[7] = ((uint64_t)(accum1)) & mask; - - /* we could almost stop here, but it wouldn't be stable, so... */ - - accum0 >>= 56; - accum1 >>= 56; - c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); - c[0] += ((uint64_t)(accum1)); -} - -void -p448_mulw ( - p448_t *__restrict__ cs, - const p448_t *as, - uint64_t b -) { - const uint64_t *a = as->limb; - uint64_t *c = cs->limb; - - __uint128_t accum0, accum4; - uint64_t mask = (1ull<<56) - 1; - - accum0 = widemul_rm(b, &a[0]); - accum4 = widemul_rm(b, &a[4]); - - c[0] = accum0 & mask; accum0 >>= 56; - c[4] = accum4 & mask; accum4 >>= 56; - - mac_rm(&accum0, b, &a[1]); - mac_rm(&accum4, b, &a[5]); - - c[1] = accum0 & mask; accum0 >>= 56; - c[5] = accum4 & mask; accum4 >>= 56; - - mac_rm(&accum0, b, &a[2]); - mac_rm(&accum4, b, &a[6]); - - c[2] = accum0 & mask; accum0 >>= 56; - c[6] = accum4 & mask; accum4 >>= 56; - - mac_rm(&accum0, b, &a[3]); - mac_rm(&accum4, b, &a[7]); - - c[3] = accum0 & mask; accum0 >>= 56; - c[7] = accum4 & mask; accum4 >>= 56; - - c[4] += accum0 + accum4; - c[0] += accum4; -} - -void -p448_sqr ( - p448_t *__restrict__ cs, - const p448_t *as -) { - const uint64_t *a = as->limb; - uint64_t *c = cs->limb; - - __uint128_t accum0 = 0, accum1 = 0, accum2; - uint64_t mask = (1ull<<56) - 1; - - uint64_t aa[4]; - - /* For some reason clang doesn't vectorize this without prompting? */ - unsigned int i; - for (i=0; i>= 55; - accum1 >>= 55; - - { - accum2 = accum1; - accum1 += accum0; - accum0 = accum2; - } - - accum2 = widemul(&a[0],&a[0]); - accum1 -= accum2; - accum0 += accum2; - - accum2 = widemul2(&aa[1],&aa[3]); - msb2(&accum0, &a[1], &a[3]); - mac2(&accum1, &a[5], &a[7]); - - msb(&accum0, &a[2], &a[2]); - mac(&accum2, &aa[2], &aa[2]); - mac(&accum1, &a[6], &a[6]); - - accum0 += accum2; - accum1 += accum2; - mac(&accum0, &a[4], &a[4]); - mac(&accum1, &aa[0], &aa[0]); - - c[0] = ((uint64_t)(accum0)) & mask; - c[4] = ((uint64_t)(accum1)) & mask; - - accum0 >>= 56; - accum1 >>= 56; - - accum2 = widemul2(&aa[2],&aa[3]); - msb2(&accum0, &a[2], &a[3]); - mac2(&accum1, &a[6], &a[7]); - - accum1 += accum2; - accum0 += accum2; - - accum2 = widemul2(&a[0],&a[1]); - mac2(&accum1, &aa[0], &aa[1]); - mac2(&accum0, &a[4], &a[5]); - - accum1 -= accum2; - accum0 += accum2; - - c[1] = ((uint64_t)(accum0)) & mask; - c[5] = ((uint64_t)(accum1)) & mask; - - accum0 >>= 56; - accum1 >>= 56; - - accum2 = widemul(&aa[3],&aa[3]); - msb(&accum0, &a[3], &a[3]); - mac(&accum1, &a[7], &a[7]); - - accum1 += accum2; - accum0 += accum2; - - accum2 = widemul2(&a[0],&a[2]); - mac2(&accum1, &aa[0], &aa[2]); - mac2(&accum0, &a[4], &a[6]); - - mac(&accum2, &a[1], &a[1]); - mac(&accum1, &aa[1], &aa[1]); - mac(&accum0, &a[5], &a[5]); - - accum1 -= accum2; - accum0 += accum2; - - c[2] = ((uint64_t)(accum0)) & mask; - c[6] = ((uint64_t)(accum1)) & mask; - - accum0 >>= 56; - accum1 >>= 56; - - accum0 += c[3]; - accum1 += c[7]; - c[3] = ((uint64_t)(accum0)) & mask; - c[7] = ((uint64_t)(accum1)) & mask; - - /* we could almost stop here, but it wouldn't be stable, so... */ - - accum0 >>= 56; - accum1 >>= 56; - c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); - c[0] += ((uint64_t)(accum1)); -} - -void -p448_strong_reduce ( - p448_t *a -) { - uint64_t mask = (1ull<<56)-1; - - /* first, clear high */ - a->limb[4] += a->limb[7]>>56; - a->limb[0] += a->limb[7]>>56; - a->limb[7] &= mask; - - /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */ - - /* compute total_value - p. No need to reduce mod p. */ - - __int128_t scarry = 0; - int i; - for (i=0; i<8; i++) { - scarry = scarry + a->limb[i] - ((i==4)?mask-1:mask); - a->limb[i] = scarry & mask; - scarry >>= 56; - } - - /* uncommon case: it was >= p, so now scarry = 0 and this = x - * common case: it was < p, so now scarry = -1 and this = x - p + 2^448 - * so let's add back in p. will carry back off the top for 2^448. - */ - - assert(is_zero(scarry) | is_zero(scarry+1)); - - uint64_t scarry_mask = scarry & mask; - __uint128_t carry = 0; - - /* add it back */ - for (i=0; i<8; i++) { - carry = carry + a->limb[i] + ((i==4)?(scarry_mask&~1):scarry_mask); - a->limb[i] = carry & mask; - carry >>= 56; - } - - assert(is_zero(carry + scarry)); -} - -mask_t -p448_is_zero ( - const struct p448_t *a -) { - struct p448_t b; - p448_copy(&b,a); - p448_strong_reduce(&b); - - uint64_t any = 0; - int i; - for (i=0; i<8; i++) { - any |= b.limb[i]; - } - return is_zero(any); -} - -void -p448_serialize ( - uint8_t *serial, - const struct p448_t *x -) { - int i,j; - p448_t red; - p448_copy(&red, x); - p448_strong_reduce(&red); - for (i=0; i<8; i++) { - for (j=0; j<7; j++) { - serial[7*i+j] = red.limb[i]; - red.limb[i] >>= 8; - } - assert(red.limb[i] == 0); - } -} - -void -q448_serialize ( - uint8_t *serial, - const word_t x[7] -) { - int i,j; - for (i=0; i<7; i++) { - for (j=0; j<8; j++) { - serial[8*i+j] = x[i]>>(8*j); - } - } -} - -mask_t -q448_deserialize ( - word_t x[7], - const uint8_t serial[56] -) { - int i,j; - for (i=0; i<7; i++) { - word_t out = 0; - for (j=0; j<8; j++) { - out |= ((word_t)serial[8*i+j])<<(8*j); - } - x[i] = out; - } - /* TODO: check for reduction */ - return MASK_SUCCESS; -} - -mask_t -p448_deserialize ( - p448_t *x, - const uint8_t serial[56] -) { - int i,j; - for (i=0; i<8; i++) { - word_t out = 0; - for (j=0; j<7; j++) { - out |= ((word_t)serial[7*i+j])<<(8*j); - } - x->limb[i] = out; - } - /* TODO: check for reduction */ - return MASK_SUCCESS; -} diff --git a/p448.h b/p448.h deleted file mode 100644 index 3150614..0000000 --- a/p448.h +++ /dev/null @@ -1,330 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#ifndef __P448_H__ -#define __P448_H__ 1 - -#include -#include - -#include "word.h" - -typedef struct p448_t { - uint64_t limb[8]; -} __attribute__((aligned(32))) p448_t; - -#ifdef __cplusplus -extern "C" { -#endif - -static __inline__ void -p448_set_ui ( - p448_t *out, - uint64_t x -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t do_swap -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_add ( - p448_t *out, - const p448_t *a, - const p448_t *b -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_sub ( - p448_t *out, - const p448_t *a, - const p448_t *b -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_neg ( - p448_t *out, - const p448_t *a -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_neg ( - p448_t *a, - mask_t doNegate -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_addw ( - p448_t *a, - uint64_t x -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_subw ( - p448_t *a, - uint64_t x -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_copy ( - p448_t *out, - const p448_t *a -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_weak_reduce ( - p448_t *inout -) __attribute__((unused,always_inline)); - -void -p448_strong_reduce ( - p448_t *inout -); - -mask_t -p448_is_zero ( - const p448_t *in -); - -static __inline__ void -p448_bias ( - p448_t *inout, - int amount -) __attribute__((unused,always_inline)); - -void -p448_mul ( - p448_t *__restrict__ out, - const p448_t *a, - const p448_t *b -); - -void -p448_mulw ( - p448_t *__restrict__ out, - const p448_t *a, - uint64_t b -); - -void -p448_sqr ( - p448_t *__restrict__ out, - const p448_t *a -); - -static __inline__ void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) __attribute__((unused,always_inline)); - -void -p448_serialize ( - uint8_t *serial, - const struct p448_t *x -); - -void -q448_serialize ( - uint8_t *serial, - const word_t x[7] -); - -mask_t -q448_deserialize ( - word_t x[7], - const uint8_t serial[56] -); - -mask_t -p448_deserialize ( - p448_t *x, - const uint8_t serial[56] -); - -/* -------------- Inline functions begin here -------------- */ - -void -p448_set_ui ( - p448_t *out, - uint64_t x -) { - int i; - out->limb[0] = x; - for (i=1; i<8; i++) { - out->limb[i] = 0; - } -} - -void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t doswap -) { - big_register_t *aa = (big_register_t*)a; - big_register_t *bb = (big_register_t*)b; - big_register_t m = doswap; - - unsigned int i; - for (i=0; ilimb[0]); i++) { - out->limb[i] = a->limb[i] + b->limb[i]; - } - */ -} - -void -p448_sub ( - p448_t *out, - const p448_t *a, - const p448_t *b -) { - unsigned int i; - for (i=0; ilimb[0]); i++) { - out->limb[i] = a->limb[i] - b->limb[i]; - } - */ -} - -void -p448_neg ( - p448_t *out, - const p448_t *a -) { - unsigned int i; - for (i=0; ilimb[0]); i++) { - out->limb[i] = -a->limb[i]; - } - */ -} - -void -p448_cond_neg( - p448_t *a, - mask_t doNegate -) { - unsigned int i; - struct p448_t negated; - big_register_t *aa = (big_register_t *)a; - big_register_t *nn = (big_register_t*)&negated; - big_register_t m = doNegate; - - p448_neg(&negated, a); - p448_bias(&negated, 2); - - for (i=0; ilimb[0] += x; -} - -void -p448_subw ( - p448_t *a, - uint64_t x -) { - a->limb[0] -= x; -} - -void -p448_copy ( - p448_t *out, - const p448_t *a -) { - *out = *a; -} - -void -p448_bias ( - p448_t *a, - int amt -) { - uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt; - uint64x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; - uint64x4_t *aa = (uint64x4_t*) a; - aa[0] += lo; - aa[1] += hi; -} - -void -p448_weak_reduce ( - p448_t *a -) { - /* PERF: use pshufb/palignr if anyone cares about speed of this */ - uint64_t mask = (1ull<<56) - 1; - uint64_t tmp = a->limb[7] >> 56; - int i; - a->limb[4] += tmp; - for (i=7; i>0; i--) { - a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56); - } - a->limb[0] = (a->limb[0] & mask) + tmp; -} - -void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) { - p448_t tmp; - assert(n>0); - if (n&1) { - p448_sqr(y,x); - n--; - } else { - p448_sqr(&tmp,x); - p448_sqr(y,&tmp); - n-=2; - } - for (; n; n-=2) { - p448_sqr(&tmp,y); - p448_sqr(y,&tmp); - } -} - -#ifdef __cplusplus -}; /* extern "C" */ -#endif - -#endif /* __P448_H__ */ diff --git a/scalarmul.c b/scalarmul.c deleted file mode 100644 index d2fe50f..0000000 --- a/scalarmul.c +++ /dev/null @@ -1,776 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#include - -#include "scalarmul.h" -#include "string.h" -#include "barrett_field.h" - -mask_t -p448_montgomery_ladder( - struct p448_t *out, - const struct p448_t *in, - const uint64_t *scalar, - int nbits, - int n_extra_doubles -) { - struct montgomery_t mont; - p448_sqr(&mont.z0,in); - p448_copy(&mont.za,&mont.z0); - p448_set_ui(&mont.xa,1); - p448_set_ui(&mont.zd,0); - p448_set_ui(&mont.xd,1); - - int i,j,n=(nbits-1)&63; - mask_t pflip = 0; - for (j=(nbits+63)/64-1; j>=0; j--) { - uint64_t w = scalar[j]; - for (i=n; i>=0; i--) { - mask_t flip = -((w>>i)&1); - p448_cond_swap(&mont.xa,&mont.xd,flip^pflip); - p448_cond_swap(&mont.za,&mont.zd,flip^pflip); - montgomery_step(&mont); - pflip = flip; - } - n = 63; - } - p448_cond_swap(&mont.xa,&mont.xd,pflip); - p448_cond_swap(&mont.za,&mont.zd,pflip); - - for (j=0; ja, &n->b, doNegate); - p448_cond_neg(&n->c, doNegate); -} - -static __inline__ void -cond_negate_tw_pniels( - struct tw_pniels_t *n, - mask_t doNegate -) { - cond_negate_tw_niels(&n->n, doNegate); -} - -void -constant_time_lookup_tw_pniels( - struct tw_pniels_t *out, - const struct tw_pniels_t *in, - int nin, - int idx -) { - big_register_t big_one = 1, big_i = idx; - big_register_t *o = (big_register_t *)out; - const big_register_t *i = (const big_register_t *)in; - int j; - unsigned int k; - - memset(out, 0, sizeof(*out)); - for (j=0; j>= 1; - if (i> (i%64) & 0xF, - inv = (bits>>3)-1; - bits ^= inv; - - constant_time_lookup_tw_pniels(&pn, multiples, 8, bits&7); - cond_negate_tw_pniels(&pn, inv); - convert_tw_pniels_to_tw_extensible(working, &pn); - - - for (i-=4; i>=0; i-=4) { - double_tw_extensible(working); - double_tw_extensible(working); - double_tw_extensible(working); - double_tw_extensible(working); - - bits = scalar2[i/64] >> (i%64) & 0xF; - inv = (bits>>3)-1; - bits ^= inv; - - constant_time_lookup_tw_pniels(&pn, multiples, 8, bits&7); - cond_negate_tw_pniels(&pn, inv); - add_tw_pniels_to_tw_extensible(working, &pn); - } -} - -void -edwards_scalar_multiply_vlook( - struct tw_extensible_t *working, - const uint64_t scalar[7] -) { - - const int nbits=448; /* HACK? */ - word_t prepared_data[14] = { - 0x9595b847fdf73126ull, - 0x9bb9b8a856af5200ull, - 0xb3136e22f37d5c4full, - 0x0000000189a19442ull, - 0x0000000000000000ull, - 0x0000000000000000ull, - 0x4000000000000000ull, - - 0x721cf5b5529eec33ull, - 0x7a4cf635c8e9c2abull, - 0xeec492d944a725bfull, - 0x000000020cd77058ull, - 0x0000000000000000ull, - 0x0000000000000000ull, - 0x0000000000000000ull - }; /* TODO: split off */ - - uint64_t scalar2[7]; - convert_to_signed_window_form(scalar2,scalar,prepared_data,7); - - struct tw_extensible_t tabulator; - copy_tw_extensible(&tabulator, working); - double_tw_extensible(&tabulator); - - struct tw_pniels_t pn, multiples[8]; - convert_tw_extensible_to_tw_pniels(&pn, &tabulator); - convert_tw_extensible_to_tw_pniels(&multiples[0], working); - - int i; - for (i=1; i<8; i++) { - add_tw_pniels_to_tw_extensible(working, &pn); - convert_tw_extensible_to_tw_pniels(&multiples[i], working); - } - - i = nbits - 4; - int bits = scalar2[i/64] >> (i%64) & 0xF, - inv = (bits>>3)-1; - bits ^= inv; - - copy_tw_pniels(&pn, &multiples[bits&7]); - cond_negate_tw_pniels(&pn, inv); - convert_tw_pniels_to_tw_extensible(working, &pn); - - - for (i-=4; i>=0; i-=4) { - double_tw_extensible(working); - double_tw_extensible(working); - double_tw_extensible(working); - double_tw_extensible(working); - - bits = scalar2[i/64] >> (i%64) & 0xF; - inv = (bits>>3)-1; - bits ^= inv; - - copy_tw_pniels(&pn, &multiples[bits&7]); - cond_negate_tw_pniels(&pn, inv); - add_tw_pniels_to_tw_extensible(working, &pn); - } -} - - -void -edwards_comb( - struct tw_extensible_t *working, - const word_t scalar[7], - const struct tw_niels_t *table, - int n, - int t, - int s -) { - word_t prepared_data[14] = { - 0xebec9967f5d3f5c2ull, - 0x0aa09b49b16c9a02ull, - 0x7f6126aec172cd8eull, - 0x00000007b027e54dull, - 0x0000000000000000ull, - 0x0000000000000000ull, - 0x4000000000000000ull, - - 0xc873d6d54a7bb0cfull, - 0xe933d8d723a70aadull, - 0xbb124b65129c96fdull, - 0x00000008335dc163ull, - 0x0000000000000000ull, - 0x0000000000000000ull, - 0x0000000000000000ull - }; /* TODO: split off. Above is for 450 bits */ - - word_t scalar2[7]; - convert_to_signed_window_form(scalar2,scalar,prepared_data,7); - - /* const int n=3, t=5, s=30; */ - int i,j,k; - - struct tw_niels_t ni; - - for (i=0; i> (bit%WORD_BITS) & 1) << k; - } - } - - mask_t invert = (tab>>(t-1))-1; - tab ^= invert; - tab &= (1<<(t-1)) - 1; - - constant_time_lookup_tw_niels(&ni, table + (j<<(t-1)), 1<<(t-1), tab); - cond_negate_tw_niels(&ni, invert); - if (i||j) { - add_tw_niels_to_tw_extensible(working, &ni); - } else { - convert_tw_niels_to_tw_extensible(working, &ni); - } - } - } -} - -void -simultaneous_invert_p448( - struct p448_t *out, - const struct p448_t *in, - int n -) { - if (!n) return; - - p448_copy(&out[1], &in[0]); - int i; - for (i=1; i0; i--) { - p448_mul(&tmp, &out[i], &out[0]); - p448_copy(&out[i], &tmp); - - p448_mul(&tmp, &out[0], &in[i]); - p448_copy(&out[0], &tmp); - } -} - -mask_t -precompute_for_combs( - struct tw_niels_t *out, - const struct tw_extensible_t *const_base, - int n, - int t, - int s -) { - if (s < 1) return 0; - - struct tw_extensible_t working, start; - copy_tw_extensible(&working, const_base); - struct tw_pniels_t pn_tmp; - - struct tw_pniels_t *doubles = (struct tw_pniels_t *) malloc(sizeof(*doubles) * (t-1)); - struct p448_t *zs = (struct p448_t *) malloc(sizeof(*zs) * (n<<(t-1))); - struct p448_t *zis = (struct p448_t *) malloc(sizeof(*zis) * (n<<(t-1))); - - if (!doubles || !zs || !zis) { - free(doubles); - free(zs); - free(zis); - return 0; - } - - int i,j,k; - for (i=0; i>1); - int idx = ((i+1)<<(t-1))-1 ^ gray; - - convert_tw_extensible_to_tw_pniels(&pn_tmp, &start); - copy_tw_niels(&out[idx], &pn_tmp.n); - p448_copy(&zs[idx], &pn_tmp.z); - - if (j >= (1<<(t-1)) - 1) break; - int delta = (j+1) ^ ((j+1)>>1) ^ gray; - - for (k=0; delta>1; k++) - delta >>=1; - - if (gray & (1< 0) { - double_tw_extensible(&base); - convert_tw_extensible_to_tw_pniels(&twop, &base); - add_tw_pniels_to_tw_extensible(&base, &tmp); - - convert_tw_extensible_to_tw_pniels(&tmp, &base); - p448_copy(&zs[1], &tmp.z); - copy_tw_niels(&out[1], &tmp.n); - - for (i=2; i < 1<= -2 - tableBits; i--) { - int bit = (i >= 0) - ? (scalar[i/WORD_BITS] >> (i%WORD_BITS)) & 1 - : 0; - - current = 2*current + bit; - - /* - * Sizing: |current| >= 2^(tableBits+1) -> |current| = 2^0 - * So current loses (tableBits+1) bits every time. It otherwise gains - * 1 bit per iteration. The number of iterations is - * (nbits + 2 + tableBits), and an additional control word is added at - * the end. So the total number of control words is at most - * ceil((nbits+1) / (tableBits+1)) + 2 = floor((nbits)/(tableBits+1)) + 2. - * There's also the stopper with power -1, for a total of +3. - */ - if (current >= (2<> 1; - current = -(current & 1); - - int j; - for (j=i; (delta & 1) == 0; j++) { - delta >>= 1; - } - control[position].power = j+1; - control[position].addend = delta; - position++; - assert(position <= nbits/(tableBits+1) + 2); - } - } - - control[position].power = -1; - control[position].addend = 0; - return position; -} - - -static void -prepare_wnaf_table( - struct tw_pniels_t *output, - struct tw_extensible_t *working, - int tbits -) { - convert_tw_extensible_to_tw_pniels(&output[0], working); - - if (tbits == 0) return; - - double_tw_extensible(working); - struct tw_pniels_t twop; - convert_tw_extensible_to_tw_pniels(&twop, working); - - add_tw_pniels_to_tw_extensible(working, &output[0]); - convert_tw_extensible_to_tw_pniels(&output[1], working); - - for (int i=2; i < 1< 0) { - assert(control[0].addend > 0); - assert(control[0].power >= 0); - convert_tw_pniels_to_tw_extensible(working, &precmp[control[0].addend >> 1]); - } else { - set_identity_tw_extensible(working); - return; - } - - int conti = 1, i; - for (i = control[0].power - 1; i >= 0; i--) { - double_tw_extensible(working); - - if (i == control[conti].power) { - assert(control[conti].addend); - - if (control[conti].addend > 0) { - add_tw_pniels_to_tw_extensible(working, &precmp[control[conti].addend >> 1]); - } else { - sub_tw_pniels_from_tw_extensible(working, &precmp[(-control[conti].addend) >> 1]); - } - conti++; - assert(conti <= control_bits); - } - } -} - -void -edwards_scalar_multiply_vt_pre( - struct tw_extensible_t *working, - const uint64_t scalar[7], - const struct tw_niels_t *precmp, - int table_bits -) { - /* HACK: not 448? */ - const int nbits=448; - struct smvt_control control[nbits/(table_bits+1)+3]; - - int control_bits = recode_wnaf(control, scalar, nbits, table_bits); - - if (control_bits > 0) { - assert(control[0].addend > 0); - assert(control[0].power >= 0); - convert_tw_niels_to_tw_extensible(working, &precmp[control[0].addend >> 1]); - } else { - set_identity_tw_extensible(working); - return; - } - - int conti = 1, i; - for (i = control[0].power - 1; i >= 0; i--) { - double_tw_extensible(working); - - if (i == control[conti].power) { - assert(control[conti].addend); - - if (control[conti].addend > 0) { - add_tw_niels_to_tw_extensible(working, &precmp[control[conti].addend >> 1]); - } else { - sub_tw_niels_from_tw_extensible(working, &precmp[(-control[conti].addend) >> 1]); - } - conti++; - assert(conti <= control_bits); - } - } -} - -void -edwards_combo_var_fixed_vt( - struct tw_extensible_t *working, - const uint64_t scalar_var[7], - const uint64_t scalar_pre[7], - const struct tw_niels_t *precmp, - int table_bits_pre -) { - /* HACK: not 448? */ - const int nbits_var=448, nbits_pre=448, table_bits_var = 3; - struct smvt_control control_var[nbits_var/(table_bits_var+1)+3]; - struct smvt_control control_pre[nbits_pre/(table_bits_pre+1)+3]; - - int ncb_var = recode_wnaf(control_var, scalar_var, nbits_var, table_bits_var); - int ncb_pre = recode_wnaf(control_pre, scalar_pre, nbits_pre, table_bits_pre); - (void)ncb_var; - (void)ncb_pre; - - struct tw_pniels_t precmp_var[1< control_pre[0].power) { - convert_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[0].addend >> 1]); - contv++; - } else if (i == control_pre[0].power && i >=0 ) { - convert_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[0].addend >> 1]); - add_tw_niels_to_tw_extensible(working, &precmp[control_pre[0].addend >> 1]); - contv++; contp++; - } else { - i = control_pre[0].power; - convert_tw_niels_to_tw_extensible(working, &precmp[control_pre[0].addend >> 1]); - contp++; - } - - if (i < 0) { - set_identity_tw_extensible(working); - return; - } - - for (i--; i >= 0; i--) { - double_tw_extensible(working); - - if (i == control_var[contv].power) { - assert(control_var[contv].addend); - - if (control_var[contv].addend > 0) { - add_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[contv].addend >> 1]); - } else { - sub_tw_pniels_from_tw_extensible(working, &precmp_var[(-control_var[contv].addend) >> 1]); - } - contv++; - } - - if (i == control_pre[contp].power) { - assert(control_pre[contp].addend); - - if (control_pre[contp].addend > 0) { - add_tw_niels_to_tw_extensible(working, &precmp[control_pre[contp].addend >> 1]); - } else { - sub_tw_niels_from_tw_extensible(working, &precmp[(-control_pre[contp].addend) >> 1]); - } - contp++; - } - } - - assert(contv == ncb_var); - assert(contp == ncb_pre); -} - - - diff --git a/scalarmul.h b/scalarmul.h deleted file mode 100644 index 208fb18..0000000 --- a/scalarmul.h +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#ifndef __P448_ALGO_H__ -#define __P448_ALGO_H__ 1 - -#include "ec_point.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Out = scalar * in, encoded in inverse square root - * format. - * - * nbits is the number of bits in scalar. - * - * The scalar is to be presented in little-endian form, - * meaning that scalar[0] contains the least significant - * word of the scalar. - * - * If the point "in" is on the curve, the return - * value will be set (to -1). - * - * If the point "in" is not on the curve, then the - * output will be incorrect. If the scalar is even, - * this condition will be detected by returning 0, - * unless the output is the identity point (0; TODO). - * If the scalar is odd, the value returned will be - * set (to -1; TODO). - * - * The input and output points are always even. - * Therefore on a cofactor-4 curve like Goldilocks, - * it is sufficient for security to make the scalar - * even. (TODO: detect when i/o has cofactor?) - * - * This function takes constant time, depending on - * nbits but not on in or scalar. - */ -mask_t -p448_montgomery_ladder( - struct p448_t *out, - const struct p448_t *in, - const uint64_t *scalar, - int nbits, - int n_extra_doubles -); - -void -edwards_scalar_multiply( - struct tw_extensible_t *working, - const uint64_t scalar[7] - /* TODO? int nbits */ -); - -void -edwards_scalar_multiply_vlook( - struct tw_extensible_t *working, - const uint64_t scalar[7] - /* TODO? int nbits */ -); - -mask_t -precompute_for_combs( - struct tw_niels_t *out, - const struct tw_extensible_t *const_base, - int n, - int t, - int s -); - -void -edwards_comb( - struct tw_extensible_t *working, - const word_t scalar[7], - const struct tw_niels_t *table, - int n, - int t, - int s -); - -void -edwards_scalar_multiply_vt( - struct tw_extensible_t *working, - const uint64_t scalar[7] -); - -void -edwards_scalar_multiply_vt_pre( - struct tw_extensible_t *working, - const uint64_t scalar[7], - const struct tw_niels_t *precmp, - int table_bits -); - -mask_t -precompute_for_wnaf( - struct tw_niels_t *out, - const struct tw_extensible_t *const_base, - int tbits -); /* TODO: attr don't ignore... */ - -void -edwards_combo_var_fixed_vt( - struct tw_extensible_t *working, - const uint64_t scalar_var[7], - const uint64_t scalar_pre[7], - const struct tw_niels_t *precmp, - int table_bits_pre -); - -#ifdef __cplusplus -}; -#endif - -#endif /* __P448_ALGO_H__ */ diff --git a/sha512.c b/sha512.c deleted file mode 100644 index 311a65b..0000000 --- a/sha512.c +++ /dev/null @@ -1,182 +0,0 @@ -/* Copyright (c) 2011 Stanford University. - * Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#include "sha512.h" - -#include -#include - -static inline uint64_t -rotate_r ( - uint64_t x, - int d -) { - return (x >> d) | (x << (64-d)); -} - -/* TODO: get from headers */ -static inline uint64_t -htobe64 (uint64_t x) { - __asm__ ("bswapq %0" : "+r"(x)); - return x; -} - -static const uint64_t -sha512_init_state[8] = { - 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, - 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179 -}; - -static const uint64_t -sha512_k[80] = { - 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, - 0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, - 0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, - 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694, - 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, - 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, - 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4, - 0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70, - 0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, - 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b, - 0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30, - 0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8, - 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, - 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, - 0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec, - 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b, - 0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, - 0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b, - 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c, - 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 -}; - -static inline uint64_t S0 (uint64_t h1) { - return rotate_r(h1, 28) ^ rotate_r(h1, 34) ^ rotate_r(h1, 39); -} - -static inline uint64_t S1 (uint64_t h4) { - return rotate_r(h4,14) ^ rotate_r(h4,18) ^ rotate_r(h4,41); -} - -static inline uint64_t s0 (uint64_t a) { - return rotate_r(a,1) ^ rotate_r(a,8) ^ a>>7; -} - -static inline uint64_t s1 (uint64_t b) { - return rotate_r(b,19) ^ rotate_r(b,61) ^ b>>6; -} - -static inline uint64_t ch (uint64_t h4, uint64_t h5, uint64_t h6) { - return h6^(h4 & (h6^h5)); -} - -static inline uint64_t maj(uint64_t h1, uint64_t h2, uint64_t h3) { - return (h1&h2) ^ (h3&(h1^h2)); -} - -static void -sha512_process_block ( - struct sha512_ctx_t *ctx -) { - uint64_t i, tmp, a, b, - *w = (uint64_t *) ctx->block, - *state = ctx->chain, - h0 = state[0], h1 = state[1], h2 = state[2], h3 = state[3], - h4 = state[4], h5 = state[5], h6 = state[6], h7 = state[7]; - - /* Clang doesn't unswitch this automatically */ - for (i=0; i<16; i++) { - /* load up the input word for this round */ - tmp = w[i] = htobe64(w[i]); - - tmp = tmp + h7 + S1(h4) + ch(h4,h5,h6) + sha512_k[i]; - - /* shift register */ - h7 = h6; h6 = h5; h5 = h4; - h4 = h3 + tmp; - h3 = h2; h2 = h1; h1 = h0; - h0 = tmp + maj(h1,h2,h3) + S0(h1); - } - - for (; i<80; i++) { - /* load up the input word for this round */ - a = w[(i+1 ) & 15]; - b = w[(i+14) & 15]; - tmp = w[i&15] = s0(a) + s1(b) + w[i&15] + w[(i+9) & 15]; - tmp = tmp + h7 + S1(h4) + ch(h4,h5,h6) + sha512_k[i]; - - /* shift register */ - h7 = h6; h6 = h5; h5 = h4; - h4 = h3 + tmp; - h3 = h2; h2 = h1; h1 = h0; - h0 = tmp + maj(h1,h2,h3) + S0(h1); - } - - state[0] += h0; - state[1] += h1; - state[2] += h2; - state[3] += h3; - state[4] += h4; - state[5] += h5; - state[6] += h6; - state[7] += h7; -} - -void -sha512_init ( - struct sha512_ctx_t *ctx -) { - ctx->nbytes = 0; - memcpy(ctx->chain, sha512_init_state, sizeof(sha512_init_state)); - memset(ctx->block, 0, sizeof(ctx->block)); -} - -void -sha512_update ( - struct sha512_ctx_t *ctx, - const unsigned char *data, - uint64_t bytes -) { - assert(ctx->nbytes < 1ull<<56); - assert(bytes < 1ull<<56); - - while (bytes) { - uint64_t fill = ctx->nbytes % 128, accept = 128 - fill; - if (accept > bytes) accept = bytes; - ctx->nbytes += accept; - memcpy(ctx->block + fill, data, accept); - - if (fill+accept == 128) - sha512_process_block(ctx); - - bytes -= accept; - data += accept; - } - - assert(ctx->nbytes < 1ull<<56); -} - -void -sha512_final ( - struct sha512_ctx_t *ctx, - uint8_t result[64] -) { - uint64_t fill = ctx->nbytes % 128, i; - ctx->block[fill++] = 0x80; - if (fill > 112) { - memset(ctx->block + fill, 0, 128-fill); - sha512_process_block(ctx); - fill = 0; - } - memset(ctx->block + fill, 0, 112-fill); - *((uint64_t *)&ctx->block[112]) = 0; - *((uint64_t *)&ctx->block[120]) = htobe64((ctx->nbytes * 8)); - sha512_process_block(ctx); - for (i=0; i<8; i++) { - ctx->chain[i] = htobe64(ctx->chain[i]); - } - memcpy(result, ctx->chain, sizeof(ctx->chain)); - sha512_init(ctx); -} diff --git a/sha512.h b/sha512.h deleted file mode 100644 index c6c83e5..0000000 --- a/sha512.h +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#ifndef __GOLDI_SHA512_H__ -#define __GOLDI_SHA512_H__ 1 - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* TODO: KAT */ - -/** - * SHA512 hashing context. - * - * This structure is opaque. - */ -struct sha512_ctx_t { - /** @privatesection */ - uint64_t chain[8]; - uint8_t block[128]; - uint64_t nbytes; -}; - -void -sha512_init ( - struct sha512_ctx_t *ctx -); - -void -sha512_update ( - struct sha512_ctx_t *ctx, - const unsigned char *data, - uint64_t bytes -); - -void -sha512_final ( - struct sha512_ctx_t *ctx, - uint8_t result[64] -); - -#ifdef __cplusplus -}; /* extern "C" */ -#endif - -#endif /* __GOLDI_SHA512_H__ */ diff --git a/word.h b/word.h deleted file mode 100644 index d5b32b4..0000000 --- a/word.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -#ifndef __WORD_H__ -#define __WORD_H__ - -#include - -typedef uint64_t word_t; -typedef __uint128_t dword_t; -typedef int64_t sword_t; -typedef __int128_t dsword_t; - -static const int WORD_BITS = sizeof(word_t) * 8; - -/* TODO: vector width for procs like ARM; gcc support */ -typedef uint64_t mask_t, vecmask_t __attribute__((ext_vector_type(4))); - -static const mask_t MASK_FAILURE = 0, MASK_SUCCESS = -1; - -/* FIXME this only works on clang */ -typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2))); -typedef int64_t int64x2_t __attribute__((ext_vector_type(2))); -typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4))); -typedef int64_t int64x4_t __attribute__((ext_vector_type(4))); -typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4))); -typedef int32_t int32x4_t __attribute__((ext_vector_type(4))); -typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8))); -typedef int32_t int32x8_t __attribute__((ext_vector_type(8))); - -#if __AVX2__ -typedef uint32x8_t big_register_t; -typedef uint64x4_t uint64xn_t; -#elif __SSE2__ || __ARM_NEON__ -typedef uint32x4_t big_register_t; -typedef uint64x2_t uint64xn_t; -#elif _WIN64 || __amd64__ || __X86_64__ || __aarch64__ -typedef uint64_t big_register_t, uint64xn_t; -#else -typedef uint64_t uint64xn_t; -typedef uint32_t big_register_t; -#endif - - -#if __AVX2__ || __SSE2__ || __ARM_NEON__ -static __inline__ big_register_t -br_is_zero(big_register_t x) { - return (big_register_t)(x == (big_register_t)0); -} -#else -#error "TODO: constant-time equality on vectorless platforms" -#endif - -#endif /* __WORD_H__ */ diff --git a/x86-64-arith.h b/x86-64-arith.h deleted file mode 100644 index 958ba66..0000000 --- a/x86-64-arith.h +++ /dev/null @@ -1,246 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -#ifndef __X86_64_ARITH_H__ -#define __X86_64_ARITH_H__ - -#include - -/* TODO: non x86-64 versions of these. - * TODO: autogenerate - */ - -static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) { - #ifndef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rax;" - "mulq %[b];" - : [c]"=a"(c), [d]"=d"(d) - : [b]"m"(*b), [a]"m"(*a) - : "cc"); - return (((__uint128_t)(d))<<64) | c; - #else - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rdx;" - "mulx %[b], %[c], %[d];" - : [c]"=r"(c), [d]"=r"(d) - : [b]"m"(*b), [a]"m"(*a) - : "rdx"); - return (((__uint128_t)(d))<<64) | c; - #endif -} - -static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) { - #ifndef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rax;" - "mulq %[b];" - : [c]"=a"(c), [d]"=d"(d) - : [b]"m"(*b), [a]"r"(a) - : "cc"); - return (((__uint128_t)(d))<<64) | c; - #else - uint64_t c,d; - __asm__ volatile - ("mulx %[b], %[c], %[d];" - : [c]"=r"(c), [d]"=r"(d) - : [b]"m"(*b), [a]"d"(a)); - return (((__uint128_t)(d))<<64) | c; - #endif -} - -static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) { - #ifndef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rax; " - "addq %%rax, %%rax; " - "mulq %[b];" - : [c]"=a"(c), [d]"=d"(d) - : [b]"m"(*b), [a]"m"(*a) - : "cc"); - return (((__uint128_t)(d))<<64) | c; - #else - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rdx;" - "leaq (,%%rdx,2), %%rdx;" - "mulx %[b], %[c], %[d];" - : [c]"=r"(c), [d]"=r"(d) - : [b]"m"(*b), [a]"m"(*a) - : "rdx"); - return (((__uint128_t)(d))<<64) | c; - #endif -} - -static __inline__ void mac(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; - - #ifdef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rdx; " - "mulx %[b], %[c], %[d]; " - "addq %[c], %[lo]; " - "adcq %[d], %[hi]; " - : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rdx", "cc"); - #else - __asm__ volatile - ("movq %[a], %%rax; " - "mulq %[b]; " - "addq %%rax, %[lo]; " - "adcq %%rdx, %[hi]; " - : [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rax", "rdx", "cc"); - #endif - - *acc = (((__uint128_t)(hi))<<64) | lo; -} - -static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; - - #ifdef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("mulx %[b], %[c], %[d]; " - "addq %[c], %[lo]; " - "adcq %[d], %[hi]; " - : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"d"(a) - : "cc"); - #else - __asm__ volatile - ("movq %[a], %%rax; " - "mulq %[b]; " - "addq %%rax, %[lo]; " - "adcq %%rdx, %[hi]; " - : [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"r"(a) - : "rax", "rdx", "cc"); - #endif - - *acc = (((__uint128_t)(hi))<<64) | lo; -} - -static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; - - #ifdef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rdx; " - "addq %%rdx, %%rdx; " - "mulx %[b], %[c], %[d]; " - "addq %[c], %[lo]; " - "adcq %[d], %[hi]; " - : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rdx", "cc"); - #else - __asm__ volatile - ("movq %[a], %%rax; " - "addq %%rax, %%rax; " - "mulq %[b]; " - "addq %%rax, %[lo]; " - "adcq %%rdx, %[hi]; " - : [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rax", "rdx", "cc"); - #endif - - *acc = (((__uint128_t)(hi))<<64) | lo; -} - -static __inline__ void msb(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; - #ifdef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rdx; " - "mulx %[b], %[c], %[d]; " - "subq %[c], %[lo]; " - "sbbq %[d], %[hi]; " - : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rdx", "cc"); - #else - __asm__ volatile - ("movq %[a], %%rax; " - "mulq %[b]; " - "subq %%rax, %[lo]; " - "sbbq %%rdx, %[hi]; " - : [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rax", "rdx", "cc"); - #endif - *acc = (((__uint128_t)(hi))<<64) | lo; -} - -static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; - #ifdef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rdx; " - "addq %%rdx, %%rdx; " - "mulx %[b], %[c], %[d]; " - "subq %[c], %[lo]; " - "sbbq %[d], %[hi]; " - : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rdx", "cc"); - #else - __asm__ volatile - ("movq %[a], %%rax; " - "addq %%rax, %%rax; " - "mulq %[b]; " - "subq %%rax, %[lo]; " - "sbbq %%rdx, %[hi]; " - : [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rax", "rdx", "cc"); - #endif - *acc = (((__uint128_t)(hi))<<64) | lo; - -} - -static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t c,d, lo = *acc, hi = *acc>>64; - __asm__ volatile - ("movq %[a], %%rdx; " - "mulx %[b], %[c], %[d]; " - "subq %[lo], %[c]; " - "sbbq %[hi], %[d]; " - : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rdx", "cc"); - *acc = (((__uint128_t)(d))<<64) | c; -} - -static __inline__ __uint128_t widemulu(uint64_t a, uint64_t b) { - return ((__uint128_t)(a)) * b; -} - -static __inline__ __int128_t widemuls(int64_t a, int64_t b) { - return ((__int128_t)(a)) * b; -} - -static __inline__ uint64_t opacify(uint64_t x) { - __asm__ volatile("" : "+r"(x)); - return x; -} - -static __inline__ mask_t is_zero(uint64_t x) { - __asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x)); - return ~x; -} - -#endif /* __X86_64_ARITH_H__ */