From d7f64fd8851e50bcc2a098592cdc989b58755274 Mon Sep 17 00:00:00 2001 From: Michael Hamburg Date: Sun, 30 Mar 2014 21:23:21 -0700 Subject: [PATCH] delete the text files, for real this time? --- barrett_field.c | 269 ---------------- barrett_field.h | 126 -------- bench.c | 827 ------------------------------------------------ crandom.c | 442 -------------------------- crandom.h | 140 -------- ec_point.c | 745 ------------------------------------------- ec_point.h | 503 ----------------------------- exported.sym | 5 - goldilocks.c | 299 ----------------- goldilocks.h | 171 ---------- intrinsics.h | 199 ------------ p448.c | 446 -------------------------- p448.h | 330 ------------------- scalarmul.c | 776 --------------------------------------------- scalarmul.h | 117 ------- sha512.c | 182 ----------- sha512.h | 49 --- word.h | 55 ---- x86-64-arith.h | 246 -------------- 19 files changed, 5927 deletions(-) delete mode 100644 barrett_field.c delete mode 100644 barrett_field.h delete mode 100644 bench.c delete mode 100644 crandom.c delete mode 100644 crandom.h delete mode 100644 ec_point.c delete mode 100644 ec_point.h delete mode 100644 exported.sym delete mode 100644 goldilocks.c delete mode 100644 goldilocks.h delete mode 100644 intrinsics.h delete mode 100644 p448.c delete mode 100644 p448.h delete mode 100644 scalarmul.c delete mode 100644 scalarmul.h delete mode 100644 sha512.c delete mode 100644 sha512.h delete mode 100644 word.h delete mode 100644 x86-64-arith.h diff --git a/barrett_field.c b/barrett_field.c deleted file mode 100644 index a27095a..0000000 --- a/barrett_field.c +++ /dev/null @@ -1,269 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -#include "barrett_field.h" -#include - -word_t -add_nr_ext_packed( - word_t *out, - const word_t *a, - int nwords_a, - const word_t *c, - int nwords_c, - word_t mask -) { - int i; - dword_t carry = 0; - for (i=0; i>= WORD_BITS; - } - for (; i>= WORD_BITS; - } - return carry; -} - -static __inline__ word_t -add_nr_packed( - word_t *a, - const word_t *c, - int nwords -) { - int i; - dword_t carry = 0; - for (i=0; i>= WORD_BITS; - } - return carry; -} - -static __inline__ word_t -sub_nr_packed( - word_t *a, - const word_t *c, - int nwords -) { - int i; - dsword_t carry = 0; - for (i=0; i>= WORD_BITS; - } - return carry; -} - -word_t -sub_nr_ext_packed( - word_t *out, - const word_t *a, - int nwords_a, - const word_t *c, - int nwords_c, - word_t mask -) { - int i; - dsword_t carry = 0; - for (i=0; i>= WORD_BITS; - } - for (; i>= WORD_BITS; - } - return carry; -} - -static word_t -widemac( - word_t *accum, - int nwords_accum, - const word_t *mier, - int nwords_mier, - word_t mand, - word_t carry -) { - int i; - assert(nwords_accum >= nwords_mier); - - for (i=0; i> WORD_BITS; - } - - for (; i> WORD_BITS; - } - - return carry; -} - -void -barrett_negate ( - word_t *a, - int nwords_a, - const word_t *p_lo, - int nwords_p, - int nwords_lo, - int p_shift -) { - int i; - dsword_t carry = 0; - - barrett_reduce(a,nwords_a,0,p_lo,nwords_p,nwords_lo,p_shift); - - /* Have p = 2^big - p_lo. Want p - a = 2^big - p_lo - a */ - - for (i=0; i>= WORD_BITS; - } - for (; i>= WORD_BITS; - } - } - - a[nwords_p-1] = carry = carry + (((word_t)1) << p_shift); - - for (; i>64)); -} - -void -barrett_reduce( - word_t *a, - int nwords_a, - word_t a_carry, - const word_t *p_lo, - int nwords_p, - int nwords_lo, - int p_shift -) { - /* TODO: non 2^k-c primes. */ - int repeat, nwords_left_in_a=nwords_a; - - /* TODO: is there a point to this a_carry business? */ - assert(a_carry < ((word_t)1)<= nwords_p); - - for (; nwords_left_in_a >= nwords_p; nwords_left_in_a--) { - for (repeat=0; repeat<2; repeat++) { - /* PERF: surely a more careful implementation could - * avoid this double round - */ - word_t mand = a[nwords_left_in_a-1] >> p_shift; - a[nwords_left_in_a-1] &= (((word_t)1)<>p_shift); - a[nwords_p-1] &= (((word_t)1)<= nwords_p); - - /* nwords_tmp = max(nwords_a + 1, nwords_p + 1, nwords_accum if doMac); */ - int nwords_tmp = (nwords_a > nwords_p) ? nwords_a : nwords_p; - nwords_tmp++; - if (nwords_tmp < nwords_accum && doMac) - nwords_tmp = nwords_accum; - - word_t tmp[nwords_tmp]; - int bpos, i; - - for (i=0; i= 0; bpos--) { - /* Invariant at the beginning of the loop: the high word is unused. */ - assert(tmp[nwords_tmp-1] == 0); - - /* shift up */ - for (i=nwords_tmp-2; i>=0; i--) { - tmp[i+1] = tmp[i]; - } - tmp[0] = 0; - - /* mac and reduce */ - word_t carry = widemac(tmp, nwords_tmp, a, nwords_a, b[bpos], 0); - - /* the mac can't carry, because nwords_tmp >= nwords_a+1 and its high word is clear */ - assert(!carry); - barrett_reduce(tmp, nwords_tmp, carry, p_lo, nwords_p, nwords_lo, p_shift); - - /* at this point, the number of words used is nwords_p <= nwords_tmp-1, - * so the high word is again clear */ - } - - if (doMac) { - word_t cout = add_nr_packed(tmp, accum, nwords_accum); - barrett_reduce(tmp, nwords_tmp, cout, p_lo, nwords_p, nwords_lo, p_shift); - } - - for (i=0; i -#include -#include -#include -#include - -#include "p448.h" -#include "ec_point.h" -#include "scalarmul.h" -#include "barrett_field.h" -#include "crandom.h" -#include "goldilocks.h" -#include "sha512.h" - -word_t q448_lo[4] = { - 0xdc873d6d54a7bb0dull, - 0xde933d8d723a70aaull, - 0x3bb124b65129c96full, - 0x000000008335dc16ull -}; - -double now() { - struct timeval tv; - gettimeofday(&tv, NULL); - - return tv.tv_sec + tv.tv_usec/1000000.0; -} - -void p448_randomize( struct crandom_state_t *crand, struct p448_t *a ) { - crandom_generate(crand, (unsigned char *)a, sizeof(*a)); - p448_strong_reduce(a); -} - -void q448_randomize( struct crandom_state_t *crand, uint64_t sk[7] ) { - crandom_generate(crand, (unsigned char *)sk, sizeof(uint64_t)*7); -} - -void p448_print( const char *descr, const struct p448_t *a ) { - p448_t b; - p448_copy(&b, a); - p448_strong_reduce(&b); - int j; - printf("%s = 0x", descr); - for (j=7; j>=0; j--) { - printf("%014llx", (unsigned long long)b.limb[j]); - } - printf("\n"); -} - -void p448_print_full( const char *descr, const struct p448_t *a ) { - int j; - printf("%s = 0x", descr); - for (j=7; j>=0; j--) { - printf("%02llx_%014llx ", a->limb[j]>>56, (unsigned long long)a->limb[j]&(1ull<<56)-1); - } - printf("\n"); -} - -void q448_print( const char *descr, const uint64_t secret[7] ) { - int j; - printf("%s = 0x", descr); - for (j=6; j>=0; j--) { - printf("%016llx", (unsigned long long)secret[j]); - } - printf("\n"); -} - -int main(int argc, char **argv) { - (void)argc; - (void)argv; - - struct tw_extensible_t ext; - struct extensible_t exta; - struct tw_niels_t niels; - struct tw_pniels_t pniels; - struct affine_t affine; - struct montgomery_t mb; - struct p448_t a,b,c,d; - - - double when; - int i,j; - - /* Bad randomness so we can debug. */ - char initial_seed[32]; - for (i=0; i<32; i++) initial_seed[i] = i; - struct crandom_state_t crand; - crandom_init_from_buffer(&crand, initial_seed); - - uint64_t sk[7],tk[7]; - q448_randomize(&crand, sk); - - when = now(); - for (i=0; i<10000000; i++) { - p448_mul(&c, &b, &a); - } - when = now() - when; - printf("mul: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<10000000; i++) { - p448_sqr(&c, &a); - } - when = now() - when; - printf("sqr: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<5000000; i++) { - p448_mul(&c, &b, &a); - p448_mul(&a, &b, &c); - } - when = now() - when; - printf("mul dep: %5.1fns\n", when * 1e9 / i / 2); - - when = now(); - for (i=0; i<10000000; i++) { - p448_mulw(&c, &b, 1234562); - } - when = now() - when; - printf("mulw: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<100000; i++) { - p448_randomize(&crand, &a); - } - when = now() - when; - printf("rand448: %5.1fns\n", when * 1e9 / i); - - struct sha512_ctx_t sha; - uint8_t hashout[128]; - when = now(); - for (i=0; i<10000; i++) { - sha512_init(&sha); - sha512_final(&sha, hashout); - } - when = now() - when; - printf("sha512 1blk: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<10000; i++) { - sha512_update(&sha, hashout, 128); - } - when = now() - when; - printf("sha512 blk: %5.1fns (%0.2f MB/s)\n", when * 1e9 / i, 128*i/when/1e6); - - when = now(); - for (i=0; i<10000; i++) { - p448_isr(&c, &a); - } - when = now() - when; - printf("isr auto: %5.1fµs\n", when * 1e6 / i); - - for (i=0; i<100; i++) { - p448_randomize(&crand, &a); - p448_isr(&d,&a); - p448_sqr(&b,&d); - p448_mul(&c,&b,&a); - p448_sqr(&b,&c); - p448_subw(&b,1); - p448_bias(&b,1); - if (!p448_is_zero(&b)) { - printf("ISR validation failure!\n"); - p448_print("a", &a); - p448_print("s", &d); - } - } - - when = now(); - for (i=0; i<10000; i++) { - elligator_2s_inject(&affine, &a); - } - when = now() - when; - printf("elligator: %5.1fµs\n", when * 1e6 / i); - - for (i=0; i<100; i++) { - p448_randomize(&crand, &a); - elligator_2s_inject(&affine, &a); - if (!validate_affine(&affine)) { - printf("Elligator validation failure!\n"); - p448_print("a", &a); - p448_print("x", &affine.x); - p448_print("y", &affine.y); - } - } - - when = now(); - for (i=0; i<10000; i++) { - deserialize_affine(&affine, &a); - } - when = now() - when; - printf("decompress: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<10000; i++) { - serialize_extensible(&a, &exta); - } - when = now() - when; - printf("compress: %5.1fµs\n", when * 1e6 / i); - - int goods = 0; - for (i=0; i<100; i++) { - p448_randomize(&crand, &a); - mask_t good = deserialize_affine(&affine, &a); - if (good & !validate_affine(&affine)) { - printf("Deserialize validation failure!\n"); - p448_print("a", &a); - p448_print("x", &affine.x); - p448_print("y", &affine.y); - } else if (good) { - goods++; - convert_affine_to_extensible(&exta,&affine); - serialize_extensible(&b, &exta); - p448_sub(&c,&b,&a); - p448_bias(&c,2); - if (!p448_is_zero(&c)) { - printf("Reserialize validation failure!\n"); - p448_print("a", &a); - p448_print("x", &affine.x); - p448_print("y", &affine.y); - deserialize_affine(&affine, &b); - p448_print("b", &b); - p448_print("x", &affine.x); - p448_print("y", &affine.y); - printf("\n"); - } - } - } - if (goods=0; j--) { - lsk[j] = random(); - lsk[j] = lsk[j]<<22 ^ random(); - lsk[j] = lsk[j]<<22 ^ random(); - } - } - - when = now(); - for (i=0; i<1000000; i++) { - barrett_reduce(lsk,12,0,q448_lo,7,4,62); - } - when = now() - when; - printf("barrett red: %5.1fns\n", when * 1e9 / i); - // - // when = now(); - // for (i=0; i<100000; i++) { - // barrett_mac(lsk,7,lsk,7,lsk,7,q448_lo,7,4,62); - // } - // when = now() - when; - // printf("barrett mac: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000000; i++) { - add_tw_niels_to_tw_extensible(&ext, &niels); - } - when = now() - when; - printf("exti+niels: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000000; i++) { - add_tw_pniels_to_tw_extensible(&ext, &pniels); - } - when = now() - when; - printf("exti+pniels: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000000; i++) { - double_tw_extensible(&ext); - } - when = now() - when; - printf("exti dbl: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000000; i++) { - untwist_and_double(&exta, &ext); - } - when = now() - when; - printf("i->a isog: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000000; i++) { - twist_and_double(&ext, &exta); - } - when = now() - when; - printf("a->i isog: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000000; i++) { - montgomery_step(&mb); - } - when = now() - when; - printf("monty step: %5.1fns\n", when * 1e9 / i); - - when = now(); - for (i=0; i<1000; i++) { - p448_montgomery_ladder(&a,&b,sk,448,0); - } - when = now() - when; - printf("full ladder: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - edwards_scalar_multiply(&ext,sk); - } - when = now() - when; - printf("edwards smz: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - edwards_scalar_multiply_vlook(&ext,sk); - untwist_and_double_and_serialize(&a,&ext); - } - when = now() - when; - printf("edwards svl: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - q448_randomize(&crand, sk); - edwards_scalar_multiply_vt(&ext,sk); - } - when = now() - when; - printf("edwards vtm: %5.1fµs\n", when * 1e6 / i); - - struct tw_niels_t wnaft[1<<6]; - when = now(); - for (i=0; i<1000; i++) { - precompute_for_wnaf(wnaft,&ext,6); - } - when = now() - when; - printf("wnaf6 pre: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - q448_randomize(&crand, sk); - edwards_scalar_multiply_vt_pre(&ext,sk,wnaft,6); - } - when = now() - when; - printf("edwards vt6: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - precompute_for_wnaf(wnaft,&ext,4); - } - when = now() - when; - printf("wnaf4 pre: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - q448_randomize(&crand, sk); - edwards_scalar_multiply_vt_pre(&ext,sk,wnaft,4); - } - when = now() - when; - printf("edwards vt4: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - precompute_for_wnaf(wnaft,&ext,5); - } - when = now() - when; - printf("wnaf5 pre: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - q448_randomize(&crand, sk); - edwards_scalar_multiply_vt_pre(&ext,sk,wnaft,5); - } - when = now() - when; - printf("edwards vt5: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - q448_randomize(&crand, sk); - q448_randomize(&crand, tk); - edwards_combo_var_fixed_vt(&ext,sk,tk,wnaft,5); - } - when = now() - when; - printf("vt vf combo: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<1000; i++) { - deserialize_affine(&affine, &a); - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - edwards_scalar_multiply(&ext,sk); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - } - when = now() - when; - printf("edwards sm: %5.1fµs\n", when * 1e6 / i); - - struct tw_niels_t table[80] __attribute__((aligned(32))); - - while (1) { - p448_randomize(&crand, &a); - if (deserialize_affine(&affine, &a)) break; - } - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - when = now(); - for (i=0; i<1000; i++) { - precompute_for_combs(table, &ext, 5, 5, 18); - } - when = now() - when; - printf("pre(5,5,18): %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<10000; i++) { - edwards_comb(&ext, sk, table, 5, 5, 18); - } - when = now() - when; - printf("com(5,5,18): %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<10000; i++) { - edwards_comb(&ext, sk, table, 3, 5, 30); - } - when = now() - when; - printf("com(3,5,30): %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<10000; i++) { - edwards_comb(&ext, sk, table, 8, 4, 14); - } - when = now() - when; - printf("com(4,4,28): %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<10000; i++) { - q448_randomize(&crand, sk); - edwards_comb(&ext, sk, table, 5, 5, 18); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - } - when = now() - when; - printf("keygen: %5.1fµs\n", when * 1e6 / i); - - printf("\nGoldilocks:\n"); - - int res = goldilocks_init(); - assert(!res); - - struct goldilocks_public_key_t gpk,hpk; - struct goldilocks_private_key_t gsk,hsk; - - when = now(); - for (i=0; i<10000; i++) { - if (i&1) { - res = goldilocks_keygen(&gsk,&gpk); - } else { - res = goldilocks_keygen(&hsk,&hpk); - } - assert(!res); - } - when = now() - when; - printf("keygen: %5.1fµs\n", when * 1e6 / i); - - uint8_t ss1[64],ss2[64]; - int gres1,gres2; - when = now(); - for (i=0; i<10000; i++) { - if (i&1) { - gres1 = goldilocks_shared_secret(ss1,&gsk,&hpk); - } else { - gres2 = goldilocks_shared_secret(ss2,&hsk,&gpk); - } - } - when = now() - when; - printf("ecdh: %5.1fµs\n", when * 1e6 / i); - if (gres1 || gres2 || memcmp(ss1,ss2,64)) { - printf("[FAIL] %d %d\n",gres1,gres2); - - printf("ss1 = "); - for (i=0; i<56; i++) { - printf("%02x", ss1[i]); - } - printf("\nss2 = "); - for (i=0; i<56; i++) { - printf("%02x", ss2[i]); - } - printf("\n"); - } - - uint8_t sout[56*2]; - const char *message = "hello world"; - uint64_t message_len = strlen(message); - when = now(); - for (i=0; i<10000; i++) { - res = goldilocks_sign(sout,(const unsigned char *)message,message_len,&gsk); - assert(!res); - } - when = now() - when; - printf("sign: %5.1fµs\n", when * 1e6 / i); - - when = now(); - for (i=0; i<10000; i++) { - res = goldilocks_verify(sout,(const unsigned char *)message,message_len,&gpk); - } - when = now() - when; - printf("verify: %5.1fµs\n", when * 1e6 / i); - - printf("\nTesting...\n"); - - - int failures=0, successes = 0; - for (i=0; i<1000; i++) { - (void)goldilocks_keygen(&gsk,&gpk); - goldilocks_sign(sout,(const unsigned char *)message,message_len,&gsk); - res = goldilocks_verify(sout,(const unsigned char *)message,message_len,&gpk); - if (res) failures++; - } - if (failures) { - printf("FAIL %d/%d signature checks!\n", failures, i); - } - - failures=0; successes = 0; - for (i=0; i<1000; i++) { - p448_randomize(&crand, &a); - uint64_t two = 2; - mask_t good = p448_montgomery_ladder(&b,&a,&two,2,0); - if (!good) continue; - - uint64_t x = rand(), y=rand(), z=x*y; - p448_montgomery_ladder(&b,&a,&x,64,0); - p448_montgomery_ladder(&c,&b,&y,64,0); - p448_montgomery_ladder(&b,&a,&z,64,0); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - if (!p448_is_zero(&d)) { - printf("Odd ladder validation failure %d!\n", ++failures); - p448_print("a", &a); - printf("x=%llx, y=%llx, z=%llx\n", x,y,z); - p448_print("c", &c); - p448_print("b", &b); - printf("\n"); - } - } - - failures = 0; - for (i=0; i<1000; i++) { - mask_t good; - do { - p448_randomize(&crand, &a); - good = deserialize_affine(&affine, &a); - } while (!good); - - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - untwist_and_double_and_serialize(&c, &ext); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - - if (good && !p448_is_zero(&d)){ - printf("Iso+serial validation failure %d!\n", ++failures); - p448_print("a", &a); - p448_print("b", &b); - p448_print("c", &c); - printf("\n"); - } else if (good) { - successes ++; - } - } - if (successes < i/3) { - printf("Iso+serial variation: only %d/%d successful.\n", successes, i); - } - - failures = 0; - uint64_t four = 4; - for (i=0; i<1000; i++) { - p448_randomize(&crand, &a); - q448_randomize(&crand, sk); - - mask_t good = p448_montgomery_ladder(&b,&a,&four,3,0); - good &= p448_montgomery_ladder(&c,&b,sk,448,0); - - mask_t goodb = deserialize_affine(&affine, &a); - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - edwards_scalar_multiply(&ext,sk); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - - if (good != goodb) { - printf("Compatibility validation failure %d: good: %d != %d\n", ++failures, (int)(-good), (int)(-goodb)); - } else if (good && !p448_is_zero(&d)){ - printf("Compatibility validation failure %d!\n", ++failures); - p448_print("a", &a); - q448_print("s", sk); - p448_print("c", &c); - p448_print("b", &b); - printf("\n"); - } else if (good) { - successes ++; - } - } - if (successes < i/3) { - printf("Compatibility variation: only %d/%d successful.\n", successes, i); - } - - successes = failures = 0; - for (i=0; i<1000; i++) { - p448_randomize(&crand, &a); - q448_randomize(&crand, sk); - if (!i) bzero(&sk, sizeof(sk)); - - mask_t good = p448_montgomery_ladder(&b,&a,&four,3,0); - good &= p448_montgomery_ladder(&c,&b,sk,448,0); - if (!good) continue; - - deserialize_affine(&affine, &a); - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - - precompute_for_combs(table, &ext, 5, 5, 18); - edwards_comb(&ext, sk, table, 5, 5, 18); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - - if (!p448_is_zero(&d)){ - printf("Comb validation failure %d!\n", ++failures); - p448_print("a", &a); - q448_print("s", sk); - p448_print("c", &c); - p448_print("b", &b); - printf("\n"); - } else if (good) { - successes ++; - } - } - if (successes < i/3) { - printf("Comb variation: only %d/%d successful.\n", successes, i); - } - - successes = failures = 0; - for (i=0; i<1000; i++) { - p448_randomize(&crand, &a); - q448_randomize(&crand, sk); - if (!i) bzero(&sk, sizeof(sk)); - - mask_t good = deserialize_affine(&affine, &a); - if (!good) continue; - - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - struct tw_extensible_t exu; - copy_tw_extensible(&exu, &ext); - - edwards_scalar_multiply(&ext,sk); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - - edwards_scalar_multiply_vt(&exu,sk); - untwist_and_double(&exta,&exu); - serialize_extensible(&c, &exta); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - - if (!p448_is_zero(&d)){ - printf("WNAF validation failure %d!\n", ++failures); - p448_print("a", &a); - q448_print("s", sk); - p448_print("c", &c); - p448_print("b", &b); - printf("\n"); - } else if (good) { - successes ++; - } - } - if (successes < i/3) { - printf("WNAF variation: only %d/%d successful.\n", successes, i); - } - - successes = failures = 0; - for (i=0; i<1000; i++) { - p448_randomize(&crand, &a); - q448_randomize(&crand, sk); - if (!i) bzero(&sk, sizeof(sk)); - - mask_t good = deserialize_affine(&affine, &a); - if (!good) continue; - - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - struct tw_extensible_t exu; - copy_tw_extensible(&exu, &ext); - - edwards_scalar_multiply(&ext,sk); - untwist_and_double(&exta,&ext); - serialize_extensible(&b, &exta); - - precompute_for_wnaf(wnaft,&exu,5); - edwards_scalar_multiply_vt_pre(&exu,sk,wnaft,5); - untwist_and_double(&exta,&exu); - serialize_extensible(&c, &exta); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - - if (!p448_is_zero(&d)){ - printf("PreWNAF validation failure %d!\n", ++failures); - p448_print("a", &a); - q448_print("s", sk); - p448_print("c", &c); - p448_print("b", &b); - for (j=0; j<1<<5; j++) { - printf("WNAFT %d\n", j); - p448_print(" a",&wnaft[j].a); - p448_print(" b",&wnaft[j].b); - p448_print(" c",&wnaft[j].c); - } - printf("\n\n"); - } else if (good) { - successes ++; - } - } - if (successes < i/3) { - printf("PreWNAF variation: only %d/%d successful.\n", successes, i); - } - - successes = failures = 0; - for (i=0; i<1000; i++) { - struct p448_t aa; - struct tw_extensible_t exu,exv,exw; - - mask_t good; - do { - p448_randomize(&crand, &a); - good = deserialize_affine(&affine, &a); - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&ext,&exta); - } while (!good); - do { - p448_randomize(&crand, &aa); - good = deserialize_affine(&affine, &aa); - convert_affine_to_extensible(&exta,&affine); - twist_and_double(&exu,&exta); - } while (!good); - p448_randomize(&crand, &aa); - - q448_randomize(&crand, sk); - if (i==0 || i==2) bzero(&sk, sizeof(sk)); - q448_randomize(&crand, tk); - if (i==0 || i==1) bzero(&tk, sizeof(tk)); - - copy_tw_extensible(&exv, &ext); - copy_tw_extensible(&exw, &exu); - edwards_scalar_multiply(&exv,sk); - edwards_scalar_multiply(&exw,tk); - convert_tw_extensible_to_tw_pniels(&pniels, &exw); - add_tw_pniels_to_tw_extensible(&exv,&pniels); - untwist_and_double(&exta,&exv); - serialize_extensible(&b, &exta); - - precompute_for_wnaf(wnaft,&exu,5); - edwards_combo_var_fixed_vt(&ext,sk,tk,wnaft,5); - untwist_and_double(&exta,&exv); - serialize_extensible(&c, &exta); - - p448_sub(&d,&b,&c); - p448_bias(&d,2); - - if (!p448_is_zero(&d)){ - printf("PreWNAF combo validation failure %d!\n", ++failures); - p448_print("a", &a); - p448_print("A", &aa); - q448_print("s", sk); - q448_print("t", tk); - p448_print("c", &c); - p448_print("b", &b); - printf("\n\n"); - } else if (good) { - successes ++; - } - } - if (successes < i) { - printf("PreWNAF combo variation: only %d/%d successful.\n", successes, i); - } - - successes = failures = 0; - for (i=0; i<1000; i++) { - p448_randomize(&crand, &a); - - q448_randomize(&crand, sk); - q448_randomize(&crand, tk); - - uint64_t two = 2; - mask_t good = p448_montgomery_ladder(&b,&a,&two,2,0); - p448_montgomery_ladder(&b,&a,sk,448,0); - p448_montgomery_ladder(&d,&b,tk,448,0); - p448_montgomery_ladder(&b,&a,tk,448,0); - p448_montgomery_ladder(&c,&b,sk,448,0); - - p448_sub(&b,&c,&d); - p448_bias(&b,2); - - mask_t success = p448_is_zero(&b) | ~good; - - if (!success) { - printf("Ladder validation failure %d!\n", ++failures); - p448_print("a", &a); - q448_print("s", sk); - q448_print("t", tk); - p448_print("c", &c); - p448_print("d", &d); - printf("\n"); - } - } - - return 0; -} diff --git a/crandom.c b/crandom.c deleted file mode 100644 index 468b226..0000000 --- a/crandom.c +++ /dev/null @@ -1,442 +0,0 @@ -/* Copyright (c) 2011 Stanford University. - * Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -/* Chacha random number generator code copied from crandom */ - -#include "intrinsics.h" -#include "crandom.h" -#include - -volatile unsigned int crandom_features = 0; - -unsigned int crandom_detect_features() { - unsigned int out = GEN; - -# if (defined(__i386__) || defined(__x86_64__)) - u_int32_t a,b,c,d; - - a=1; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d)); - out |= GEN; - if (d & 1<<26) out |= SSE2; - if (d & 1<< 9) out |= SSSE3; - if (c & 1<<25) out |= AESNI; - if (c & 1<<28) out |= AVX; - if (b & 1<<5) out |= AVX2; - - a=0x80000001; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d)); - if (c & 1<<11) out |= XOP; - if (c & 1<<30) out |= RDRAND; -# endif - - return out; -} - - - -INTRINSIC u_int64_t rdrand(int abort_on_fail) { - uint64_t out = 0; - int tries = 1000; - - if (HAVE(RDRAND)) { - # if defined(__x86_64__) - u_int64_t out, a=0; - for (; tries && !a; tries--) { - __asm__ __volatile__ ( - "rdrand %0\n\tsetc %%al" - : "=r"(out), "+a"(a) :: "cc" - ); - } - # elif (defined(__i386__)) - u_int32_t reg, a=0; - uint64_t out; - for (; tries && !a; tries--) { - __asm__ __volatile__ ( - "rdrand %0\n\tsetc %%al" - : "=r"(reg), "+a"(a) :: "cc" - ); - } - out = reg; a = 0; - for (; tries && !a; tries--) { - __asm__ __volatile__ ( - "rdrand %0\n\tsetc %%al" - : "=r"(reg), "+a"(a) :: "cc" - ); - } - out = out << 32 | reg; - return out; - # else - abort(); // whut - # endif - } else { - tries = 0; - } - - if (abort_on_fail && !tries) { - abort(); - } - - return out; -} - - -/* ------------------------------- Vectorized code ------------------------------- */ -#define shuffle(x,i) _mm_shuffle_epi32(x, \ - i + ((i+1)&3)*4 + ((i+2)&3)*16 + ((i+3)&3)*64) - -#define add _mm_add_epi32 -#define add64 _mm_add_epi64 - -#define NEED_XOP (MIGHT_HAVE(XOP)) -#define NEED_SSSE3 (MIGHT_HAVE(SSSE3) && !MUST_HAVE(XOP)) -#define NEED_SSE2 (MIGHT_HAVE(SSE2) && !MUST_HAVE(SSSE3)) -#define NEED_CONV (!MUST_HAVE(SSE2)) - -#if NEED_XOP -static __inline__ void -quarter_round_xop( - ssereg *a, - ssereg *b, - ssereg *c, - ssereg *d -) { - *a = add(*a,*b); *d = xop_rotate(16, *d ^ *a); - *c = add(*c,*d); *b = xop_rotate(12, *b ^ *c); - *a = add(*a,*b); *d = xop_rotate(8, *d ^ *a); - *c = add(*c,*d); *b = xop_rotate(7, *b ^ *c); -} -#endif - -#if NEED_SSSE3 -static const ssereg shuffle8 = { 0x0605040702010003ull, 0x0E0D0C0F0A09080Bull }; -static const ssereg shuffle16 = { 0x0504070601000302ull, 0x0D0C0F0E09080B0Aull }; - -INTRINSIC ssereg ssse3_rotate_8(ssereg a) { - return _mm_shuffle_epi8(a, shuffle8); -} - -INTRINSIC ssereg ssse3_rotate_16(ssereg a) { - return _mm_shuffle_epi8(a, shuffle16); -} - -static __inline__ void -quarter_round_ssse3( - ssereg *a, - ssereg *b, - ssereg *c, - ssereg *d -) { - *a = add(*a,*b); *d = ssse3_rotate_16(*d ^ *a); - *c = add(*c,*d); *b = sse2_rotate(12, *b ^ *c); - *a = add(*a,*b); *d = ssse3_rotate_8( *d ^ *a); - *c = add(*c,*d); *b = sse2_rotate(7, *b ^ *c); -} -#endif /* MIGHT_HAVE(SSSE3) && !MUST_HAVE(XOP) */ - -#if NEED_SSE2 -static __inline__ void -quarter_round_sse2( - ssereg *a, - ssereg *b, - ssereg *c, - ssereg *d -) { - *a = add(*a,*b); *d = sse2_rotate(16, *d ^ *a); - *c = add(*c,*d); *b = sse2_rotate(12, *b ^ *c); - *a = add(*a,*b); *d = sse2_rotate(8, *d ^ *a); - *c = add(*c,*d); *b = sse2_rotate(7, *b ^ *c); -} -#endif - -#define DOUBLE_ROUND(qrf) { \ - qrf(&a1,&b1,&c1,&d1); \ - qrf(&a2,&b2,&c2,&d2); \ - b1 = shuffle(b1,1); \ - c1 = shuffle(c1,2); \ - d1 = shuffle(d1,3); \ - b2 = shuffle(b2,1); \ - c2 = shuffle(c2,2); \ - d2 = shuffle(d2,3); \ - \ - qrf(&a1,&b1,&c1,&d1); \ - qrf(&a2,&b2,&c2,&d2); \ - b1 = shuffle(b1,3); \ - c1 = shuffle(c1,2); \ - d1 = shuffle(d1,1); \ - b2 = shuffle(b2,3); \ - c2 = shuffle(c2,2); \ - d2 = shuffle(d2,1); \ - } - -#define OUTPUT_FUNCTION { \ - output[0] = add(a1,aa); \ - output[1] = add(b1,bb); \ - output[2] = add(c1,cc); \ - output[3] = add(d1,dd); \ - output[4] = add(a2,aa); \ - output[5] = add(b2,bb); \ - output[6] = add(c2,add(cc,p)); \ - output[7] = add(d2,dd); \ - \ - output += 8; \ - \ - cc = add64(add64(cc,p), p); \ - a1 = a2 = aa; \ - b1 = b2 = bb; \ - c1 = cc; c2 = add64(cc,p);\ - d1 = d2 = dd; \ - } -/* ------------------------------------------------------------------------------- */ - -INTRINSIC u_int32_t rotate(int r, u_int32_t a) { - return a<>(32-r); -} - -static __inline__ void -quarter_round(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d) { - *a = *a + *b; *d = rotate(16, *d^*a); - *c = *c + *d; *b = rotate(12, *b^*c); - *a = *a + *b; *d = rotate(8, *d^*a); - *c = *c + *d; *b = rotate(7, *b^*c); -} - -static void -crandom_chacha_expand(u_int64_t iv, - u_int64_t ctr, - int nr, - int output_size, - const unsigned char *key_, - unsigned char *output_) { -# if MIGHT_HAVE_SSE2 - if (HAVE(SSE2)) { - ssereg *key = (ssereg *)key_; - ssereg *output = (ssereg *)output_; - - ssereg a1 = key[0], a2 = a1, aa = a1, - b1 = key[1], b2 = b1, bb = b1, - c1 = {iv, ctr}, c2 = {iv, ctr+1}, cc = c1, - d1 = {0x3320646e61707865ull, 0x6b20657479622d32ull}, - d2 = d1, dd = d1, - p = {0, 1}; - - int i,r; -# if (NEED_XOP) - if (HAVE(XOP)) { - for (i=0; i0; r-=2) - DOUBLE_ROUND(quarter_round_xop); - OUTPUT_FUNCTION; - } - return; - } -# endif -# if (NEED_SSSE3) - if (HAVE(SSSE3)) { - for (i=0; i0; r-=2) - DOUBLE_ROUND(quarter_round_ssse3); - OUTPUT_FUNCTION; - } - return; - } -# endif -# if (NEED_SSE2) - if (HAVE(SSE2)) { - for (i=0; i0; r-=2) - DOUBLE_ROUND(quarter_round_sse2); - OUTPUT_FUNCTION; - } - return; - } -# endif - } -# endif - -# if NEED_CONV - { - const u_int32_t *key = (const u_int32_t *)key_; - u_int32_t - x[16], - input[16] = { - key[0], key[1], key[2], key[3], - key[4], key[5], key[6], key[7], - iv, iv>>32, ctr, ctr>>32, - 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 - }, - *output = (u_int32_t *)output_; - int i, r; - - for (i=0; i0; r-=2) { - quarter_round(&x[0], &x[4], &x[8], &x[12]); - quarter_round(&x[1], &x[5], &x[9], &x[13]); - quarter_round(&x[2], &x[6], &x[10], &x[14]); - quarter_round(&x[3], &x[7], &x[11], &x[15]); - - quarter_round(&x[0], &x[5], &x[10], &x[15]); - quarter_round(&x[1], &x[6], &x[11], &x[12]); - quarter_round(&x[2], &x[7], &x[8], &x[13]); - quarter_round(&x[3], &x[4], &x[9], &x[14]); - } - for (r=0; r<16; r++) { - output[r] = x[r] + input[r]; - } - - output += 16; - input[11] ++; - if (!input[11]) input[12]++; - } - } - -#endif /* NEED_CONV */ -} - -/* "return 4", cf xkcd #221 */ -#define CRANDOM_MAGIC 0x72657475726e2034ull - -int -crandom_init_from_file( - struct crandom_state_t *state, - const char *filename, - int reseed_interval, - int reseeds_mandatory -) { - state->fill = 0; - state->reseed_countdown = reseed_interval; - state->reseed_interval = reseed_interval; - state->ctr = 0; - - state->randomfd = open(filename, O_RDONLY); - if (state->randomfd == -1) { - int err = errno; - return err ? err : -1; - } - - ssize_t offset = 0, red; - do { - red = read(state->randomfd, state->seed + offset, 32 - offset); - if (red > 0) offset += red; - } while (red > 0 && offset < 32); - - if (offset < 32) { - int err = errno; - return err ? err : -1; - } - - memset(state->buffer, 0, 96); - - state->magic = CRANDOM_MAGIC; - state->reseeds_mandatory = reseeds_mandatory; - - return 0; -} - -void -crandom_init_from_buffer( - struct crandom_state_t *state, - const char initial_seed[32] -) { - memcpy(state->seed, initial_seed, 32); - memset(state->buffer, 0, 96); - state->reseed_countdown = state->reseed_interval = state->fill = state->ctr = state->reseeds_mandatory = 0; - state->randomfd = -1; - state->magic = CRANDOM_MAGIC; -} - -int -crandom_generate( - struct crandom_state_t *state, - unsigned char *output, - unsigned long long length -) { - /* the generator isn't seeded; maybe they ignored the return value of init_from_file */ - if (unlikely(state->magic != CRANDOM_MAGIC)) { - abort(); - } - - int ret = 0; - - while (length) { - if (unlikely(state->fill <= 0)) { - uint64_t iv = 0; - if (state->reseed_interval) { - /* it's nondeterministic, stir in some rdrand() or rdtsc() */ - if (HAVE(RDRAND)) { - iv = rdrand(0); - if (!iv) iv = rdtsc(); - } else { - iv = rdtsc(); - } - - state->reseed_countdown--; - if (unlikely(state->reseed_countdown <= 0)) { - /* reseed by xoring in random state */ - state->reseed_countdown = state->reseed_interval; - ssize_t offset = 0, red; - do { - red = read(state->randomfd, state->buffer + offset, 32 - offset); - if (red > 0) offset += red; - } while (red > 0 && offset < 32); - - if (offset < 32) { - /* The read failed. Signal an error with the return code. - * - * If reseeds are mandatory, crash. - * - * If not, the generator is still probably safe to use, because reseeding - * is basically over-engineering for caution. Also, the user might ignore - * the return code, so we still need to fill the request. - * - * Set reseed_countdown = 1 so we'll try again later. If the user's - * performance sucks as a result of ignoring the error code while calling - * us in a loop, well, that's life. - */ - if (state->reseeds_mandatory) { - abort(); - } - - ret = errno; - if (ret == 0) ret = -1; - state->reseed_countdown = 1; - } - - int i; - for (i=0; i<32; i++) { - /* Stir in the buffer. If somehow the read failed, it'll be zeros. */ - state->seed[i] ^= state->buffer[i]; - } - } - } - crandom_chacha_expand(iv,state->ctr,20,128,state->seed,state->seed); - state->ctr++; - state->fill = sizeof(state->buffer); - } - - unsigned long long copy = (length > state->fill) ? state->fill : length; - state->fill -= copy; - memcpy(output, state->buffer + state->fill, copy); - memset(state->buffer + state->fill, 0, copy); - output += copy; length -= copy; - } - - return ret; -} - -void -crandom_destroy( - struct crandom_state_t *state -) { - if (state->magic == CRANDOM_MAGIC && state->randomfd) { - (void) close(state->randomfd); - /* Ignore the return value from close(), because what would it mean? - * "Your random device, which you were reading over NFS, lost some data"? - */ - } - - memset(state, 0, sizeof(*state)); -} diff --git a/crandom.h b/crandom.h deleted file mode 100644 index f603f13..0000000 --- a/crandom.h +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright (c) 2011 Stanford University. - * Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -/** - * @file crandom.h - * @author Mike Hamburg - * @brief A miniature version of the (as of yet incomplete) crandom project. - */ - -#ifndef __GOLDI_CRANDOM_H__ -#define __GOLDI_CRANDOM_H__ 1 - -#include /* for uint64_t */ -#include /* for open */ -#include /* for returning errors after open */ -#include /* for abort */ -#include /* for memcpy */ -#include /* for bzero */ -#include /* for read */ - -/** - * @brief The state of a crandom generator. - * - * This object is opaque. It is not protected by a lock, and so must - * not be accessed by multiple threads at the same time. - */ -struct crandom_state_t { - /** @privatesection */ - unsigned char seed[32]; - unsigned char buffer[96]; - uint64_t ctr; - uint64_t magic; - unsigned int fill; - int reseed_countdown; - int reseed_interval; - int reseeds_mandatory; - int randomfd; -} __attribute__((aligned(16))) ; - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Initialize a crandom state from the chosen file. - * - * This function initializes a state from a given state file, or - * from a random device (eg. /dev/random or /dev/urandom). - * - * You must check the return value of this function. - * - * @param [out] state The crandom state variable to initalize. - * @param [in] filename The name of the seed file or random device. - * @param [in] reseed_interval The number of 96-byte blocks which can be - * generated without reseeding. Suggest 10000. - * @param [in] reseeds_mandatory If nonzero, call abort() if a reseed fails. - * Suggest 1. - * - * @retval 0 Success. - * @retval Nonzero An error to be interpreted by strerror(). - */ -int -crandom_init_from_file ( - struct crandom_state_t *state, - const char *filename, - int reseed_interval, - int reseeds_mandatory -) __attribute__((warn_unused_result)); - - -/** - * Initialize a crandom state from a buffer, for deterministic operation. - * - * This function is used to initialize a crandom state deterministically, - * mainly for testing purposes. It can also be used to expand a secret - * random value deterministically. - * - * @warning The crandom implementation is not guaranteed to be stable. - * That is, a later release might produce a different random stream from - * the same seed. - * - * @param [out] state The crandom state variable to initalize. - * @param [in] initial_seed The seed value. - */ -void -crandom_init_from_buffer ( - struct crandom_state_t *state, - const char initial_seed[32] -); - -/** - * Fill the output buffer with random data. - * - * This function uses the given crandom state to produce pseudorandom data - * in the output buffer. - * - * This function may perform reads from the state's random device if it needs - * to reseed. This could block if that file is a blocking source, such as - * a pipe or /dev/random on Linux. If reseeding fails and the state has - * reseeds_mandatory set, this function will call abort(). Otherwise, it will - * return an error code, but it will still randomize the buffer. - * - * If called on a corrupted, uninitialized or destroyed state, this function - * will abort(). - * - * @warning This function is not thread-safe with respect to the state. Don't - * call it from multiple threads with the same state at the same time. - * - * @param [inout] state The crandom state to use for generation. - * @param [out] output The buffer to fill with random data. - * @param [in] length The length of the buffer. - * - * @retval 0 Success. - * @retval Nonezero A non-mandatory reseed operation failed. - */ -int -crandom_generate ( - struct crandom_state_t *state, - unsigned char *output, - unsigned long long length -); - -/** - * Destroy the random state. Further calls to crandom_generate() on that state - * will abort(). - * - * @param [inout] state The state to be destroyed. - */ -void -crandom_destroy ( - struct crandom_state_t *state -); - -#ifdef __cplusplus -}; /* extern "C" */ -#endif - -#endif /* __GOLDI_CRANDOM_H__ */ diff --git a/ec_point.c b/ec_point.c deleted file mode 100644 index 5c2d3ab..0000000 --- a/ec_point.c +++ /dev/null @@ -1,745 +0,0 @@ -/** - * @cond internal - * @file ec_point.c - * @copyright - * Copyright (c) 2014 Cryptography Research, Inc. \n - * Released under the MIT License. See LICENSE.txt for license information. - * @author Mike Hamburg - * @warning This file was automatically generated. - */ - -#include "ec_point.h" - - -void -p448_isr ( - struct p448_t* a, - const struct p448_t* x -) { - struct p448_t L0, L1, L2; - p448_sqr ( &L1, x ); - p448_mul ( &L2, x, &L1 ); - p448_sqr ( &L1, &L2 ); - p448_mul ( &L2, x, &L1 ); - p448_sqrn ( &L1, &L2, 3 ); - p448_mul ( &L0, &L2, &L1 ); - p448_sqrn ( &L1, &L0, 3 ); - p448_mul ( &L0, &L2, &L1 ); - p448_sqrn ( &L2, &L0, 9 ); - p448_mul ( &L1, &L0, &L2 ); - p448_sqr ( &L0, &L1 ); - p448_mul ( &L2, x, &L0 ); - p448_sqrn ( &L0, &L2, 18 ); - p448_mul ( &L2, &L1, &L0 ); - p448_sqrn ( &L0, &L2, 37 ); - p448_mul ( &L1, &L2, &L0 ); - p448_sqrn ( &L0, &L1, 37 ); - p448_mul ( &L1, &L2, &L0 ); - p448_sqrn ( &L0, &L1, 111 ); - p448_mul ( &L2, &L1, &L0 ); - p448_sqr ( &L0, &L2 ); - p448_mul ( &L1, x, &L0 ); - p448_sqrn ( &L0, &L1, 223 ); - p448_mul ( a, &L2, &L0 ); -} - -void -p448_inverse ( - struct p448_t* a, - const struct p448_t* x -) { - struct p448_t L0, L1; - p448_isr ( &L0, x ); - p448_sqr ( &L1, &L0 ); - p448_sqr ( &L0, &L1 ); - p448_mul ( a, x, &L0 ); -} - -void -add_tw_niels_to_tw_extensible ( - struct tw_extensible_t* d, - const struct tw_niels_t* e -) { - struct p448_t L0, L1; - p448_bias ( &d->y, 2 ); - p448_bias ( &d->z, 2 ); - p448_sub ( &L1, &d->y, &d->x ); - p448_mul ( &L0, &e->a, &L1 ); - p448_add ( &L1, &d->x, &d->y ); - p448_mul ( &d->y, &e->b, &L1 ); - p448_bias ( &d->y, 2 ); - p448_mul ( &L1, &d->u, &d->t ); - p448_mul ( &d->x, &e->c, &L1 ); - p448_add ( &d->u, &L0, &d->y ); - p448_sub ( &d->t, &d->y, &L0 ); - p448_sub ( &d->y, &d->z, &d->x ); - p448_add ( &L0, &d->x, &d->z ); - p448_mul ( &d->z, &L0, &d->y ); - p448_mul ( &d->x, &d->y, &d->t ); - p448_mul ( &d->y, &L0, &d->u ); -} - -void -sub_tw_niels_from_tw_extensible ( - struct tw_extensible_t* d, - const struct tw_niels_t* e -) { - struct p448_t L0, L1; - p448_bias ( &d->y, 2 ); - p448_bias ( &d->z, 2 ); - p448_sub ( &L1, &d->y, &d->x ); - p448_mul ( &L0, &e->b, &L1 ); - p448_add ( &L1, &d->x, &d->y ); - p448_mul ( &d->y, &e->a, &L1 ); - p448_bias ( &d->y, 2 ); - p448_mul ( &L1, &d->u, &d->t ); - p448_mul ( &d->x, &e->c, &L1 ); - p448_add ( &d->u, &L0, &d->y ); - p448_sub ( &d->t, &d->y, &L0 ); - p448_add ( &d->y, &d->x, &d->z ); - p448_sub ( &L0, &d->z, &d->x ); - p448_mul ( &d->z, &L0, &d->y ); - p448_mul ( &d->x, &d->y, &d->t ); - p448_mul ( &d->y, &L0, &d->u ); -} - -void -add_tw_pniels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* a -) { - struct p448_t L0; - p448_mul ( &L0, &e->z, &a->z ); - p448_copy ( &e->z, &L0 ); - add_tw_niels_to_tw_extensible( e, &a->n ); -} - -void -sub_tw_pniels_from_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* a -) { - struct p448_t L0; - p448_mul ( &L0, &e->z, &a->z ); - p448_copy ( &e->z, &L0 ); - sub_tw_niels_from_tw_extensible( e, &a->n ); -} - -void -double_tw_extensible ( - struct tw_extensible_t* a -) { - struct p448_t L0, L1, L2; - p448_sqr ( &L2, &a->x ); - p448_sqr ( &L0, &a->y ); - p448_add ( &a->u, &L2, &L0 ); - p448_add ( &a->t, &a->y, &a->x ); - p448_sqr ( &L1, &a->t ); - p448_bias ( &L1, 3 ); - p448_sub ( &a->t, &L1, &a->u ); - p448_sub ( &L1, &L0, &L2 ); - p448_bias ( &L1, 2 ); - p448_sqr ( &a->x, &a->z ); - p448_bias ( &a->x, 2 ); - p448_add ( &a->z, &a->x, &a->x ); - p448_sub ( &L0, &a->z, &L1 ); - p448_mul ( &a->z, &L1, &L0 ); - p448_mul ( &a->x, &L0, &a->t ); - p448_mul ( &a->y, &L1, &a->u ); -} - -void -double_extensible ( - struct extensible_t* a -) { - struct p448_t L0, L1, L2; - p448_sqr ( &L2, &a->x ); - p448_sqr ( &L0, &a->y ); - p448_add ( &L1, &L2, &L0 ); - p448_add ( &a->t, &a->y, &a->x ); - p448_sqr ( &a->u, &a->t ); - p448_bias ( &a->u, 3 ); - p448_sub ( &a->t, &a->u, &L1 ); - p448_sub ( &a->u, &L0, &L2 ); - p448_bias ( &a->u, 2 ); - p448_sqr ( &a->x, &a->z ); - p448_bias ( &a->x, 2 ); - p448_add ( &a->z, &a->x, &a->x ); - p448_sub ( &L0, &a->z, &L1 ); - p448_mul ( &a->z, &L1, &L0 ); - p448_mul ( &a->x, &L0, &a->t ); - p448_mul ( &a->y, &L1, &a->u ); -} - -void -twist_and_double ( - struct tw_extensible_t* b, - const struct extensible_t* a -) { - struct p448_t L0; - p448_sqr ( &b->x, &a->x ); - p448_sqr ( &b->z, &a->y ); - p448_add ( &b->u, &b->x, &b->z ); - p448_add ( &b->t, &a->y, &a->x ); - p448_sqr ( &L0, &b->t ); - p448_bias ( &L0, 3 ); - p448_sub ( &b->t, &L0, &b->u ); - p448_sub ( &L0, &b->z, &b->x ); - p448_bias ( &L0, 2 ); - p448_sqr ( &b->x, &a->z ); - p448_bias ( &b->x, 2 ); - p448_add ( &b->z, &b->x, &b->x ); - p448_sub ( &b->y, &b->z, &b->u ); - p448_mul ( &b->z, &L0, &b->y ); - p448_mul ( &b->x, &b->y, &b->t ); - p448_mul ( &b->y, &L0, &b->u ); -} - -void -untwist_and_double ( - struct extensible_t* b, - const struct tw_extensible_t* a -) { - struct p448_t L0; - p448_sqr ( &b->x, &a->x ); - p448_sqr ( &b->z, &a->y ); - p448_add ( &L0, &b->x, &b->z ); - p448_add ( &b->t, &a->y, &a->x ); - p448_sqr ( &b->u, &b->t ); - p448_bias ( &b->u, 3 ); - p448_sub ( &b->t, &b->u, &L0 ); - p448_sub ( &b->u, &b->z, &b->x ); - p448_bias ( &b->u, 2 ); - p448_sqr ( &b->x, &a->z ); - p448_bias ( &b->x, 2 ); - p448_add ( &b->z, &b->x, &b->x ); - p448_sub ( &b->y, &b->z, &b->u ); - p448_mul ( &b->z, &L0, &b->y ); - p448_mul ( &b->x, &b->y, &b->t ); - p448_mul ( &b->y, &L0, &b->u ); -} - -void -convert_tw_affine_to_tw_pniels ( - struct tw_pniels_t* b, - const struct tw_affine_t* a -) { - p448_sub ( &b->n.a, &a->y, &a->x ); - p448_bias ( &b->n.a, 2 ); - p448_weak_reduce( &b->n.a ); - p448_add ( &b->n.b, &a->x, &a->y ); - p448_weak_reduce( &b->n.b ); - p448_mul ( &b->n.c, &a->y, &a->x ); - p448_mulw ( &b->z, &b->n.c, 78164 ); - p448_neg ( &b->n.c, &b->z ); - p448_bias ( &b->n.c, 2 ); - p448_weak_reduce( &b->n.c ); - p448_set_ui( &b->z, 2 ); -} - -void -convert_tw_affine_to_tw_extensible ( - struct tw_extensible_t* b, - const struct tw_affine_t* a -) { - p448_copy ( &b->x, &a->x ); - p448_copy ( &b->y, &a->y ); - p448_set_ui( &b->z, 1 ); - p448_copy ( &b->t, &a->x ); - p448_copy ( &b->u, &a->y ); -} - -void -convert_affine_to_extensible ( - struct extensible_t* b, - const struct affine_t* a -) { - p448_copy ( &b->x, &a->x ); - p448_copy ( &b->y, &a->y ); - p448_set_ui( &b->z, 1 ); - p448_copy ( &b->t, &a->x ); - p448_copy ( &b->u, &a->y ); -} - -void -convert_tw_extensible_to_tw_pniels ( - struct tw_pniels_t* b, - const struct tw_extensible_t* a -) { - p448_sub ( &b->n.a, &a->y, &a->x ); - p448_bias ( &b->n.a, 2 ); - p448_weak_reduce( &b->n.a ); - p448_add ( &b->n.b, &a->x, &a->y ); - p448_weak_reduce( &b->n.b ); - p448_mul ( &b->n.c, &a->u, &a->t ); - p448_mulw ( &b->z, &b->n.c, 78164 ); - p448_neg ( &b->n.c, &b->z ); - p448_bias ( &b->n.c, 2 ); - p448_weak_reduce( &b->n.c ); - p448_add ( &b->z, &a->z, &a->z ); - p448_weak_reduce( &b->z ); -} - -void -convert_tw_pniels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* d -) { - p448_add ( &e->u, &d->n.b, &d->n.a ); - p448_sub ( &e->t, &d->n.b, &d->n.a ); - p448_bias ( &e->t, 2 ); - p448_mul ( &e->x, &d->z, &e->t ); - p448_mul ( &e->y, &d->z, &e->u ); - p448_sqr ( &e->z, &d->z ); -} - -void -convert_tw_niels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_niels_t* d -) { - p448_add ( &e->y, &d->b, &d->a ); - p448_weak_reduce( &e->y ); - p448_sub ( &e->x, &d->b, &d->a ); - p448_bias ( &e->x, 2 ); - p448_weak_reduce( &e->x ); - p448_set_ui( &e->z, 1 ); - p448_copy ( &e->t, &e->x ); - p448_copy ( &e->u, &e->y ); -} - -void -montgomery_step ( - struct montgomery_t* a -) { - struct p448_t L0, L1; - p448_bias ( &a->xd, 2 ); - p448_bias ( &a->xa, 2 ); - p448_add ( &L0, &a->zd, &a->xd ); - p448_sub ( &L1, &a->xd, &a->zd ); - p448_sub ( &a->zd, &a->xa, &a->za ); - p448_mul ( &a->xd, &L0, &a->zd ); - p448_bias ( &a->xd, 2 ); - p448_add ( &a->zd, &a->za, &a->xa ); - p448_mul ( &a->za, &L1, &a->zd ); - p448_add ( &a->xa, &a->za, &a->xd ); - p448_sqr ( &a->zd, &a->xa ); - p448_mul ( &a->xa, &a->z0, &a->zd ); - p448_sub ( &a->zd, &a->xd, &a->za ); - p448_sqr ( &a->za, &a->zd ); - p448_sqr ( &a->xd, &L0 ); - p448_bias ( &a->xd, 2 ); - p448_sqr ( &L0, &L1 ); - p448_mulw ( &a->zd, &a->xd, 39082 ); - p448_bias ( &a->zd, 4 ); - p448_sub ( &L1, &a->xd, &L0 ); - p448_mul ( &a->xd, &L0, &a->zd ); - p448_sub ( &L0, &a->zd, &L1 ); - p448_mul ( &a->zd, &L0, &L1 ); -} - -void -serialize_montgomery ( - struct p448_t* sign, - struct p448_t* ser, - const struct montgomery_t* a, - const struct p448_t* sbz -) { - struct p448_t L0, L1, L2, L3; - p448_mul ( &L2, &a->z0, &a->zd ); - p448_bias ( &L2, 2 ); - p448_sub ( &L0, &L2, &a->xd ); - p448_mul ( &L2, &a->za, &L0 ); - p448_bias ( &L2, 2 ); - p448_mul ( &L1, &a->z0, &a->xd ); - p448_bias ( &L1, 2 ); - p448_sub ( &L0, &L1, &a->zd ); - p448_mul ( &L3, &a->xa, &L0 ); - p448_add ( &L1, &L3, &L2 ); - p448_sub ( &L0, &L2, &L3 ); - p448_mul ( &L2, &L0, &L1 ); - p448_mul ( &L0, sbz, &L2 ); - p448_mul ( &L2, &a->zd, &L0 ); - p448_mul ( sign, &L2, &a->zd ); - p448_mul ( ser, &L2, &a->xd ); - p448_mul ( &L2, sign, ser ); - p448_isr ( &L1, &L2 ); - p448_mul ( ser, sign, &L1 ); - p448_sqr ( &L0, &L1 ); - p448_mul ( sign, &L2, &L0 ); -} - -void -serialize_extensible ( - struct p448_t* b, - const struct extensible_t* a -) { - struct p448_t L0, L1, L2; - p448_sub ( &L0, &a->y, &a->z ); - p448_bias ( &L0, 2 ); - p448_add ( b, &a->z, &a->y ); - p448_mul ( &L1, &a->z, &a->x ); - p448_mul ( &L2, &L0, &L1 ); - p448_mul ( &L1, &L2, &L0 ); - p448_mul ( &L0, &L2, b ); - p448_mul ( &L2, &L1, &L0 ); - p448_isr ( &L0, &L2 ); - p448_mul ( b, &L1, &L0 ); - p448_sqr ( &L1, &L0 ); - p448_mul ( &L0, &L2, &L1 ); -} - -void -untwist_and_double_and_serialize ( - struct p448_t* b, - const struct tw_extensible_t* a -) { - struct p448_t L0, L1, L2, L3; - p448_mul ( &L3, &a->y, &a->x ); - p448_add ( b, &a->y, &a->x ); - p448_sqr ( &L1, b ); - p448_add ( &L2, &L3, &L3 ); - p448_sub ( b, &L1, &L2 ); - p448_bias ( b, 3 ); - p448_sqr ( &L2, &a->z ); - p448_sqr ( &L1, &L2 ); - p448_add ( &L2, b, b ); - p448_mulw ( b, &L2, 39082 ); - p448_neg ( &L2, b ); - p448_bias ( &L2, 2 ); - p448_mulw ( &L0, &L2, 39082 ); - p448_neg ( b, &L0 ); - p448_bias ( b, 2 ); - p448_mul ( &L0, &L2, &L1 ); - p448_mul ( &L2, b, &L0 ); - p448_isr ( &L0, &L2 ); - p448_mul ( &L1, b, &L0 ); - p448_sqr ( b, &L0 ); - p448_mul ( &L0, &L2, b ); - p448_mul ( b, &L1, &L3 ); -} - -void -twist ( - struct tw_extensible_t* b, - const struct extensible_t* a -) { - mask_t L0, L1; - p448_sqr ( &b->y, &a->z ); - p448_sqr ( &b->z, &a->x ); - p448_sub ( &b->u, &b->y, &b->z ); - p448_bias ( &b->u, 2 ); - p448_sub ( &b->z, &a->z, &a->x ); - p448_bias ( &b->z, 2 ); - p448_mul ( &b->y, &b->z, &a->y ); - p448_sub ( &b->z, &a->z, &a->y ); - p448_bias ( &b->z, 2 ); - p448_mul ( &b->x, &b->z, &b->y ); - p448_mul ( &b->t, &b->x, &b->u ); - p448_mul ( &b->y, &b->x, &b->t ); - p448_isr ( &b->t, &b->y ); - p448_mul ( &b->u, &b->x, &b->t ); - p448_sqr ( &b->x, &b->t ); - p448_mul ( &b->t, &b->y, &b->x ); - p448_mul ( &b->x, &a->x, &b->u ); - p448_mul ( &b->y, &a->y, &b->u ); - L1 = p448_is_zero( &b->z ); - L0 = - L1; - p448_addw ( &b->y, L0 ); - p448_weak_reduce( &b->y ); - p448_set_ui( &b->z, 1 ); - p448_copy ( &b->t, &b->x ); - p448_copy ( &b->u, &b->y ); -} - -mask_t -deserialize_affine ( - struct affine_t* a, - const struct p448_t* sz -) { - struct p448_t L0, L1, L2, L3; - p448_sqr ( &L1, sz ); - p448_copy ( &L3, &L1 ); - p448_addw ( &L3, 1 ); - p448_sqr ( &a->x, &L3 ); - p448_mulw ( &L3, &a->x, 39082 ); - p448_neg ( &a->x, &L3 ); - p448_add ( &L3, &L1, &L1 ); - p448_bias ( &L3, 1 ); - p448_add ( &a->y, &L3, &L3 ); - p448_add ( &L3, &a->y, &a->x ); - p448_copy ( &a->y, &L1 ); - p448_subw ( &a->y, 1 ); - p448_neg ( &a->x, &a->y ); - p448_bias ( &a->x, 2 ); - p448_mul ( &a->y, &a->x, &L3 ); - p448_sqr ( &L2, &a->x ); - p448_mul ( &L0, &L2, &a->y ); - p448_mul ( &a->y, &a->x, &L0 ); - p448_isr ( &L3, &a->y ); - p448_mul ( &a->y, &L2, &L3 ); - p448_sqr ( &L2, &L3 ); - p448_mul ( &L3, &L0, &L2 ); - p448_mul ( &L0, &a->x, &L3 ); - p448_bias ( &L0, 1 ); - p448_add ( &L2, &a->y, &a->y ); - p448_mul ( &a->x, sz, &L2 ); - p448_addw ( &L1, 1 ); - p448_mul ( &a->y, &L1, &L3 ); - p448_subw ( &L0, 1 ); - return p448_is_zero( &L0 ); -} - -mask_t -deserialize_and_twist_approx ( - struct tw_extensible_t* a, - const struct p448_t* sdm1, - const struct p448_t* sz -) { - struct p448_t L0, L1; - p448_sqr ( &a->z, sz ); - p448_copy ( &a->y, &a->z ); - p448_addw ( &a->y, 1 ); - p448_sqr ( &a->x, &a->y ); - p448_mulw ( &a->y, &a->x, 39082 ); - p448_neg ( &a->x, &a->y ); - p448_add ( &a->y, &a->z, &a->z ); - p448_bias ( &a->y, 1 ); - p448_add ( &a->u, &a->y, &a->y ); - p448_add ( &a->y, &a->u, &a->x ); - p448_sqr ( &a->x, &a->z ); - p448_subw ( &a->x, 1 ); - p448_neg ( &a->u, &a->x ); - p448_bias ( &a->u, 2 ); - p448_mul ( &a->x, sdm1, &a->u ); - p448_mul ( &L0, &a->x, &a->y ); - p448_mul ( &a->t, &L0, &a->y ); - p448_mul ( &a->u, &a->x, &a->t ); - p448_mul ( &a->t, &a->u, &L0 ); - p448_mul ( &a->y, &a->x, &a->t ); - p448_isr ( &L0, &a->y ); - p448_mul ( &a->y, &a->u, &L0 ); - p448_sqr ( &L1, &L0 ); - p448_mul ( &a->u, &a->t, &L1 ); - p448_mul ( &a->t, &a->x, &a->u ); - p448_bias ( &a->t, 1 ); - p448_add ( &a->x, sz, sz ); - p448_mul ( &L0, &a->u, &a->x ); - p448_copy ( &a->x, &a->z ); - p448_subw ( &a->x, 1 ); - p448_neg ( &L1, &a->x ); - p448_bias ( &L1, 2 ); - p448_mul ( &a->x, &L1, &L0 ); - p448_mul ( &L0, &a->u, &a->y ); - p448_addw ( &a->z, 1 ); - p448_mul ( &a->y, &a->z, &L0 ); - p448_subw ( &a->t, 1 ); - mask_t ret = p448_is_zero( &a->t ); - p448_set_ui( &a->z, 1 ); - p448_copy ( &a->t, &a->x ); - p448_copy ( &a->u, &a->y ); - return ret; -} - -void -set_identity_extensible ( - struct extensible_t* a -) { - p448_set_ui( &a->x, 0 ); - p448_set_ui( &a->y, 1 ); - p448_set_ui( &a->z, 1 ); - p448_set_ui( &a->t, 0 ); - p448_set_ui( &a->u, 0 ); -} - -void -set_identity_tw_extensible ( - struct tw_extensible_t* a -) { - p448_set_ui( &a->x, 0 ); - p448_set_ui( &a->y, 1 ); - p448_set_ui( &a->z, 1 ); - p448_set_ui( &a->t, 0 ); - p448_set_ui( &a->u, 0 ); -} - -void -set_identity_affine ( - struct affine_t* a -) { - p448_set_ui( &a->x, 0 ); - p448_set_ui( &a->y, 1 ); -} - -mask_t -eq_affine ( - const struct affine_t* a, - const struct affine_t* b -) { - mask_t L1, L2; - struct p448_t L0; - p448_sub ( &L0, &a->x, &b->x ); - p448_bias ( &L0, 2 ); - L2 = p448_is_zero( &L0 ); - p448_sub ( &L0, &a->y, &b->y ); - p448_bias ( &L0, 2 ); - L1 = p448_is_zero( &L0 ); - return L2 & L1; -} - -mask_t -eq_extensible ( - const struct extensible_t* a, - const struct extensible_t* b -) { - mask_t L3, L4; - struct p448_t L0, L1, L2; - p448_mul ( &L2, &b->z, &a->x ); - p448_mul ( &L1, &a->z, &b->x ); - p448_sub ( &L0, &L2, &L1 ); - p448_bias ( &L0, 2 ); - L4 = p448_is_zero( &L0 ); - p448_mul ( &L2, &b->z, &a->y ); - p448_mul ( &L1, &a->z, &b->y ); - p448_sub ( &L0, &L2, &L1 ); - p448_bias ( &L0, 2 ); - L3 = p448_is_zero( &L0 ); - return L4 & L3; -} - -mask_t -eq_tw_extensible ( - const struct tw_extensible_t* a, - const struct tw_extensible_t* b -) { - mask_t L3, L4; - struct p448_t L0, L1, L2; - p448_mul ( &L2, &b->z, &a->x ); - p448_mul ( &L1, &a->z, &b->x ); - p448_sub ( &L0, &L2, &L1 ); - p448_bias ( &L0, 2 ); - L4 = p448_is_zero( &L0 ); - p448_mul ( &L2, &b->z, &a->y ); - p448_mul ( &L1, &a->z, &b->y ); - p448_sub ( &L0, &L2, &L1 ); - p448_bias ( &L0, 2 ); - L3 = p448_is_zero( &L0 ); - return L4 & L3; -} - -void -elligator_2s_inject ( - struct affine_t* a, - const struct p448_t* r -) { - mask_t L0, L1; - struct p448_t L2, L3, L4, L5, L6, L7, L8, L9; - p448_sqr ( &a->x, r ); - p448_sqr ( &L3, &a->x ); - p448_copy ( &a->y, &L3 ); - p448_subw ( &a->y, 1 ); - p448_neg ( &L9, &a->y ); - p448_bias ( &L9, 2 ); - p448_sqr ( &L2, &L9 ); - p448_bias ( &L2, 1 ); - p448_mulw ( &L7, &L2, 1527402724 ); - p448_bias ( &L7, 2 ); - p448_mulw ( &L8, &L3, 6108985600 ); - p448_add ( &a->y, &L8, &L7 ); - p448_mulw ( &L8, &L2, 6109454568 ); - p448_sub ( &L7, &a->y, &L8 ); - p448_mulw ( &L4, &a->y, 78160 ); - p448_mul ( &L6, &L7, &L9 ); - p448_mul ( &L8, &L6, &L4 ); - p448_mul ( &L4, &L7, &L8 ); - p448_isr ( &L5, &L4 ); - p448_mul ( &L4, &L6, &L5 ); - p448_sqr ( &L6, &L5 ); - p448_mul ( &L5, &L8, &L6 ); - p448_mul ( &L8, &L7, &L5 ); - p448_mul ( &L7, &L8, &L5 ); - p448_copy ( &L6, &a->x ); - p448_subw ( &L6, 1 ); - p448_addw ( &a->x, 1 ); - p448_mul ( &L5, &a->x, &L8 ); - p448_sub ( &a->x, &L6, &L5 ); - p448_bias ( &a->x, 3 ); - p448_mul ( &L5, &L4, &a->x ); - p448_mulw ( &L4, &L5, 78160 ); - p448_neg ( &a->x, &L4 ); - p448_bias ( &a->x, 2 ); - p448_weak_reduce( &a->x ); - p448_add ( &L4, &L3, &L3 ); - p448_add ( &L3, &L4, &L2 ); - p448_subw ( &L3, 2 ); - p448_mul ( &L2, &L3, &L8 ); - p448_mulw ( &L3, &L2, 3054649120 ); - p448_add ( &L2, &L3, &a->y ); - p448_mul ( &a->y, &L7, &L2 ); - L1 = p448_is_zero( &L9 ); - L0 = - L1; - p448_addw ( &a->y, L0 ); - p448_weak_reduce( &a->y ); -} - -mask_t -validate_affine ( - const struct affine_t* a -) { - struct p448_t L0, L1, L2, L3; - p448_sqr ( &L0, &a->y ); - p448_sqr ( &L2, &a->x ); - p448_add ( &L3, &L2, &L0 ); - p448_subw ( &L3, 1 ); - p448_mulw ( &L1, &L2, 39081 ); - p448_neg ( &L2, &L1 ); - p448_bias ( &L2, 2 ); - p448_mul ( &L1, &L0, &L2 ); - p448_sub ( &L0, &L3, &L1 ); - p448_bias ( &L0, 3 ); - return p448_is_zero( &L0 ); -} - -mask_t -validate_tw_extensible ( - const struct tw_extensible_t* ext -) { - mask_t L4, L5; - struct p448_t L0, L1, L2, L3; - /* - * Check invariant: - * 0 = -x*y + z*t*u - */ - p448_mul ( &L0, &ext->t, &ext->u ); - p448_mul ( &L2, &ext->z, &L0 ); - p448_addw ( &L2, 0 ); - p448_mul ( &L1, &ext->x, &ext->y ); - p448_neg ( &L0, &L1 ); - p448_add ( &L1, &L0, &L2 ); - p448_bias ( &L1, 2 ); - L5 = p448_is_zero( &L1 ); - /* - * Check invariant: - * 0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2 - */ - p448_sqr ( &L2, &ext->y ); - p448_neg ( &L0, &L2 ); - p448_addw ( &L0, 0 ); - p448_sqr ( &L1, &ext->x ); - p448_bias ( &L1, 4 ); - p448_add ( &L2, &L1, &L0 ); - p448_sqr ( &L3, &ext->u ); - p448_sqr ( &L1, &ext->t ); - p448_mul ( &L0, &L1, &L3 ); - p448_mulw ( &L1, &L0, 39081 ); - p448_neg ( &L3, &L1 ); - p448_add ( &L1, &L3, &L2 ); - p448_neg ( &L3, &L0 ); - p448_add ( &L2, &L3, &L1 ); - p448_sqr ( &L1, &ext->z ); - p448_add ( &L0, &L1, &L2 ); - L4 = p448_is_zero( &L0 ); - return L5 & L4; -} - - diff --git a/ec_point.h b/ec_point.h deleted file mode 100644 index 4e28152..0000000 --- a/ec_point.h +++ /dev/null @@ -1,503 +0,0 @@ -/** - * @file ec_point.h - * @copyright - * Copyright (c) 2014 Cryptography Research, Inc. \n - * Released under the MIT License. See LICENSE.txt for license information. - * @author Mike Hamburg - * @warning This file was automatically generated. - */ - -#ifndef __CC_INCLUDED_EC_POINT_H__ -#define __CC_INCLUDED_EC_POINT_H__ - -#include "p448.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Affine point on an Edwards curve. - */ -struct affine_t { - struct p448_t x, y; -}; - -/** - * Affine point on a twisted Edwards curve. - */ -struct tw_affine_t { - struct p448_t x, y; -}; - -/** - * Montgomery buffer. - */ -struct montgomery_t { - struct p448_t z0, xd, zd, xa, za; -}; - -/** - * Extensible coordinates for Edwards curves, suitable for - * accumulators. - * - * Represents the point (x/z, y/z). The extra coordinates - * t,u satisfy xy = tuz, allowing for conversion to Extended - * form by multiplying t and u. - * - * The idea is that you don't have to do this multiplication - * when doubling the accumulator, because the t-coordinate - * isn't used there. At the same time, as long as you only - * have one point in extensible form, additions don't cost - * extra. - * - * This is essentially a lazier version of Hisil et al's - * lookahead trick. It might be worth considering that trick - * instead. - */ -struct extensible_t { - struct p448_t x, y, z, t, u; -}; - -/** - * Extensible coordinates for twisted Edwards curves, - * suitable for accumulators. - */ -struct tw_extensible_t { - struct p448_t x, y, z, t, u; -}; - -/** - * Niels coordinates for twisted Edwards curves. - * - * Good for mixed readdition; suitable for fixed tables. - */ -struct tw_niels_t { - struct p448_t a, b, c; -}; - -/** - * Projective niels coordinates for twisted Edwards curves. - * - * Good for readdition; suitable for temporary tables. - */ -struct tw_pniels_t { - struct tw_niels_t n; - struct p448_t z; -}; - - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_affine ( - struct affine_t* a, - const struct affine_t* ds -) __attribute__((unused,always_inline)); - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_tw_affine ( - struct tw_affine_t* a, - const struct tw_affine_t* ds -) __attribute__((unused,always_inline)); - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_montgomery ( - struct montgomery_t* a, - const struct montgomery_t* ds -) __attribute__((unused,always_inline)); - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_extensible ( - struct extensible_t* a, - const struct extensible_t* ds -) __attribute__((unused,always_inline)); - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_tw_extensible ( - struct tw_extensible_t* a, - const struct tw_extensible_t* ds -) __attribute__((unused,always_inline)); - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_tw_niels ( - struct tw_niels_t* a, - const struct tw_niels_t* ds -) __attribute__((unused,always_inline)); - -/** - * Auto-generated copy method. - */ -static __inline__ void -copy_tw_pniels ( - struct tw_pniels_t* a, - const struct tw_pniels_t* ds -) __attribute__((unused,always_inline)); - -/** - * Returns 1/sqrt(+- x). - * - * The Legendre symbol of the result is the same as that of the - * input. - * - * If x=0, returns 0. - */ -void -p448_isr ( - struct p448_t* a, - const struct p448_t* x -); - -/** - * Returns 1/x. - * - * If x=0, returns 0. - */ -void -p448_inverse ( - struct p448_t* a, - const struct p448_t* x -); - -/** - * Add two points on a twisted Edwards curve, one in Extensible form - * and the other in half-Niels form. - */ -void -add_tw_niels_to_tw_extensible ( - struct tw_extensible_t* d, - const struct tw_niels_t* e -); - -/** - * Add two points on a twisted Edwards curve, one in Extensible form - * and the other in half-Niels form. - */ -void -sub_tw_niels_from_tw_extensible ( - struct tw_extensible_t* d, - const struct tw_niels_t* e -); - -/** - * Add two points on a twisted Edwards curve, one in Extensible form - * and the other in projective Niels form. - */ -void -add_tw_pniels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* a -); - -/** - * Add two points on a twisted Edwards curve, one in Extensible form - * and the other in projective Niels form. - */ -void -sub_tw_pniels_from_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* a -); - -/** - * Double a point on a twisted Edwards curve, in "extensible" coordinates. - */ -void -double_tw_extensible ( - struct tw_extensible_t* a -); - -/** - * Double a point on an Edwards curve, in "extensible" coordinates. - */ -void -double_extensible ( - struct extensible_t* a -); - -/** - * Double a point, and transfer it to the twisted curve. - * - * That is, apply the 4-isogeny. - */ -void -twist_and_double ( - struct tw_extensible_t* b, - const struct extensible_t* a -); - -/** - * Double a point, and transfer it to the untwisted curve. - * - * That is, apply the dual isogeny. - */ -void -untwist_and_double ( - struct extensible_t* b, - const struct tw_extensible_t* a -); - -void -convert_tw_affine_to_tw_pniels ( - struct tw_pniels_t* b, - const struct tw_affine_t* a -); - -void -convert_tw_affine_to_tw_extensible ( - struct tw_extensible_t* b, - const struct tw_affine_t* a -); - -void -convert_affine_to_extensible ( - struct extensible_t* b, - const struct affine_t* a -); - -void -convert_tw_extensible_to_tw_pniels ( - struct tw_pniels_t* b, - const struct tw_extensible_t* a -); - -void -convert_tw_pniels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* d -); - -void -convert_tw_niels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_niels_t* d -); - -void -montgomery_step ( - struct montgomery_t* a -); - -void -serialize_montgomery ( - struct p448_t* sign, - struct p448_t* ser, - const struct montgomery_t* a, - const struct p448_t* sbz -); - -/** - * Serialize a point on an Edwards curve. - * - * The serialized form would be sqrt((z-y)/(z+y)) with sign of xz. - * - * It would be on 4y^2/(1-d) = x^3 + 2(1+d)/(1-d) * x^2 + x. - * - * But 4/(1-d) isn't square, so we need to twist it: - * - * -x is on 4y^2/(d-1) = x^3 + 2(d+1)/(d-1) * x^2 + x - */ -void -serialize_extensible ( - struct p448_t* b, - const struct extensible_t* a -); - -/** - * - */ -void -untwist_and_double_and_serialize ( - struct p448_t* b, - const struct tw_extensible_t* a -); - -/** - * Expensive transfer from untwisted to twisted. Roughly equivalent to halve and isogeny. - * Correctly transfers point of order 2. - * - * Can't have x=+1 (it's not even). There is code to fix the exception that would otherwise - * occur at (0,1). - * - * Input point must be even. - */ -void -twist ( - struct tw_extensible_t* b, - const struct extensible_t* a -); - -/** - * Deserialize a point to an untwisted affine curve. - */ -mask_t -deserialize_affine ( - struct affine_t* a, - const struct p448_t* sz -); - -/** - * Deserialize a point and transfer it to the twist. - * - * Not guaranteed to preserve the 4-torsion component. - * - * Refuses to deserialize +-1, which are the points of order 2. - */ -mask_t -deserialize_and_twist_approx ( - struct tw_extensible_t* a, - const struct p448_t* sdm1, - const struct p448_t* sz -); - -void -set_identity_extensible ( - struct extensible_t* a -); - -void -set_identity_tw_extensible ( - struct tw_extensible_t* a -); - -void -set_identity_affine ( - struct affine_t* a -); - -mask_t -eq_affine ( - const struct affine_t* a, - const struct affine_t* b -); - -mask_t -eq_extensible ( - const struct extensible_t* a, - const struct extensible_t* b -); - -mask_t -eq_tw_extensible ( - const struct tw_extensible_t* a, - const struct tw_extensible_t* b -); - -void -elligator_2s_inject ( - struct affine_t* a, - const struct p448_t* r -); - -mask_t -validate_affine ( - const struct affine_t* a -); - -/** - * Check the invariants for struct tw_extensible_t. - * PERF: This function was automatically generated - * with no regard for speed. - */ -mask_t -validate_tw_extensible ( - const struct tw_extensible_t* ext -); - - -void -copy_affine ( - struct affine_t* a, - const struct affine_t* ds -) { - p448_copy ( &a->x, &ds->x ); - p448_copy ( &a->y, &ds->y ); -} - -void -copy_tw_affine ( - struct tw_affine_t* a, - const struct tw_affine_t* ds -) { - p448_copy ( &a->x, &ds->x ); - p448_copy ( &a->y, &ds->y ); -} - -void -copy_montgomery ( - struct montgomery_t* a, - const struct montgomery_t* ds -) { - p448_copy ( &a->z0, &ds->z0 ); - p448_copy ( &a->xd, &ds->xd ); - p448_copy ( &a->zd, &ds->zd ); - p448_copy ( &a->xa, &ds->xa ); - p448_copy ( &a->za, &ds->za ); -} - -void -copy_extensible ( - struct extensible_t* a, - const struct extensible_t* ds -) { - p448_copy ( &a->x, &ds->x ); - p448_copy ( &a->y, &ds->y ); - p448_copy ( &a->z, &ds->z ); - p448_copy ( &a->t, &ds->t ); - p448_copy ( &a->u, &ds->u ); -} - -void -copy_tw_extensible ( - struct tw_extensible_t* a, - const struct tw_extensible_t* ds -) { - p448_copy ( &a->x, &ds->x ); - p448_copy ( &a->y, &ds->y ); - p448_copy ( &a->z, &ds->z ); - p448_copy ( &a->t, &ds->t ); - p448_copy ( &a->u, &ds->u ); -} - -void -copy_tw_niels ( - struct tw_niels_t* a, - const struct tw_niels_t* ds -) { - p448_copy ( &a->a, &ds->a ); - p448_copy ( &a->b, &ds->b ); - p448_copy ( &a->c, &ds->c ); -} - -void -copy_tw_pniels ( - struct tw_pniels_t* a, - const struct tw_pniels_t* ds -) { - copy_tw_niels( &a->n, &ds->n ); - p448_copy ( &a->z, &ds->z ); -} - - - -#ifdef __cplusplus -}; /* extern "C" */ -#endif - -#endif /* __CC_INCLUDED_EC_POINT_H__ */ diff --git a/exported.sym b/exported.sym deleted file mode 100644 index 424cb0a..0000000 --- a/exported.sym +++ /dev/null @@ -1,5 +0,0 @@ -_goldilocks_init -_goldilocks_keygen -_goldilocks_shared_secret -_goldilocks_sign -_goldilocks_verify diff --git a/goldilocks.c b/goldilocks.c deleted file mode 100644 index ca82294..0000000 --- a/goldilocks.c +++ /dev/null @@ -1,299 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#include - -#include "goldilocks.h" -#include "ec_point.h" -#include "scalarmul.h" -#include "barrett_field.h" -#include "crandom.h" -#include "sha512.h" - -#ifndef GOLDILOCKS_RANDOM_INIT_FILE -#define GOLDILOCKS_RANDOM_INIT_FILE "/dev/urandom" -#endif - -#ifndef GOLDILOCKS_RANDOM_RESEED_INTERVAL -#define GOLDILOCKS_RANDOM_RESEED_INTERVAL 10000 -#endif - -/* We'll check it ourselves */ -#ifndef GOLDILOCKS_RANDOM_RESEEDS_MANDATORY -#define GOLDILOCKS_RANDOM_RESEEDS_MANDATORY 0 -#endif - -/* TODO: word size; precompute */ -const struct affine_t goldilocks_base_point = { - {{ 0xf0de840aed939full, 0xc170033f4ba0c7ull, 0xf3932d94c63d96ull, 0x9cecfa96147eaaull, - 0x5f065c3c59d070ull, 0x3a6a26adf73324ull, 0x1b4faff4609845ull, 0x297ea0ea2692ffull - }}, - {{ 19, 0, 0, 0, 0, 0, 0, 0 }} -}; - -// /* TODO: direct */ -// void -// transfer_and_serialize(struct p448_t *out, const struct tw_extensible_t *twext) { -// struct extensible_t ext; -// transfer_tw_to_un(&ext, twext); -// serialize_extensible(out, &ext); -// } - -// FIXME: threading -// TODO: autogen instead of init -struct { - struct tw_niels_t combs[80]; - struct tw_niels_t wnafs[32]; - struct crandom_state_t rand; -} goldilocks_global; - -int -goldilocks_init () { - struct extensible_t ext; - struct tw_extensible_t text; - - /* Sanity check: the base point is on the curve. */ - assert(validate_affine(&goldilocks_base_point)); - - /* Convert it to twisted Edwards. */ - convert_affine_to_extensible(&ext, &goldilocks_base_point); - twist(&text, &ext); - //p448_transfer_un_to_tw(&text, &ext); - - /* Precompute the tables. */ - precompute_for_combs(goldilocks_global.combs, &text, 5, 5, 18); - precompute_for_wnaf(goldilocks_global.wnafs, &text, 5); - - return crandom_init_from_file(&goldilocks_global.rand, - GOLDILOCKS_RANDOM_INIT_FILE, - GOLDILOCKS_RANDOM_RESEED_INTERVAL, - GOLDILOCKS_RANDOM_RESEEDS_MANDATORY); -} - -static word_t -q448_lo[4] = { - 0xdc873d6d54a7bb0dull, - 0xde933d8d723a70aaull, - 0x3bb124b65129c96full, - 0x000000008335dc16ull -}; - -static const struct p448_t -sqrt_d_minus_1 = {{ - 0xd2e21836749f46ull, - 0x888db42b4f0179ull, - 0x5a189aabdeea38ull, - 0x51e65ca6f14c06ull, - 0xa49f7b424d9770ull, - 0xdcac4628c5f656ull, - 0x49443b8748734aull, - 0x12fec0c0b25b7aull -}}; - -int -goldilocks_keygen ( - struct goldilocks_private_key_t *privkey, - struct goldilocks_public_key_t *pubkey -) { - // TODO: check for init. Also maybe take CRANDOM object? API... - word_t sk[448*2/WORD_BITS]; - - struct tw_extensible_t exta; - struct p448_t pk; - - int ret = crandom_generate(&goldilocks_global.rand, (unsigned char *)sk, sizeof(sk)); - barrett_reduce(sk,sizeof(sk)/sizeof(sk[0]),0,q448_lo,7,4,62); // TODO word size - q448_serialize(privkey->opaque, sk); - - edwards_comb(&exta, sk, goldilocks_global.combs, 5, 5, 18); - //transfer_and_serialize_qtor(&pk, &sqrt_d_minus_1, &exta); - untwist_and_double_and_serialize(&pk, &exta); - - p448_serialize(pubkey->opaque, &pk); - memcpy(&privkey->opaque[56], pubkey->opaque, 56); - - int ret2 = crandom_generate(&goldilocks_global.rand, &privkey->opaque[112], 32); - if (!ret) ret = ret2; - - return ret ? GOLDI_ENODICE : GOLDI_EOK; -} - -int -goldilocks_shared_secret ( - uint8_t shared[64], - const struct goldilocks_private_key_t *my_privkey, - const struct goldilocks_public_key_t *your_pubkey -) { - word_t sk[448/WORD_BITS]; - struct p448_t pk; - - mask_t succ = p448_deserialize(&pk,your_pubkey->opaque), msucc = -1; - -#ifdef EXPERIMENT_ECDH_STIR_IN_PUBKEYS - struct p448_t sum, prod; - msucc &= p448_deserialize(&sum,&my_privkey->opaque[56]); - p448_mul(&prod,&pk,&sum); - p448_add(&sum,&pk,&sum); -#endif - - msucc &= q448_deserialize(sk,my_privkey->opaque); - succ &= p448_montgomery_ladder(&pk,&pk,sk,446,2); - - p448_serialize(shared,&pk); - - /* obliterate records of our failure by adjusting with obliteration key */ - struct sha512_ctx_t ctx; - sha512_init(&ctx); - -#ifdef EXPERIMENT_ECDH_OBLITERATE_CT - uint8_t oblit[40]; - unsigned i; - for (i=0; i<8; i++) { - oblit[i] = "noshared"[i] & ~(succ&msucc); - } - for (i=0; i<32; i++) { - oblit[8+i] = my_privkey->opaque[112+i] & ~(succ&msucc); - } - sha512_update(&ctx, oblit, 40); -#endif - -#ifdef EXPERIMENT_ECDH_STIR_IN_PUBKEYS - /* stir in the sum and product of the pubkeys. */ - uint8_t a_pk[56]; - p448_serialize(a_pk, &sum); - sha512_update(&ctx, a_pk, 56); - p448_serialize(a_pk, &prod); - sha512_update(&ctx, a_pk, 56); -#endif - - /* stir in the shared key and finish */ - sha512_update(&ctx, shared, 56); - sha512_final(&ctx, shared); - - return (GOLDI_ECORRUPT & ~msucc) - | (GOLDI_EINVAL & msucc &~ succ) - | (GOLDI_EOK & msucc & succ); -} - -int -goldilocks_sign ( - uint8_t signature_out[56*2], - const uint8_t *message, - uint64_t message_len, - const struct goldilocks_private_key_t *privkey -) { - - /* challenge = H(pk, [nonceG], message). FIXME: endian. */ - word_t skw[448/WORD_BITS]; - mask_t succ = q448_deserialize(skw,privkey->opaque); - if (!succ) { - memset(skw,0,sizeof(skw)); - return GOLDI_ECORRUPT; - } - - /* Derive a nonce. TODO: use HMAC. FIXME: endian. FUTURE: factor. */ - word_t tk[512/WORD_BITS]; - struct sha512_ctx_t ctx; - sha512_init(&ctx); - sha512_update(&ctx, (const unsigned char *)"signonce", 8); - sha512_update(&ctx, &privkey->opaque[112], 32); - sha512_update(&ctx, message, message_len); - sha512_update(&ctx, &privkey->opaque[112], 32); - sha512_final(&ctx, (unsigned char *)tk); - barrett_reduce(tk,512/WORD_BITS,0,q448_lo,7,4,62); // TODO word size - - /* 4[nonce]G */ - uint8_t signature_tmp[56]; - struct tw_extensible_t exta; - struct p448_t gsk; - edwards_comb(&exta, tk, goldilocks_global.combs, 5, 5, 18); - double_tw_extensible(&exta); - untwist_and_double_and_serialize(&gsk, &exta); - p448_serialize(signature_tmp, &gsk); - - word_t challenge[512/WORD_BITS]; - sha512_update(&ctx, &privkey->opaque[56], 56); - sha512_update(&ctx, signature_tmp, 56); - sha512_update(&ctx, message, message_len); - sha512_final(&ctx, (unsigned char *)challenge); - - // reduce challenge and sub. - barrett_negate(challenge,512/WORD_BITS,q448_lo,7,4,62); - - barrett_mac( - tk,512/WORD_BITS, - challenge,512/WORD_BITS, - skw,448/WORD_BITS, - q448_lo,7,4,62 - ); - - word_t carry = add_nr_ext_packed(tk,tk,512/WORD_BITS,tk,512/WORD_BITS,-1); - barrett_reduce(tk,512/WORD_BITS,carry,q448_lo,7,4,62); - - memcpy(signature_out, signature_tmp, 56); - q448_serialize(signature_out+56, tk); - memset((unsigned char *)tk,0,sizeof(tk)); - memset((unsigned char *)skw,0,sizeof(skw)); - memset((unsigned char *)challenge,0,sizeof(challenge)); - - /* response = 2(nonce_secret - sk*challenge) - * Nonce = 8[nonce_secret]*G - * PK = 2[sk]*G, except doubled (TODO) - * so [2] ( [response]G + 2[challenge]PK ) = Nonce - */ - - return 0; -} - -int -goldilocks_verify ( - const uint8_t signature[56*2], - const uint8_t *message, - uint64_t message_len, - const struct goldilocks_public_key_t *pubkey -) { - struct p448_t pk; - word_t s[448/WORD_BITS]; - - mask_t succ = p448_deserialize(&pk,pubkey->opaque); - if (!succ) return GOLDI_EINVAL; - - succ = q448_deserialize(s, &signature[56]); - if (!succ) return GOLDI_EINVAL; - - /* challenge = H(pk, [nonceG], message). FIXME: endian. */ - word_t challenge[512/WORD_BITS]; - struct sha512_ctx_t ctx; - sha512_init(&ctx); - sha512_update(&ctx, pubkey->opaque, 56); - sha512_update(&ctx, signature, 56); - sha512_update(&ctx, message, message_len); - sha512_final(&ctx, (unsigned char *)challenge); - barrett_reduce(challenge,512/WORD_BITS,0,q448_lo,7,4,62); - - struct p448_t eph; - struct tw_extensible_t pk_text; - - /* deserialize [nonce]G */ - succ = p448_deserialize(&eph, signature); - if (!succ) return GOLDI_EINVAL; - - - // succ = affine_deserialize(&pk_aff,&pk); - // if (!succ) return EINVAL; - // - // convert_affine_to_extensible(&pk_ext,&pk_aff); - // transfer_un_to_tw(&pk_text,&pk_ext); - succ = deserialize_and_twist_approx(&pk_text, &sqrt_d_minus_1, &pk); - if (!succ) return GOLDI_EINVAL; - - edwards_combo_var_fixed_vt( &pk_text, challenge, s, goldilocks_global.wnafs, 5 ); - - untwist_and_double_and_serialize( &pk, &pk_text ); - p448_sub(&eph, &eph, &pk); - p448_bias(&eph, 2); - - succ = p448_is_zero(&eph); - - return succ ? 0 : GOLDI_EINVAL; -} diff --git a/goldilocks.h b/goldilocks.h deleted file mode 100644 index feba5e9..0000000 --- a/goldilocks.h +++ /dev/null @@ -1,171 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -/** - * @file goldilocks.h - * @author Mike Hamburg - * @brief Goldilocks high-level functions. - */ -#ifndef __GOLDILOCKS_H__ -#define __GOLDILOCKS_H__ 1 - -#include - -/** - * @brief Serialized form of a Goldilocks public key. - * - * @warning This isn't even my final form! - */ -struct goldilocks_public_key_t { - uint8_t opaque[56]; /**< Serialized data. */ -}; - -/** - * @brief Serialized form of a Goldilocks private key. - * - * Contains 56 bytes of actual private key, 56 bytes of - * public key, and 32 bytes of symmetric key for randomization. - * - * @warning This isn't even my final form! - */ -struct goldilocks_private_key_t { - uint8_t opaque[144]; /**< Serialized data. */ -}; - -#ifdef __cplusplus -extern "C" { -#endif - -/** @brief No error. */ -static const int GOLDI_EOK = 0; - -/** @brief Error: your key is corrupt. */ -static const int GOLDI_ECORRUPT = 44801; - -/** @brief Error: other party's key is corrupt. */ -static const int GOLDI_EINVAL = 44802; - -/** @brief Error: not enough entropy. */ -static const int GOLDI_ENODICE = 44804; - -/** - * @brief Initialize Goldilocks' precomputed tables and - * random number generator. - * @retval GOLDI_EOK Success. - * @retval Nonzero An error occurred. - */ -int -goldilocks_init(); - -/** - * @brief Generate a new random keypair. - * @param [out] privkey The generated private key. - * @param [out] pubkey The generated public key. - * - * @warning This isn't even my final form! - * - * @retval GOLDI_EOK Success. - * @retval GOLDI_ENODICE Insufficient entropy. - */ -int -goldilocks_keygen ( - struct goldilocks_private_key_t *privkey, - struct goldilocks_public_key_t *pubkey -) __attribute__((warn_unused_result)); - -/** - * @brief Generate a Diffie-Hellman shared secret in constant time. - * - * This function uses some compile-time flags whose merit remains to - * be decided. - * - * If the flag EXPERIMENT_ECDH_OBLITERATE_CT is set, prepend 40 bytes - * of zeros to the secret before hashing. In the case that the other - * party's key is detectably corrupt, instead the symmetric part - * of the secret key is used to produce a pseudorandom value. - * - * If EXPERIMENT_ECDH_STIR_IN_PUBKEYS is set, the sum and product of - * the two parties' public keys is prepended to the hash. - * - * @warning This isn't even my final form! - * - * @param [out] shared The shared secret established with the other party. - * @param [in] my_privkey My private key. - * @param [in] your_pubkey The other party's public key. - * - * @retval GOLDI_EOK Success. - * @retval GOLDI_ECORRUPT My key is corrupt. - * @retval GOLDI_EINVAL The other party's key is corrupt. - */ -int -goldilocks_shared_secret ( - uint8_t shared[64], - const struct goldilocks_private_key_t *my_privkey, - const struct goldilocks_public_key_t *your_pubkey -) __attribute__((warn_unused_result)); - -/** - * @brief Sign a message. - * - * The signature is deterministic, using the symmetric secret found in the - * secret key to form a nonce. - * - * The technique used in signing is a modified Schnorr system, like EdDSA. - * - * @warning This isn't even my final form! - * @warning This function contains endian bugs. (TODO) - * - * @param [out] signature_out Space for the output signature. - * @param [in] message The message to be signed. - * @param [in] message_len The length of the message to be signed. - * @param [in] privkey My private key. - * - * @retval GOLDI_EOK Success. - * @retval GOLDI_ECORRUPT My key is corrupt. - */ -int -goldilocks_sign ( - uint8_t signature_out[56*2], - const uint8_t *message, - uint64_t message_len, - const struct goldilocks_private_key_t *privkey -); - -/** - * @brief Verify a signature. - * - * This function is fairly strict. It will correctly detect when - * the signature has the wrong cofactor companent. Once deserialization - * of numbers is strictified (TODO) it will limit the response to being - * less than q as well. - * - * Currently this function does not detect when the public key is weird, - * eg 0, has cofactor, etc. As a result, a party with a bogus public - * key could create signatures that succeed on some systems and fail on - * others. - * - * @warning This isn't even my final form! - * @warning This function contains endian bugs. (TODO) - * - * @param [out] signature_out The signature. - * @param [in] message The message to be verified. - * @param [in] message_len The length of the message to be verified. - * @param [in] pubkey The signer's public key. - * - * @retval GOLDI_EOK Success. - * @retval GOLDI_EINVAL The public key or signature is corrupt. - */ -int -goldilocks_verify ( - const uint8_t signature[56*2], - const uint8_t *message, - uint64_t message_len, - const struct goldilocks_public_key_t *pubkey -) __attribute__((warn_unused_result)); - -#ifdef __cplusplus -}; /* extern "C" */ -#endif - -#endif /* __GOLDILOCKS_H__ */ diff --git a/intrinsics.h b/intrinsics.h deleted file mode 100644 index 93a0974..0000000 --- a/intrinsics.h +++ /dev/null @@ -1,199 +0,0 @@ -/* Copyright (c) 2011 Stanford University. - * Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -/** @file crandom.h - * @brief cRandom intrinsics header. - */ - -#ifndef __CRANDOM_INTRINSICS_H__ -#define __CRANDOM_INTRINSICS_H__ 1 - -#include - -#include - -#define INTRINSIC \ - static __inline__ __attribute__((__gnu_inline__, __always_inline__)) - -#define GEN 1 -#define SSE2 2 -#define SSSE3 4 -#define AESNI 8 -#define XOP 16 -#define AVX 32 -#define AVX2 64 -#define RDRAND 128 - -INTRINSIC u_int64_t rdtsc() { - u_int64_t out = 0; -# if (defined(__i386__) || defined(__x86_64__)) - __asm__ __volatile__ ("rdtsc" : "=A"(out)); -# endif - return out; -} - -/** - * Return x unchanged, but confuse the compiler. - * - * This is mainly for use in test scripts, to prevent the value from - * being constant-folded or removed by dead code elimination. - * - * @param x A 64-bit number. - * @return The same number in a register. - */ -INTRINSIC u_int64_t opacify(u_int64_t x) { - __asm__ volatile("mov %0, %0" : "+r"(x)); - return x; -} - -#ifdef __AVX2__ -# define MIGHT_HAVE_AVX2 1 -# ifndef MUST_HAVE_AVX2 -# define MUST_HAVE_AVX2 0 -# endif -#else -# define MIGHT_HAVE_AVX2 0 -# define MUST_HAVE_AVX2 0 -#endif - -#ifdef __AVX__ -# define MIGHT_HAVE_AVX 1 -# ifndef MUST_HAVE_AVX -# define MUST_HAVE_AVX MUST_HAVE_AVX2 -# endif -#else -# define MIGHT_HAVE_AVX 0 -# define MUST_HAVE_AVX 0 -#endif - -#ifdef __SSSE3__ -# define MIGHT_HAVE_SSSE3 1 -# ifndef MUST_HAVE_SSSE3 -# define MUST_HAVE_SSSE3 MUST_HAVE_AVX -# endif -#else -# define MIGHT_HAVE_SSSE3 0 -# define MUST_HAVE_SSSE3 0 -#endif - -#ifdef __SSE2__ -# define MIGHT_HAVE_SSE2 1 -# ifndef MUST_HAVE_SSE2 -# define MUST_HAVE_SSE2 MUST_HAVE_SSSE3 -# endif - typedef __m128i ssereg; -# define pslldq _mm_slli_epi32 -# define pshufd _mm_shuffle_epi32 - -INTRINSIC ssereg sse2_rotate(int r, ssereg a) { - return _mm_slli_epi32(a, r) ^ _mm_srli_epi32(a, 32-r); -} - -#else -# define MIGHT_HAVE_SSE2 0 -# define MUST_HAVE_SSE2 0 -#endif - -#ifdef __AES__ -/* don't include intrinsics file, because not all platforms have it */ -# define MIGHT_HAVE_AESNI 1 -# ifndef MIGHT_HAVE_RDRAND -# define MIGHT_HAVE_RDRAND 1 -# endif -# ifndef MUST_HAVE_RDRAND -# define MUST_HAVE_RDRAND 0 -# endif -# ifndef MUST_HAVE_AESNI -# define MUST_HAVE_AESNI 0 -# endif - -INTRINSIC ssereg aeskeygenassist(int rc, ssereg x) { - ssereg out; - __asm__("aeskeygenassist %2, %1, %0" : "=x"(out) : "x"(x), "g"(rc)); - return out; -} - -INTRINSIC ssereg aesenc(ssereg subkey, ssereg block) { - ssereg out = block; - __asm__("aesenc %1, %0" : "+x"(out) : "x"(subkey)); - return out; -} - -INTRINSIC ssereg aesenclast(ssereg subkey, ssereg block) { - ssereg out = block; - __asm__("aesenclast %1, %0" : "+x"(out) : "x"(subkey)); - return out; -} - -#else -# define MIGHT_HAVE_AESNI 0 -# define MUST_HAVE_AESNI 0 -# define MIGHT_HAVE_RDRAND 0 -# define MUST_HAVE_RDRAND 0 -#endif - -#ifdef __XOP__ -/* don't include intrinsics file, because not all platforms have it */ -# define MIGHT_HAVE_XOP 1 -# ifndef MUST_HAVE_XOP -# define MUST_HAVE_XOP 0 -# endif -INTRINSIC ssereg xop_rotate(int amount, ssereg x) { - ssereg out; - __asm__ ("vprotd %1, %2, %0" : "=x"(out) : "x"(x), "g"(amount)); - return out; -} -#else -# define MIGHT_HAVE_XOP 0 -# define MUST_HAVE_XOP 0 -#endif - -#define MIGHT_MASK \ - ( SSE2 * MIGHT_HAVE_SSE2 \ - | SSSE3 * MIGHT_HAVE_SSSE3 \ - | AESNI * MIGHT_HAVE_AESNI \ - | XOP * MIGHT_HAVE_XOP \ - | AVX * MIGHT_HAVE_AVX \ - | RDRAND * MIGHT_HAVE_RDRAND \ - | AVX2 * MIGHT_HAVE_AVX2) - -#define MUST_MASK \ - ( SSE2 * MUST_HAVE_SSE2 \ - | SSSE3 * MUST_HAVE_SSSE3 \ - | AESNI * MUST_HAVE_AESNI \ - | XOP * MUST_HAVE_XOP \ - | AVX * MUST_HAVE_AVX \ - | RDRAND * MUST_HAVE_RDRAND \ - | AVX2 * MUST_HAVE_AVX2 ) - -#define MIGHT_HAVE(feature) ((MIGHT_MASK & feature) == feature) -#define MUST_HAVE(feature) ((MUST_MASK & feature) == feature) - -#ifdef __cplusplus -# define extern_c extern "C" -#else -# define extern_c -#endif - -extern_c -unsigned int crandom_detect_features(); - -#ifndef likely -# define likely(x) __builtin_expect((x),1) -# define unlikely(x) __builtin_expect((x),0) -#endif - -extern volatile unsigned int crandom_features; -INTRINSIC int HAVE(unsigned int feature) { - unsigned int features; - if (!MIGHT_HAVE(feature)) return 0; - if (MUST_HAVE(feature)) return 1; - features = crandom_features; - if (unlikely(!features)) - crandom_features = features = crandom_detect_features(); - return likely((features & feature) == feature); -} - -#endif /* __CRANDOM_INTRINSICS_H__ */ diff --git a/p448.c b/p448.c deleted file mode 100644 index 8327370..0000000 --- a/p448.c +++ /dev/null @@ -1,446 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -#include "p448.h" -#include "x86-64-arith.h" - -void -p448_mul ( - p448_t *__restrict__ cs, - const p448_t *as, - const p448_t *bs -) { - const uint64_t *a = as->limb, *b = bs->limb; - uint64_t *c = cs->limb; - - __uint128_t accum0 = 0, accum1 = 0, accum2; - uint64_t mask = (1ull<<56) - 1; - - uint64_t aa[4], bb[4]; - - /* For some reason clang doesn't vectorize this without prompting? */ - unsigned int i; - for (i=0; i>= 56; - accum1 >>= 56; - - { - accum2 = accum1; - accum1 += accum0; - accum0 = accum2; - } - - accum2 = widemul(&a[0],&b[0]); - accum1 -= accum2; - accum0 += accum2; - - accum2 = widemul(&aa[1],&bb[3]); - msb(&accum0, &a[1], &b[3]); - mac(&accum1, &a[5], &b[7]); - - msb(&accum0, &a[2], &b[2]); - mac(&accum2, &aa[2], &bb[2]); - mac(&accum1, &a[6], &b[6]); - - msb(&accum0, &a[3], &b[1]); - mac(&accum1, &a[7], &b[5]); - mac(&accum2, &aa[3], &bb[1]); - - accum0 += accum2; - accum1 += accum2; - mac(&accum0, &a[4], &b[4]); - mac(&accum1, &aa[0], &bb[0]); - - c[0] = ((uint64_t)(accum0)) & mask; - c[4] = ((uint64_t)(accum1)) & mask; - - accum0 >>= 56; - accum1 >>= 56; - - accum2 = widemul(&aa[2],&bb[3]); - msb(&accum0, &a[2], &b[3]); - mac(&accum1, &a[6], &b[7]); - - mac(&accum2, &aa[3], &bb[2]); - msb(&accum0, &a[3], &b[2]); - mac(&accum1, &a[7], &b[6]); - - accum1 += accum2; - accum0 += accum2; - - accum2 = widemul(&a[0],&b[1]); - mac(&accum1, &aa[0], &bb[1]); - mac(&accum0, &a[4], &b[5]); - - mac(&accum2, &a[1], &b[0]); - mac(&accum1, &aa[1], &bb[0]); - mac(&accum0, &a[5], &b[4]); - - accum1 -= accum2; - accum0 += accum2; - - c[1] = ((uint64_t)(accum0)) & mask; - c[5] = ((uint64_t)(accum1)) & mask; - - accum0 >>= 56; - accum1 >>= 56; - - accum2 = widemul(&aa[3],&bb[3]); - msb(&accum0, &a[3], &b[3]); - mac(&accum1, &a[7], &b[7]); - - accum1 += accum2; - accum0 += accum2; - - accum2 = widemul(&a[0],&b[2]); - mac(&accum1, &aa[0], &bb[2]); - mac(&accum0, &a[4], &b[6]); - - mac(&accum2, &a[1], &b[1]); - mac(&accum1, &aa[1], &bb[1]); - mac(&accum0, &a[5], &b[5]); - - mac(&accum2, &a[2], &b[0]); - mac(&accum1, &aa[2], &bb[0]); - mac(&accum0, &a[6], &b[4]); - - accum1 -= accum2; - accum0 += accum2; - - c[2] = ((uint64_t)(accum0)) & mask; - c[6] = ((uint64_t)(accum1)) & mask; - - accum0 >>= 56; - accum1 >>= 56; - - accum0 += c[3]; - accum1 += c[7]; - c[3] = ((uint64_t)(accum0)) & mask; - c[7] = ((uint64_t)(accum1)) & mask; - - /* we could almost stop here, but it wouldn't be stable, so... */ - - accum0 >>= 56; - accum1 >>= 56; - c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); - c[0] += ((uint64_t)(accum1)); -} - -void -p448_mulw ( - p448_t *__restrict__ cs, - const p448_t *as, - uint64_t b -) { - const uint64_t *a = as->limb; - uint64_t *c = cs->limb; - - __uint128_t accum0, accum4; - uint64_t mask = (1ull<<56) - 1; - - accum0 = widemul_rm(b, &a[0]); - accum4 = widemul_rm(b, &a[4]); - - c[0] = accum0 & mask; accum0 >>= 56; - c[4] = accum4 & mask; accum4 >>= 56; - - mac_rm(&accum0, b, &a[1]); - mac_rm(&accum4, b, &a[5]); - - c[1] = accum0 & mask; accum0 >>= 56; - c[5] = accum4 & mask; accum4 >>= 56; - - mac_rm(&accum0, b, &a[2]); - mac_rm(&accum4, b, &a[6]); - - c[2] = accum0 & mask; accum0 >>= 56; - c[6] = accum4 & mask; accum4 >>= 56; - - mac_rm(&accum0, b, &a[3]); - mac_rm(&accum4, b, &a[7]); - - c[3] = accum0 & mask; accum0 >>= 56; - c[7] = accum4 & mask; accum4 >>= 56; - - c[4] += accum0 + accum4; - c[0] += accum4; -} - -void -p448_sqr ( - p448_t *__restrict__ cs, - const p448_t *as -) { - const uint64_t *a = as->limb; - uint64_t *c = cs->limb; - - __uint128_t accum0 = 0, accum1 = 0, accum2; - uint64_t mask = (1ull<<56) - 1; - - uint64_t aa[4]; - - /* For some reason clang doesn't vectorize this without prompting? */ - unsigned int i; - for (i=0; i>= 55; - accum1 >>= 55; - - { - accum2 = accum1; - accum1 += accum0; - accum0 = accum2; - } - - accum2 = widemul(&a[0],&a[0]); - accum1 -= accum2; - accum0 += accum2; - - accum2 = widemul2(&aa[1],&aa[3]); - msb2(&accum0, &a[1], &a[3]); - mac2(&accum1, &a[5], &a[7]); - - msb(&accum0, &a[2], &a[2]); - mac(&accum2, &aa[2], &aa[2]); - mac(&accum1, &a[6], &a[6]); - - accum0 += accum2; - accum1 += accum2; - mac(&accum0, &a[4], &a[4]); - mac(&accum1, &aa[0], &aa[0]); - - c[0] = ((uint64_t)(accum0)) & mask; - c[4] = ((uint64_t)(accum1)) & mask; - - accum0 >>= 56; - accum1 >>= 56; - - accum2 = widemul2(&aa[2],&aa[3]); - msb2(&accum0, &a[2], &a[3]); - mac2(&accum1, &a[6], &a[7]); - - accum1 += accum2; - accum0 += accum2; - - accum2 = widemul2(&a[0],&a[1]); - mac2(&accum1, &aa[0], &aa[1]); - mac2(&accum0, &a[4], &a[5]); - - accum1 -= accum2; - accum0 += accum2; - - c[1] = ((uint64_t)(accum0)) & mask; - c[5] = ((uint64_t)(accum1)) & mask; - - accum0 >>= 56; - accum1 >>= 56; - - accum2 = widemul(&aa[3],&aa[3]); - msb(&accum0, &a[3], &a[3]); - mac(&accum1, &a[7], &a[7]); - - accum1 += accum2; - accum0 += accum2; - - accum2 = widemul2(&a[0],&a[2]); - mac2(&accum1, &aa[0], &aa[2]); - mac2(&accum0, &a[4], &a[6]); - - mac(&accum2, &a[1], &a[1]); - mac(&accum1, &aa[1], &aa[1]); - mac(&accum0, &a[5], &a[5]); - - accum1 -= accum2; - accum0 += accum2; - - c[2] = ((uint64_t)(accum0)) & mask; - c[6] = ((uint64_t)(accum1)) & mask; - - accum0 >>= 56; - accum1 >>= 56; - - accum0 += c[3]; - accum1 += c[7]; - c[3] = ((uint64_t)(accum0)) & mask; - c[7] = ((uint64_t)(accum1)) & mask; - - /* we could almost stop here, but it wouldn't be stable, so... */ - - accum0 >>= 56; - accum1 >>= 56; - c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); - c[0] += ((uint64_t)(accum1)); -} - -void -p448_strong_reduce ( - p448_t *a -) { - uint64_t mask = (1ull<<56)-1; - - /* first, clear high */ - a->limb[4] += a->limb[7]>>56; - a->limb[0] += a->limb[7]>>56; - a->limb[7] &= mask; - - /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */ - - /* compute total_value - p. No need to reduce mod p. */ - - __int128_t scarry = 0; - int i; - for (i=0; i<8; i++) { - scarry = scarry + a->limb[i] - ((i==4)?mask-1:mask); - a->limb[i] = scarry & mask; - scarry >>= 56; - } - - /* uncommon case: it was >= p, so now scarry = 0 and this = x - * common case: it was < p, so now scarry = -1 and this = x - p + 2^448 - * so let's add back in p. will carry back off the top for 2^448. - */ - - assert(is_zero(scarry) | is_zero(scarry+1)); - - uint64_t scarry_mask = scarry & mask; - __uint128_t carry = 0; - - /* add it back */ - for (i=0; i<8; i++) { - carry = carry + a->limb[i] + ((i==4)?(scarry_mask&~1):scarry_mask); - a->limb[i] = carry & mask; - carry >>= 56; - } - - assert(is_zero(carry + scarry)); -} - -mask_t -p448_is_zero ( - const struct p448_t *a -) { - struct p448_t b; - p448_copy(&b,a); - p448_strong_reduce(&b); - - uint64_t any = 0; - int i; - for (i=0; i<8; i++) { - any |= b.limb[i]; - } - return is_zero(any); -} - -void -p448_serialize ( - uint8_t *serial, - const struct p448_t *x -) { - int i,j; - p448_t red; - p448_copy(&red, x); - p448_strong_reduce(&red); - for (i=0; i<8; i++) { - for (j=0; j<7; j++) { - serial[7*i+j] = red.limb[i]; - red.limb[i] >>= 8; - } - assert(red.limb[i] == 0); - } -} - -void -q448_serialize ( - uint8_t *serial, - const word_t x[7] -) { - int i,j; - for (i=0; i<7; i++) { - for (j=0; j<8; j++) { - serial[8*i+j] = x[i]>>(8*j); - } - } -} - -mask_t -q448_deserialize ( - word_t x[7], - const uint8_t serial[56] -) { - int i,j; - for (i=0; i<7; i++) { - word_t out = 0; - for (j=0; j<8; j++) { - out |= ((word_t)serial[8*i+j])<<(8*j); - } - x[i] = out; - } - /* TODO: check for reduction */ - return MASK_SUCCESS; -} - -mask_t -p448_deserialize ( - p448_t *x, - const uint8_t serial[56] -) { - int i,j; - for (i=0; i<8; i++) { - word_t out = 0; - for (j=0; j<7; j++) { - out |= ((word_t)serial[7*i+j])<<(8*j); - } - x->limb[i] = out; - } - /* TODO: check for reduction */ - return MASK_SUCCESS; -} diff --git a/p448.h b/p448.h deleted file mode 100644 index 3150614..0000000 --- a/p448.h +++ /dev/null @@ -1,330 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#ifndef __P448_H__ -#define __P448_H__ 1 - -#include -#include - -#include "word.h" - -typedef struct p448_t { - uint64_t limb[8]; -} __attribute__((aligned(32))) p448_t; - -#ifdef __cplusplus -extern "C" { -#endif - -static __inline__ void -p448_set_ui ( - p448_t *out, - uint64_t x -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t do_swap -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_add ( - p448_t *out, - const p448_t *a, - const p448_t *b -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_sub ( - p448_t *out, - const p448_t *a, - const p448_t *b -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_neg ( - p448_t *out, - const p448_t *a -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_neg ( - p448_t *a, - mask_t doNegate -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_addw ( - p448_t *a, - uint64_t x -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_subw ( - p448_t *a, - uint64_t x -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_copy ( - p448_t *out, - const p448_t *a -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_weak_reduce ( - p448_t *inout -) __attribute__((unused,always_inline)); - -void -p448_strong_reduce ( - p448_t *inout -); - -mask_t -p448_is_zero ( - const p448_t *in -); - -static __inline__ void -p448_bias ( - p448_t *inout, - int amount -) __attribute__((unused,always_inline)); - -void -p448_mul ( - p448_t *__restrict__ out, - const p448_t *a, - const p448_t *b -); - -void -p448_mulw ( - p448_t *__restrict__ out, - const p448_t *a, - uint64_t b -); - -void -p448_sqr ( - p448_t *__restrict__ out, - const p448_t *a -); - -static __inline__ void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) __attribute__((unused,always_inline)); - -void -p448_serialize ( - uint8_t *serial, - const struct p448_t *x -); - -void -q448_serialize ( - uint8_t *serial, - const word_t x[7] -); - -mask_t -q448_deserialize ( - word_t x[7], - const uint8_t serial[56] -); - -mask_t -p448_deserialize ( - p448_t *x, - const uint8_t serial[56] -); - -/* -------------- Inline functions begin here -------------- */ - -void -p448_set_ui ( - p448_t *out, - uint64_t x -) { - int i; - out->limb[0] = x; - for (i=1; i<8; i++) { - out->limb[i] = 0; - } -} - -void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t doswap -) { - big_register_t *aa = (big_register_t*)a; - big_register_t *bb = (big_register_t*)b; - big_register_t m = doswap; - - unsigned int i; - for (i=0; ilimb[0]); i++) { - out->limb[i] = a->limb[i] + b->limb[i]; - } - */ -} - -void -p448_sub ( - p448_t *out, - const p448_t *a, - const p448_t *b -) { - unsigned int i; - for (i=0; ilimb[0]); i++) { - out->limb[i] = a->limb[i] - b->limb[i]; - } - */ -} - -void -p448_neg ( - p448_t *out, - const p448_t *a -) { - unsigned int i; - for (i=0; ilimb[0]); i++) { - out->limb[i] = -a->limb[i]; - } - */ -} - -void -p448_cond_neg( - p448_t *a, - mask_t doNegate -) { - unsigned int i; - struct p448_t negated; - big_register_t *aa = (big_register_t *)a; - big_register_t *nn = (big_register_t*)&negated; - big_register_t m = doNegate; - - p448_neg(&negated, a); - p448_bias(&negated, 2); - - for (i=0; ilimb[0] += x; -} - -void -p448_subw ( - p448_t *a, - uint64_t x -) { - a->limb[0] -= x; -} - -void -p448_copy ( - p448_t *out, - const p448_t *a -) { - *out = *a; -} - -void -p448_bias ( - p448_t *a, - int amt -) { - uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt; - uint64x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; - uint64x4_t *aa = (uint64x4_t*) a; - aa[0] += lo; - aa[1] += hi; -} - -void -p448_weak_reduce ( - p448_t *a -) { - /* PERF: use pshufb/palignr if anyone cares about speed of this */ - uint64_t mask = (1ull<<56) - 1; - uint64_t tmp = a->limb[7] >> 56; - int i; - a->limb[4] += tmp; - for (i=7; i>0; i--) { - a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56); - } - a->limb[0] = (a->limb[0] & mask) + tmp; -} - -void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) { - p448_t tmp; - assert(n>0); - if (n&1) { - p448_sqr(y,x); - n--; - } else { - p448_sqr(&tmp,x); - p448_sqr(y,&tmp); - n-=2; - } - for (; n; n-=2) { - p448_sqr(&tmp,y); - p448_sqr(y,&tmp); - } -} - -#ifdef __cplusplus -}; /* extern "C" */ -#endif - -#endif /* __P448_H__ */ diff --git a/scalarmul.c b/scalarmul.c deleted file mode 100644 index d2fe50f..0000000 --- a/scalarmul.c +++ /dev/null @@ -1,776 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#include - -#include "scalarmul.h" -#include "string.h" -#include "barrett_field.h" - -mask_t -p448_montgomery_ladder( - struct p448_t *out, - const struct p448_t *in, - const uint64_t *scalar, - int nbits, - int n_extra_doubles -) { - struct montgomery_t mont; - p448_sqr(&mont.z0,in); - p448_copy(&mont.za,&mont.z0); - p448_set_ui(&mont.xa,1); - p448_set_ui(&mont.zd,0); - p448_set_ui(&mont.xd,1); - - int i,j,n=(nbits-1)&63; - mask_t pflip = 0; - for (j=(nbits+63)/64-1; j>=0; j--) { - uint64_t w = scalar[j]; - for (i=n; i>=0; i--) { - mask_t flip = -((w>>i)&1); - p448_cond_swap(&mont.xa,&mont.xd,flip^pflip); - p448_cond_swap(&mont.za,&mont.zd,flip^pflip); - montgomery_step(&mont); - pflip = flip; - } - n = 63; - } - p448_cond_swap(&mont.xa,&mont.xd,pflip); - p448_cond_swap(&mont.za,&mont.zd,pflip); - - for (j=0; ja, &n->b, doNegate); - p448_cond_neg(&n->c, doNegate); -} - -static __inline__ void -cond_negate_tw_pniels( - struct tw_pniels_t *n, - mask_t doNegate -) { - cond_negate_tw_niels(&n->n, doNegate); -} - -void -constant_time_lookup_tw_pniels( - struct tw_pniels_t *out, - const struct tw_pniels_t *in, - int nin, - int idx -) { - big_register_t big_one = 1, big_i = idx; - big_register_t *o = (big_register_t *)out; - const big_register_t *i = (const big_register_t *)in; - int j; - unsigned int k; - - memset(out, 0, sizeof(*out)); - for (j=0; j>= 1; - if (i> (i%64) & 0xF, - inv = (bits>>3)-1; - bits ^= inv; - - constant_time_lookup_tw_pniels(&pn, multiples, 8, bits&7); - cond_negate_tw_pniels(&pn, inv); - convert_tw_pniels_to_tw_extensible(working, &pn); - - - for (i-=4; i>=0; i-=4) { - double_tw_extensible(working); - double_tw_extensible(working); - double_tw_extensible(working); - double_tw_extensible(working); - - bits = scalar2[i/64] >> (i%64) & 0xF; - inv = (bits>>3)-1; - bits ^= inv; - - constant_time_lookup_tw_pniels(&pn, multiples, 8, bits&7); - cond_negate_tw_pniels(&pn, inv); - add_tw_pniels_to_tw_extensible(working, &pn); - } -} - -void -edwards_scalar_multiply_vlook( - struct tw_extensible_t *working, - const uint64_t scalar[7] -) { - - const int nbits=448; /* HACK? */ - word_t prepared_data[14] = { - 0x9595b847fdf73126ull, - 0x9bb9b8a856af5200ull, - 0xb3136e22f37d5c4full, - 0x0000000189a19442ull, - 0x0000000000000000ull, - 0x0000000000000000ull, - 0x4000000000000000ull, - - 0x721cf5b5529eec33ull, - 0x7a4cf635c8e9c2abull, - 0xeec492d944a725bfull, - 0x000000020cd77058ull, - 0x0000000000000000ull, - 0x0000000000000000ull, - 0x0000000000000000ull - }; /* TODO: split off */ - - uint64_t scalar2[7]; - convert_to_signed_window_form(scalar2,scalar,prepared_data,7); - - struct tw_extensible_t tabulator; - copy_tw_extensible(&tabulator, working); - double_tw_extensible(&tabulator); - - struct tw_pniels_t pn, multiples[8]; - convert_tw_extensible_to_tw_pniels(&pn, &tabulator); - convert_tw_extensible_to_tw_pniels(&multiples[0], working); - - int i; - for (i=1; i<8; i++) { - add_tw_pniels_to_tw_extensible(working, &pn); - convert_tw_extensible_to_tw_pniels(&multiples[i], working); - } - - i = nbits - 4; - int bits = scalar2[i/64] >> (i%64) & 0xF, - inv = (bits>>3)-1; - bits ^= inv; - - copy_tw_pniels(&pn, &multiples[bits&7]); - cond_negate_tw_pniels(&pn, inv); - convert_tw_pniels_to_tw_extensible(working, &pn); - - - for (i-=4; i>=0; i-=4) { - double_tw_extensible(working); - double_tw_extensible(working); - double_tw_extensible(working); - double_tw_extensible(working); - - bits = scalar2[i/64] >> (i%64) & 0xF; - inv = (bits>>3)-1; - bits ^= inv; - - copy_tw_pniels(&pn, &multiples[bits&7]); - cond_negate_tw_pniels(&pn, inv); - add_tw_pniels_to_tw_extensible(working, &pn); - } -} - - -void -edwards_comb( - struct tw_extensible_t *working, - const word_t scalar[7], - const struct tw_niels_t *table, - int n, - int t, - int s -) { - word_t prepared_data[14] = { - 0xebec9967f5d3f5c2ull, - 0x0aa09b49b16c9a02ull, - 0x7f6126aec172cd8eull, - 0x00000007b027e54dull, - 0x0000000000000000ull, - 0x0000000000000000ull, - 0x4000000000000000ull, - - 0xc873d6d54a7bb0cfull, - 0xe933d8d723a70aadull, - 0xbb124b65129c96fdull, - 0x00000008335dc163ull, - 0x0000000000000000ull, - 0x0000000000000000ull, - 0x0000000000000000ull - }; /* TODO: split off. Above is for 450 bits */ - - word_t scalar2[7]; - convert_to_signed_window_form(scalar2,scalar,prepared_data,7); - - /* const int n=3, t=5, s=30; */ - int i,j,k; - - struct tw_niels_t ni; - - for (i=0; i> (bit%WORD_BITS) & 1) << k; - } - } - - mask_t invert = (tab>>(t-1))-1; - tab ^= invert; - tab &= (1<<(t-1)) - 1; - - constant_time_lookup_tw_niels(&ni, table + (j<<(t-1)), 1<<(t-1), tab); - cond_negate_tw_niels(&ni, invert); - if (i||j) { - add_tw_niels_to_tw_extensible(working, &ni); - } else { - convert_tw_niels_to_tw_extensible(working, &ni); - } - } - } -} - -void -simultaneous_invert_p448( - struct p448_t *out, - const struct p448_t *in, - int n -) { - if (!n) return; - - p448_copy(&out[1], &in[0]); - int i; - for (i=1; i0; i--) { - p448_mul(&tmp, &out[i], &out[0]); - p448_copy(&out[i], &tmp); - - p448_mul(&tmp, &out[0], &in[i]); - p448_copy(&out[0], &tmp); - } -} - -mask_t -precompute_for_combs( - struct tw_niels_t *out, - const struct tw_extensible_t *const_base, - int n, - int t, - int s -) { - if (s < 1) return 0; - - struct tw_extensible_t working, start; - copy_tw_extensible(&working, const_base); - struct tw_pniels_t pn_tmp; - - struct tw_pniels_t *doubles = (struct tw_pniels_t *) malloc(sizeof(*doubles) * (t-1)); - struct p448_t *zs = (struct p448_t *) malloc(sizeof(*zs) * (n<<(t-1))); - struct p448_t *zis = (struct p448_t *) malloc(sizeof(*zis) * (n<<(t-1))); - - if (!doubles || !zs || !zis) { - free(doubles); - free(zs); - free(zis); - return 0; - } - - int i,j,k; - for (i=0; i>1); - int idx = ((i+1)<<(t-1))-1 ^ gray; - - convert_tw_extensible_to_tw_pniels(&pn_tmp, &start); - copy_tw_niels(&out[idx], &pn_tmp.n); - p448_copy(&zs[idx], &pn_tmp.z); - - if (j >= (1<<(t-1)) - 1) break; - int delta = (j+1) ^ ((j+1)>>1) ^ gray; - - for (k=0; delta>1; k++) - delta >>=1; - - if (gray & (1< 0) { - double_tw_extensible(&base); - convert_tw_extensible_to_tw_pniels(&twop, &base); - add_tw_pniels_to_tw_extensible(&base, &tmp); - - convert_tw_extensible_to_tw_pniels(&tmp, &base); - p448_copy(&zs[1], &tmp.z); - copy_tw_niels(&out[1], &tmp.n); - - for (i=2; i < 1<= -2 - tableBits; i--) { - int bit = (i >= 0) - ? (scalar[i/WORD_BITS] >> (i%WORD_BITS)) & 1 - : 0; - - current = 2*current + bit; - - /* - * Sizing: |current| >= 2^(tableBits+1) -> |current| = 2^0 - * So current loses (tableBits+1) bits every time. It otherwise gains - * 1 bit per iteration. The number of iterations is - * (nbits + 2 + tableBits), and an additional control word is added at - * the end. So the total number of control words is at most - * ceil((nbits+1) / (tableBits+1)) + 2 = floor((nbits)/(tableBits+1)) + 2. - * There's also the stopper with power -1, for a total of +3. - */ - if (current >= (2<> 1; - current = -(current & 1); - - int j; - for (j=i; (delta & 1) == 0; j++) { - delta >>= 1; - } - control[position].power = j+1; - control[position].addend = delta; - position++; - assert(position <= nbits/(tableBits+1) + 2); - } - } - - control[position].power = -1; - control[position].addend = 0; - return position; -} - - -static void -prepare_wnaf_table( - struct tw_pniels_t *output, - struct tw_extensible_t *working, - int tbits -) { - convert_tw_extensible_to_tw_pniels(&output[0], working); - - if (tbits == 0) return; - - double_tw_extensible(working); - struct tw_pniels_t twop; - convert_tw_extensible_to_tw_pniels(&twop, working); - - add_tw_pniels_to_tw_extensible(working, &output[0]); - convert_tw_extensible_to_tw_pniels(&output[1], working); - - for (int i=2; i < 1< 0) { - assert(control[0].addend > 0); - assert(control[0].power >= 0); - convert_tw_pniels_to_tw_extensible(working, &precmp[control[0].addend >> 1]); - } else { - set_identity_tw_extensible(working); - return; - } - - int conti = 1, i; - for (i = control[0].power - 1; i >= 0; i--) { - double_tw_extensible(working); - - if (i == control[conti].power) { - assert(control[conti].addend); - - if (control[conti].addend > 0) { - add_tw_pniels_to_tw_extensible(working, &precmp[control[conti].addend >> 1]); - } else { - sub_tw_pniels_from_tw_extensible(working, &precmp[(-control[conti].addend) >> 1]); - } - conti++; - assert(conti <= control_bits); - } - } -} - -void -edwards_scalar_multiply_vt_pre( - struct tw_extensible_t *working, - const uint64_t scalar[7], - const struct tw_niels_t *precmp, - int table_bits -) { - /* HACK: not 448? */ - const int nbits=448; - struct smvt_control control[nbits/(table_bits+1)+3]; - - int control_bits = recode_wnaf(control, scalar, nbits, table_bits); - - if (control_bits > 0) { - assert(control[0].addend > 0); - assert(control[0].power >= 0); - convert_tw_niels_to_tw_extensible(working, &precmp[control[0].addend >> 1]); - } else { - set_identity_tw_extensible(working); - return; - } - - int conti = 1, i; - for (i = control[0].power - 1; i >= 0; i--) { - double_tw_extensible(working); - - if (i == control[conti].power) { - assert(control[conti].addend); - - if (control[conti].addend > 0) { - add_tw_niels_to_tw_extensible(working, &precmp[control[conti].addend >> 1]); - } else { - sub_tw_niels_from_tw_extensible(working, &precmp[(-control[conti].addend) >> 1]); - } - conti++; - assert(conti <= control_bits); - } - } -} - -void -edwards_combo_var_fixed_vt( - struct tw_extensible_t *working, - const uint64_t scalar_var[7], - const uint64_t scalar_pre[7], - const struct tw_niels_t *precmp, - int table_bits_pre -) { - /* HACK: not 448? */ - const int nbits_var=448, nbits_pre=448, table_bits_var = 3; - struct smvt_control control_var[nbits_var/(table_bits_var+1)+3]; - struct smvt_control control_pre[nbits_pre/(table_bits_pre+1)+3]; - - int ncb_var = recode_wnaf(control_var, scalar_var, nbits_var, table_bits_var); - int ncb_pre = recode_wnaf(control_pre, scalar_pre, nbits_pre, table_bits_pre); - (void)ncb_var; - (void)ncb_pre; - - struct tw_pniels_t precmp_var[1< control_pre[0].power) { - convert_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[0].addend >> 1]); - contv++; - } else if (i == control_pre[0].power && i >=0 ) { - convert_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[0].addend >> 1]); - add_tw_niels_to_tw_extensible(working, &precmp[control_pre[0].addend >> 1]); - contv++; contp++; - } else { - i = control_pre[0].power; - convert_tw_niels_to_tw_extensible(working, &precmp[control_pre[0].addend >> 1]); - contp++; - } - - if (i < 0) { - set_identity_tw_extensible(working); - return; - } - - for (i--; i >= 0; i--) { - double_tw_extensible(working); - - if (i == control_var[contv].power) { - assert(control_var[contv].addend); - - if (control_var[contv].addend > 0) { - add_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[contv].addend >> 1]); - } else { - sub_tw_pniels_from_tw_extensible(working, &precmp_var[(-control_var[contv].addend) >> 1]); - } - contv++; - } - - if (i == control_pre[contp].power) { - assert(control_pre[contp].addend); - - if (control_pre[contp].addend > 0) { - add_tw_niels_to_tw_extensible(working, &precmp[control_pre[contp].addend >> 1]); - } else { - sub_tw_niels_from_tw_extensible(working, &precmp[(-control_pre[contp].addend) >> 1]); - } - contp++; - } - } - - assert(contv == ncb_var); - assert(contp == ncb_pre); -} - - - diff --git a/scalarmul.h b/scalarmul.h deleted file mode 100644 index 208fb18..0000000 --- a/scalarmul.h +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#ifndef __P448_ALGO_H__ -#define __P448_ALGO_H__ 1 - -#include "ec_point.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Out = scalar * in, encoded in inverse square root - * format. - * - * nbits is the number of bits in scalar. - * - * The scalar is to be presented in little-endian form, - * meaning that scalar[0] contains the least significant - * word of the scalar. - * - * If the point "in" is on the curve, the return - * value will be set (to -1). - * - * If the point "in" is not on the curve, then the - * output will be incorrect. If the scalar is even, - * this condition will be detected by returning 0, - * unless the output is the identity point (0; TODO). - * If the scalar is odd, the value returned will be - * set (to -1; TODO). - * - * The input and output points are always even. - * Therefore on a cofactor-4 curve like Goldilocks, - * it is sufficient for security to make the scalar - * even. (TODO: detect when i/o has cofactor?) - * - * This function takes constant time, depending on - * nbits but not on in or scalar. - */ -mask_t -p448_montgomery_ladder( - struct p448_t *out, - const struct p448_t *in, - const uint64_t *scalar, - int nbits, - int n_extra_doubles -); - -void -edwards_scalar_multiply( - struct tw_extensible_t *working, - const uint64_t scalar[7] - /* TODO? int nbits */ -); - -void -edwards_scalar_multiply_vlook( - struct tw_extensible_t *working, - const uint64_t scalar[7] - /* TODO? int nbits */ -); - -mask_t -precompute_for_combs( - struct tw_niels_t *out, - const struct tw_extensible_t *const_base, - int n, - int t, - int s -); - -void -edwards_comb( - struct tw_extensible_t *working, - const word_t scalar[7], - const struct tw_niels_t *table, - int n, - int t, - int s -); - -void -edwards_scalar_multiply_vt( - struct tw_extensible_t *working, - const uint64_t scalar[7] -); - -void -edwards_scalar_multiply_vt_pre( - struct tw_extensible_t *working, - const uint64_t scalar[7], - const struct tw_niels_t *precmp, - int table_bits -); - -mask_t -precompute_for_wnaf( - struct tw_niels_t *out, - const struct tw_extensible_t *const_base, - int tbits -); /* TODO: attr don't ignore... */ - -void -edwards_combo_var_fixed_vt( - struct tw_extensible_t *working, - const uint64_t scalar_var[7], - const uint64_t scalar_pre[7], - const struct tw_niels_t *precmp, - int table_bits_pre -); - -#ifdef __cplusplus -}; -#endif - -#endif /* __P448_ALGO_H__ */ diff --git a/sha512.c b/sha512.c deleted file mode 100644 index 311a65b..0000000 --- a/sha512.c +++ /dev/null @@ -1,182 +0,0 @@ -/* Copyright (c) 2011 Stanford University. - * Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#include "sha512.h" - -#include -#include - -static inline uint64_t -rotate_r ( - uint64_t x, - int d -) { - return (x >> d) | (x << (64-d)); -} - -/* TODO: get from headers */ -static inline uint64_t -htobe64 (uint64_t x) { - __asm__ ("bswapq %0" : "+r"(x)); - return x; -} - -static const uint64_t -sha512_init_state[8] = { - 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, - 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179 -}; - -static const uint64_t -sha512_k[80] = { - 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, - 0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, - 0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, - 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694, - 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, - 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, - 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4, - 0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70, - 0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, - 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b, - 0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30, - 0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8, - 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, - 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, - 0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec, - 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b, - 0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, - 0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b, - 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c, - 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 -}; - -static inline uint64_t S0 (uint64_t h1) { - return rotate_r(h1, 28) ^ rotate_r(h1, 34) ^ rotate_r(h1, 39); -} - -static inline uint64_t S1 (uint64_t h4) { - return rotate_r(h4,14) ^ rotate_r(h4,18) ^ rotate_r(h4,41); -} - -static inline uint64_t s0 (uint64_t a) { - return rotate_r(a,1) ^ rotate_r(a,8) ^ a>>7; -} - -static inline uint64_t s1 (uint64_t b) { - return rotate_r(b,19) ^ rotate_r(b,61) ^ b>>6; -} - -static inline uint64_t ch (uint64_t h4, uint64_t h5, uint64_t h6) { - return h6^(h4 & (h6^h5)); -} - -static inline uint64_t maj(uint64_t h1, uint64_t h2, uint64_t h3) { - return (h1&h2) ^ (h3&(h1^h2)); -} - -static void -sha512_process_block ( - struct sha512_ctx_t *ctx -) { - uint64_t i, tmp, a, b, - *w = (uint64_t *) ctx->block, - *state = ctx->chain, - h0 = state[0], h1 = state[1], h2 = state[2], h3 = state[3], - h4 = state[4], h5 = state[5], h6 = state[6], h7 = state[7]; - - /* Clang doesn't unswitch this automatically */ - for (i=0; i<16; i++) { - /* load up the input word for this round */ - tmp = w[i] = htobe64(w[i]); - - tmp = tmp + h7 + S1(h4) + ch(h4,h5,h6) + sha512_k[i]; - - /* shift register */ - h7 = h6; h6 = h5; h5 = h4; - h4 = h3 + tmp; - h3 = h2; h2 = h1; h1 = h0; - h0 = tmp + maj(h1,h2,h3) + S0(h1); - } - - for (; i<80; i++) { - /* load up the input word for this round */ - a = w[(i+1 ) & 15]; - b = w[(i+14) & 15]; - tmp = w[i&15] = s0(a) + s1(b) + w[i&15] + w[(i+9) & 15]; - tmp = tmp + h7 + S1(h4) + ch(h4,h5,h6) + sha512_k[i]; - - /* shift register */ - h7 = h6; h6 = h5; h5 = h4; - h4 = h3 + tmp; - h3 = h2; h2 = h1; h1 = h0; - h0 = tmp + maj(h1,h2,h3) + S0(h1); - } - - state[0] += h0; - state[1] += h1; - state[2] += h2; - state[3] += h3; - state[4] += h4; - state[5] += h5; - state[6] += h6; - state[7] += h7; -} - -void -sha512_init ( - struct sha512_ctx_t *ctx -) { - ctx->nbytes = 0; - memcpy(ctx->chain, sha512_init_state, sizeof(sha512_init_state)); - memset(ctx->block, 0, sizeof(ctx->block)); -} - -void -sha512_update ( - struct sha512_ctx_t *ctx, - const unsigned char *data, - uint64_t bytes -) { - assert(ctx->nbytes < 1ull<<56); - assert(bytes < 1ull<<56); - - while (bytes) { - uint64_t fill = ctx->nbytes % 128, accept = 128 - fill; - if (accept > bytes) accept = bytes; - ctx->nbytes += accept; - memcpy(ctx->block + fill, data, accept); - - if (fill+accept == 128) - sha512_process_block(ctx); - - bytes -= accept; - data += accept; - } - - assert(ctx->nbytes < 1ull<<56); -} - -void -sha512_final ( - struct sha512_ctx_t *ctx, - uint8_t result[64] -) { - uint64_t fill = ctx->nbytes % 128, i; - ctx->block[fill++] = 0x80; - if (fill > 112) { - memset(ctx->block + fill, 0, 128-fill); - sha512_process_block(ctx); - fill = 0; - } - memset(ctx->block + fill, 0, 112-fill); - *((uint64_t *)&ctx->block[112]) = 0; - *((uint64_t *)&ctx->block[120]) = htobe64((ctx->nbytes * 8)); - sha512_process_block(ctx); - for (i=0; i<8; i++) { - ctx->chain[i] = htobe64(ctx->chain[i]); - } - memcpy(result, ctx->chain, sizeof(ctx->chain)); - sha512_init(ctx); -} diff --git a/sha512.h b/sha512.h deleted file mode 100644 index c6c83e5..0000000 --- a/sha512.h +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#ifndef __GOLDI_SHA512_H__ -#define __GOLDI_SHA512_H__ 1 - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* TODO: KAT */ - -/** - * SHA512 hashing context. - * - * This structure is opaque. - */ -struct sha512_ctx_t { - /** @privatesection */ - uint64_t chain[8]; - uint8_t block[128]; - uint64_t nbytes; -}; - -void -sha512_init ( - struct sha512_ctx_t *ctx -); - -void -sha512_update ( - struct sha512_ctx_t *ctx, - const unsigned char *data, - uint64_t bytes -); - -void -sha512_final ( - struct sha512_ctx_t *ctx, - uint8_t result[64] -); - -#ifdef __cplusplus -}; /* extern "C" */ -#endif - -#endif /* __GOLDI_SHA512_H__ */ diff --git a/word.h b/word.h deleted file mode 100644 index d5b32b4..0000000 --- a/word.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -#ifndef __WORD_H__ -#define __WORD_H__ - -#include - -typedef uint64_t word_t; -typedef __uint128_t dword_t; -typedef int64_t sword_t; -typedef __int128_t dsword_t; - -static const int WORD_BITS = sizeof(word_t) * 8; - -/* TODO: vector width for procs like ARM; gcc support */ -typedef uint64_t mask_t, vecmask_t __attribute__((ext_vector_type(4))); - -static const mask_t MASK_FAILURE = 0, MASK_SUCCESS = -1; - -/* FIXME this only works on clang */ -typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2))); -typedef int64_t int64x2_t __attribute__((ext_vector_type(2))); -typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4))); -typedef int64_t int64x4_t __attribute__((ext_vector_type(4))); -typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4))); -typedef int32_t int32x4_t __attribute__((ext_vector_type(4))); -typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8))); -typedef int32_t int32x8_t __attribute__((ext_vector_type(8))); - -#if __AVX2__ -typedef uint32x8_t big_register_t; -typedef uint64x4_t uint64xn_t; -#elif __SSE2__ || __ARM_NEON__ -typedef uint32x4_t big_register_t; -typedef uint64x2_t uint64xn_t; -#elif _WIN64 || __amd64__ || __X86_64__ || __aarch64__ -typedef uint64_t big_register_t, uint64xn_t; -#else -typedef uint64_t uint64xn_t; -typedef uint32_t big_register_t; -#endif - - -#if __AVX2__ || __SSE2__ || __ARM_NEON__ -static __inline__ big_register_t -br_is_zero(big_register_t x) { - return (big_register_t)(x == (big_register_t)0); -} -#else -#error "TODO: constant-time equality on vectorless platforms" -#endif - -#endif /* __WORD_H__ */ diff --git a/x86-64-arith.h b/x86-64-arith.h deleted file mode 100644 index 958ba66..0000000 --- a/x86-64-arith.h +++ /dev/null @@ -1,246 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -#ifndef __X86_64_ARITH_H__ -#define __X86_64_ARITH_H__ - -#include - -/* TODO: non x86-64 versions of these. - * TODO: autogenerate - */ - -static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) { - #ifndef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rax;" - "mulq %[b];" - : [c]"=a"(c), [d]"=d"(d) - : [b]"m"(*b), [a]"m"(*a) - : "cc"); - return (((__uint128_t)(d))<<64) | c; - #else - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rdx;" - "mulx %[b], %[c], %[d];" - : [c]"=r"(c), [d]"=r"(d) - : [b]"m"(*b), [a]"m"(*a) - : "rdx"); - return (((__uint128_t)(d))<<64) | c; - #endif -} - -static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) { - #ifndef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rax;" - "mulq %[b];" - : [c]"=a"(c), [d]"=d"(d) - : [b]"m"(*b), [a]"r"(a) - : "cc"); - return (((__uint128_t)(d))<<64) | c; - #else - uint64_t c,d; - __asm__ volatile - ("mulx %[b], %[c], %[d];" - : [c]"=r"(c), [d]"=r"(d) - : [b]"m"(*b), [a]"d"(a)); - return (((__uint128_t)(d))<<64) | c; - #endif -} - -static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) { - #ifndef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rax; " - "addq %%rax, %%rax; " - "mulq %[b];" - : [c]"=a"(c), [d]"=d"(d) - : [b]"m"(*b), [a]"m"(*a) - : "cc"); - return (((__uint128_t)(d))<<64) | c; - #else - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rdx;" - "leaq (,%%rdx,2), %%rdx;" - "mulx %[b], %[c], %[d];" - : [c]"=r"(c), [d]"=r"(d) - : [b]"m"(*b), [a]"m"(*a) - : "rdx"); - return (((__uint128_t)(d))<<64) | c; - #endif -} - -static __inline__ void mac(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; - - #ifdef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rdx; " - "mulx %[b], %[c], %[d]; " - "addq %[c], %[lo]; " - "adcq %[d], %[hi]; " - : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rdx", "cc"); - #else - __asm__ volatile - ("movq %[a], %%rax; " - "mulq %[b]; " - "addq %%rax, %[lo]; " - "adcq %%rdx, %[hi]; " - : [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rax", "rdx", "cc"); - #endif - - *acc = (((__uint128_t)(hi))<<64) | lo; -} - -static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; - - #ifdef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("mulx %[b], %[c], %[d]; " - "addq %[c], %[lo]; " - "adcq %[d], %[hi]; " - : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"d"(a) - : "cc"); - #else - __asm__ volatile - ("movq %[a], %%rax; " - "mulq %[b]; " - "addq %%rax, %[lo]; " - "adcq %%rdx, %[hi]; " - : [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"r"(a) - : "rax", "rdx", "cc"); - #endif - - *acc = (((__uint128_t)(hi))<<64) | lo; -} - -static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; - - #ifdef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rdx; " - "addq %%rdx, %%rdx; " - "mulx %[b], %[c], %[d]; " - "addq %[c], %[lo]; " - "adcq %[d], %[hi]; " - : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rdx", "cc"); - #else - __asm__ volatile - ("movq %[a], %%rax; " - "addq %%rax, %%rax; " - "mulq %[b]; " - "addq %%rax, %[lo]; " - "adcq %%rdx, %[hi]; " - : [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rax", "rdx", "cc"); - #endif - - *acc = (((__uint128_t)(hi))<<64) | lo; -} - -static __inline__ void msb(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; - #ifdef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rdx; " - "mulx %[b], %[c], %[d]; " - "subq %[c], %[lo]; " - "sbbq %[d], %[hi]; " - : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rdx", "cc"); - #else - __asm__ volatile - ("movq %[a], %%rax; " - "mulq %[b]; " - "subq %%rax, %[lo]; " - "sbbq %%rdx, %[hi]; " - : [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rax", "rdx", "cc"); - #endif - *acc = (((__uint128_t)(hi))<<64) | lo; -} - -static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t lo = *acc, hi = *acc>>64; - #ifdef __BMI2__ - uint64_t c,d; - __asm__ volatile - ("movq %[a], %%rdx; " - "addq %%rdx, %%rdx; " - "mulx %[b], %[c], %[d]; " - "subq %[c], %[lo]; " - "sbbq %[d], %[hi]; " - : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rdx", "cc"); - #else - __asm__ volatile - ("movq %[a], %%rax; " - "addq %%rax, %%rax; " - "mulq %[b]; " - "subq %%rax, %[lo]; " - "sbbq %%rdx, %[hi]; " - : [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rax", "rdx", "cc"); - #endif - *acc = (((__uint128_t)(hi))<<64) | lo; - -} - -static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { - uint64_t c,d, lo = *acc, hi = *acc>>64; - __asm__ volatile - ("movq %[a], %%rdx; " - "mulx %[b], %[c], %[d]; " - "subq %[lo], %[c]; " - "sbbq %[hi], %[d]; " - : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) - : [b]"m"(*b), [a]"m"(*a) - : "rdx", "cc"); - *acc = (((__uint128_t)(d))<<64) | c; -} - -static __inline__ __uint128_t widemulu(uint64_t a, uint64_t b) { - return ((__uint128_t)(a)) * b; -} - -static __inline__ __int128_t widemuls(int64_t a, int64_t b) { - return ((__int128_t)(a)) * b; -} - -static __inline__ uint64_t opacify(uint64_t x) { - __asm__ volatile("" : "+r"(x)); - return x; -} - -static __inline__ mask_t is_zero(uint64_t x) { - __asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x)); - return ~x; -} - -#endif /* __X86_64_ARITH_H__ */