From 1f480b0f95fb66adf8a01e3d32cf9621629821de Mon Sep 17 00:00:00 2001 From: Mike Hamburg Date: Thu, 23 Oct 2014 17:41:51 -0700 Subject: [PATCH] Big changes for curve flexibility. For details see HISTORY.txt. Very experimental Ed480-Ridinghood support is now in. It's not fully optimized, but in general the current build is 8-15% slower than Goldilocks. It only works on arch_x86_64, though arch_ref64 support ought to be easy. Support on other arches will be trickier, which is of course why I chose Goldilocks over Ridinghood in the first place. Next up, E-521. Hopefully. The code is starting to get spread out over a lot of files. Some are per field*arch, some per field, some per curve, some global. It's hard to do much about this, though, with a rather ugly .c.inc system. There's currently no way to make a Ridinghood eBAT. In fact, I haven't tested eBAT support in this commit. I also haven't tested NEON, but at least ARCH_32 works on Intel. --- HISTORY.txt | 41 ++ Makefile | 13 +- include/goldilocks.h | 10 +- include/ridinghood.h | 376 +++++++++++++++ src/ec_point.c | 87 +--- src/include/ec_point.h | 2 - src/include/field.h | 74 +-- src/include/magic.h | 47 +- src/include/word.h | 11 +- src/{ => p448}/arch_32/arch_config.h | 0 src/{ => p448}/arch_32/p448.c | 0 src/{ => p448}/arch_32/p448.h | 0 src/{ => p448}/arch_arm_32/arch_config.h | 0 src/{ => p448}/arch_arm_32/p448.c | 0 src/{ => p448}/arch_arm_32/p448.h | 0 src/{ => p448}/arch_neon/arch_config.h | 0 src/{ => p448}/arch_neon/neon_emulation.h | 0 src/{ => p448}/arch_neon/p448.c | 0 src/{ => p448}/arch_neon/p448.h | 0 .../arch_neon_experimental/arch_config.h | 0 src/{ => p448}/arch_neon_experimental/p448.c | 0 src/{ => p448}/arch_neon_experimental/p448.h | 0 src/{ => p448}/arch_ref64/arch_config.h | 0 src/{ => p448}/arch_ref64/p448.c | 0 src/{ => p448}/arch_ref64/p448.h | 0 src/{ => p448}/arch_x86_64/arch_config.h | 0 src/{ => p448}/arch_x86_64/p448.c | 0 src/{ => p448}/arch_x86_64/p448.h | 0 src/{ => 
p448}/arch_x86_64/x86-64-arith.h | 0 src/p448/f_arithmetic.c | 43 ++ src/p448/f_field.h | 39 ++ src/p448/f_magic.h | 35 ++ src/p448/field.h | 123 +++++ src/{ => p448}/magic.c | 24 +- src/p480/arch_x86_64/arch_config.h | 1 + src/p480/arch_x86_64/p480.c | 435 ++++++++++++++++++ src/p480/arch_x86_64/p480.h | 257 +++++++++++ src/p480/arch_x86_64/x86-64-arith.h | 279 +++++++++++ src/p480/f_arithmetic.c | 43 ++ src/p480/f_field.h | 39 ++ src/p480/f_magic.h | 35 ++ src/p480/magic.c | 68 +++ src/p521/f_arithmetic.c | 43 ++ src/p521/f_field.h | 39 ++ test/bench.c | 11 +- test/test.c | 9 +- test/test_arithmetic.c | 63 ++- test/test_pointops.c | 10 + test/test_scalarmul.c | 10 +- 49 files changed, 2082 insertions(+), 185 deletions(-) create mode 100644 include/ridinghood.h rename src/{ => p448}/arch_32/arch_config.h (100%) rename src/{ => p448}/arch_32/p448.c (100%) rename src/{ => p448}/arch_32/p448.h (100%) rename src/{ => p448}/arch_arm_32/arch_config.h (100%) rename src/{ => p448}/arch_arm_32/p448.c (100%) rename src/{ => p448}/arch_arm_32/p448.h (100%) rename src/{ => p448}/arch_neon/arch_config.h (100%) rename src/{ => p448}/arch_neon/neon_emulation.h (100%) rename src/{ => p448}/arch_neon/p448.c (100%) rename src/{ => p448}/arch_neon/p448.h (100%) rename src/{ => p448}/arch_neon_experimental/arch_config.h (100%) rename src/{ => p448}/arch_neon_experimental/p448.c (100%) rename src/{ => p448}/arch_neon_experimental/p448.h (100%) rename src/{ => p448}/arch_ref64/arch_config.h (100%) rename src/{ => p448}/arch_ref64/p448.c (100%) rename src/{ => p448}/arch_ref64/p448.h (100%) rename src/{ => p448}/arch_x86_64/arch_config.h (100%) rename src/{ => p448}/arch_x86_64/p448.c (100%) rename src/{ => p448}/arch_x86_64/p448.h (100%) rename src/{ => p448}/arch_x86_64/x86-64-arith.h (100%) create mode 100644 src/p448/f_arithmetic.c create mode 100644 src/p448/f_field.h create mode 100644 src/p448/f_magic.h create mode 100644 src/p448/field.h rename src/{ => p448}/magic.c (82%) 
create mode 100644 src/p480/arch_x86_64/arch_config.h create mode 100644 src/p480/arch_x86_64/p480.c create mode 100644 src/p480/arch_x86_64/p480.h create mode 100644 src/p480/arch_x86_64/x86-64-arith.h create mode 100644 src/p480/f_arithmetic.c create mode 100644 src/p480/f_field.h create mode 100644 src/p480/f_magic.h create mode 100644 src/p480/magic.c create mode 100644 src/p521/f_arithmetic.c create mode 100644 src/p521/f_field.h diff --git a/HISTORY.txt b/HISTORY.txt index 9a4a24e..c6eba60 100644 --- a/HISTORY.txt +++ b/HISTORY.txt @@ -1,3 +1,44 @@ +October 23, 2014: + Pushing through changes for curve flexibility. First up is + Ed480-Ridinghood, because it has the same number of words. Next + is E-521. + + Experimental support for Ed480-Ridinghood. To use, compile with + make ... FIELD=p480 -XCFLAGS=-DGOLDI_FIELD_BITS=480 + + I still need to figure out what to do about the fact that the library + is called "goldilocks", but it will soon support curves that are not + ed448-goldilocks, at least experimentally. + + Currently the whole system's header "goldilocks.h" doesn't have + a simpler way to override field size, but it does work (as a hack) + with -DGOLDI_FIELD_BITS=... + + There is no support yet for coexistence of multiple fields in one + library. The field routines will have unique names, but scalarmul* + won't, and the top-level goldilocks routines have fixed names. + + Current timings on Haswell: + Goldilocks: 178kcy keygen, 536kcy ecdh + Ridinghood: 193kcy keygen, 617kcy ecdh + + Note that Ridinghood ECDH does worse than 480/448. This is at least + in part because I haven't calculated the overflow handling limits yet + in ec_point.h (this is a disadvantage of dropping the automated + tool for generating that file). So I'm reducing much more often + than I need to. (There's a really loud TODO in ec_point.h for that.) 
+ + Also, I haven't tested the limits on these reductions in a while, so + it could be that there are actual (security-critical) bugs in this + area, at least for p448. Now that there's field flexibility, it's + probably a good idea to make a field impl with extra words to check + this. + + Furthermore, field_mulw_scc will perform differently on these two + curves based on whether the curve constant is positive or negative. + I should probably go optimize the "hot" routines like montgomery_step + to have separate cases for positive and negative. + September 29, 2014: Yesterday I put in some more architecture detection, but it should really be based on the arch directory, because what's in there really diff --git a/Makefile b/Makefile index b9d53a9..83a8066 100644 --- a/Makefile +++ b/Makefile @@ -20,12 +20,13 @@ else ARCH ?= arch_arm_32 endif +FIELD ?= p448 WARNFLAGS = -pedantic -Wall -Wextra -Werror -Wunreachable-code \ -Wmissing-declarations -Wunused-function -Wno-overlength-strings $(EXWARN) -INCFLAGS = -Isrc/include -Iinclude -Isrc/$(ARCH) +INCFLAGS = -Isrc/include -Iinclude -Isrc/$(FIELD) -Isrc/$(FIELD)/$(ARCH) LANGFLAGS = -std=c99 -fno-strict-aliasing GENFLAGS = -ffunction-sections -fdata-sections -fvisibility=hidden -fomit-frame-pointer -fPIC OFLAGS = -O3 @@ -63,7 +64,8 @@ ASFLAGS = $(ARCHFLAGS) HEADERS= Makefile $(shell find . 
-name "*.h") build/timestamp LIBCOMPONENTS= build/goldilocks.o build/barrett_field.o build/crandom.o \ - build/p448.o build/ec_point.o build/scalarmul.o build/sha512.o build/magic.o build/arithmetic.o + build/$(FIELD).o build/ec_point.o build/scalarmul.o build/sha512.o build/magic.o \ + build/f_arithmetic.o build/arithmetic.o TESTCOMPONENTS=build/test.o build/test_scalarmul.o build/test_sha512.o \ build/test_pointops.o build/test_arithmetic.o build/test_goldilocks.o build/magic.o @@ -113,7 +115,10 @@ build/%.s: src/%.c $(HEADERS) build/%.s: test/%.c $(HEADERS) $(CC) $(CFLAGS) -S -c -o $@ $< -build/%.s: src/$(ARCH)/%.c $(HEADERS) +build/%.s: src/$(FIELD)/$(ARCH)/%.c $(HEADERS) + $(CC) $(CFLAGS) -S -c -o $@ $< + +build/%.s: src/$(FIELD)/%.c $(HEADERS) $(CC) $(CFLAGS) -S -c -o $@ $< doc/timestamp: @@ -131,7 +136,7 @@ $(BATNAME): include/* src/* src/*/* test/batarch.map targ="$@/crypto_$$prim/ed448goldilocks"; \ (while read arch where; do \ mkdir -p $$targ/`basename $$arch`; \ - cp include/*.h src/*.c src/include/*.h src/bat/$$prim.c src/$$where/*.c src/$$where/*.h $$targ/`basename $$arch`; \ + cp include/*.h src/*.c src/include/*.h src/bat/$$prim.c src/p448/$$where/*.c src/p448/$$where/*.h src/p448/*.c src/p448/*.h $$targ/`basename $$arch`; \ cp src/bat/api_$$prim.h $$targ/`basename $$arch`/api.h; \ perl -p -i -e 's/.*endif.*GOLDILOCKS_CONFIG_H/#define SUPERCOP_WONT_LET_ME_OPEN_FILES 1\n\n$$&/' $$targ/`basename $$arch`/config.h; \ perl -p -i -e 's/SYSNAME/'`basename $(BATNAME)`_`basename $$arch`'/g' $$targ/`basename $$arch`/api.h; \ diff --git a/include/goldilocks.h b/include/goldilocks.h index 2c3919c..1631c2f 100644 --- a/include/goldilocks.h +++ b/include/goldilocks.h @@ -22,14 +22,18 @@ #define GOLDI_IMPLEMENT_SIGNATURES 1 #endif -/** The size of the Goldilocks field, in bits. */ +/** The size of the Goldilocks field, in bits. + * Ifdef'd so you can override when testing experimental Ed480-Ridinghood or E-521. 
+ */ +#ifndef GOLDI_FIELD_BITS #define GOLDI_FIELD_BITS 448 +#endif /** The size of the Goldilocks scalars, in bits. */ -#define GOLDI_SCALAR_BITS 446 +#define GOLDI_SCALAR_BITS (GOLDI_FIELD_BITS-2) /** The same size, in bytes. */ -#define GOLDI_FIELD_BYTES (GOLDI_FIELD_BITS/8) +#define GOLDI_FIELD_BYTES ((GOLDI_FIELD_BITS+7)/8) /** The size of a Goldilocks public key, in bytes. */ #define GOLDI_PUBLIC_KEY_BYTES GOLDI_FIELD_BYTES diff --git a/include/ridinghood.h b/include/ridinghood.h new file mode 100644 index 0000000..2c3919c --- /dev/null +++ b/include/ridinghood.h @@ -0,0 +1,376 @@ +/* Copyright (c) 2014 Cryptography Research, Inc. + * Released under the MIT License. See LICENSE.txt for license information. + */ + +/** + * @file ridinghood.h + * @author Mike Hamburg + * @brief Goldilocks high-level functions. + */ +#ifndef __GOLDILOCKS_H__ +#define __GOLDILOCKS_H__ 1 + +#include <stdint.h> + +#ifndef GOLDI_IMPLEMENT_PRECOMPUTED_KEYS +/** If nonzero, implement precomputation for verify and ECDH. */ +#define GOLDI_IMPLEMENT_PRECOMPUTED_KEYS 1 +#endif + +#ifndef GOLDI_IMPLEMENT_SIGNATURES +/** If nonzero, implement signatures. */ +#define GOLDI_IMPLEMENT_SIGNATURES 1 +#endif + +/** The size of the Goldilocks field, in bits. */ +#define GOLDI_FIELD_BITS 448 + +/** The size of the Goldilocks scalars, in bits. */ +#define GOLDI_SCALAR_BITS 446 + +/** The same size, in bytes. */ +#define GOLDI_FIELD_BYTES (GOLDI_FIELD_BITS/8) + +/** The size of a Goldilocks public key, in bytes. */ +#define GOLDI_PUBLIC_KEY_BYTES GOLDI_FIELD_BYTES + +/** The extra bytes in a Goldilocks private key for the symmetric key. */ +#define GOLDI_SYMKEY_BYTES 32 + +/** The size of a shared secret. */ +#define GOLDI_SHARED_SECRET_BYTES 64 + +/** The size of a Goldilocks private key, in bytes. */ +#define GOLDI_PRIVATE_KEY_BYTES (2*GOLDI_FIELD_BYTES + GOLDI_SYMKEY_BYTES) + +/** The size of a Goldilocks signature, in bytes. 
*/ +#define GOLDI_SIGNATURE_BYTES (2*GOLDI_FIELD_BYTES) + +/** + * @brief Serialized form of a Goldilocks public key. + * + * @warning This isn't even my final form! + */ +struct goldilocks_public_key_t { + uint8_t opaque[GOLDI_PUBLIC_KEY_BYTES]; /**< Serialized data. */ +}; + +/** + * @brief Serialized form of a Goldilocks private key. + * + * Contains 56 bytes of actual private key, 56 bytes of + * public key, and 32 bytes of symmetric key for randomization. + * + * @warning This isn't even my final form! + */ +struct goldilocks_private_key_t { + uint8_t opaque[GOLDI_PRIVATE_KEY_BYTES]; /**< Serialized data. */ +}; + +#ifdef __cplusplus +extern "C" { +#endif + +/** @brief No error. */ +static const int GOLDI_EOK = 0; + +/** @brief Error: your key or other state is corrupt. */ +static const int GOLDI_ECORRUPT = 44801; + +/** @brief Error: other party's key is corrupt. */ +static const int GOLDI_EINVAL = 44802; + +/** @brief Error: not enough entropy. */ +static const int GOLDI_ENODICE = 44804; + +/** @brief Error: you need to initialize the library first. */ +static const int GOLDI_EUNINIT = 44805; + +/** @brief Error: called init() but we are already initialized. */ +static const int GOLDI_EALREADYINIT = 44805; + +/** + * @brief Initialize Goldilocks' precomputed tables and + * random number generator. This function must be called before + * any of the other Goldilocks routines (except + * goldilocks_shared_secret in the current version) and should be + * called only once per process. + * + * There is currently no way to tear down this state. It is possible + * that a future version of this library will not require this function. + * + * @retval GOLDI_EOK Success. + * @retval GOLDI_EALREADYINIT Already initialized. + * @retval GOLDI_ECORRUPT Memory is corrupted, or another thread is already init'ing. + * @retval Nonzero An error occurred. 
+ */ +int +goldilocks_init (void) +__attribute__((warn_unused_result,visibility ("default"))); + + +/** + * @brief Generate a new random keypair. + * @param [out] privkey The generated private key. + * @param [out] pubkey The generated public key. + * + * @warning This isn't even my final form! + * + * @retval GOLDI_EOK Success. + * @retval GOLDI_ENODICE Insufficient entropy. + * @retval GOLDI_EUNINIT You must call goldilocks_init() first. + */ +int +goldilocks_keygen ( + struct goldilocks_private_key_t *privkey, + struct goldilocks_public_key_t *pubkey +) __attribute__((warn_unused_result,nonnull(1,2),visibility ("default"))); + +/** + * @brief Derive a key from its compressed form. + * @param [out] privkey The derived private key. + * @param [in] proto The compressed or proto-key, which must be 32 random bytes. + * + * @warning This isn't even my final form! + * + * @retval GOLDI_EOK Success. + * @retval GOLDI_EUNINIT You must call goldilocks_init() first. + */ +int +goldilocks_derive_private_key ( + struct goldilocks_private_key_t *privkey, + const unsigned char proto[GOLDI_SYMKEY_BYTES] +) __attribute__((nonnull(1,2),visibility ("default"))); + +/** + * @brief Compress a private key (by copying out the proto-key) + * @param [out] proto The proto-key. + * @param [in] privkey The private key. + * + * @warning This isn't even my final form! + * @todo test. + * + * @retval GOLDI_EOK Success. + * @retval GOLDI_EUNINIT You must call goldilocks_init() first. + */ +void +goldilocks_underive_private_key ( + unsigned char proto[GOLDI_SYMKEY_BYTES], + const struct goldilocks_private_key_t *privkey +) __attribute__((nonnull(1,2),visibility ("default"))); + +/** + * @brief Extract the public key from a private key. + * + * This is essentially a memcpy from the public part of the privkey. + * + * @param [out] pubkey The extracted public key. + * @param [in] privkey The private key. + * + * @retval GOLDI_EOK Success. + * @retval GOLDI_ECORRUPT The private key is corrupt. 
+ */ +int +goldilocks_private_to_public ( + struct goldilocks_public_key_t *pubkey, + const struct goldilocks_private_key_t *privkey +) __attribute__((nonnull(1,2),visibility ("default"))); + +/** + * @brief Generate a Diffie-Hellman shared secret in constant time. + * + * This function uses some compile-time flags whose merit remains to + * be decided. + * + * If the flag EXPERIMENT_ECDH_OBLITERATE_CT is set, prepend 40 bytes + * of zeros to the secret before hashing. In the case that the other + * party's key is detectably corrupt, instead the symmetric part + * of the secret key is used to produce a pseudorandom value. + * + * If EXPERIMENT_ECDH_STIR_IN_PUBKEYS is set, the sum and product of + * the two parties' public keys is prepended to the hash. + * + * In the current version, this function can safely be run even without + * goldilocks_init(). But this property is not guaranteed for future + * versions, so call it anyway. + * + * @warning This isn't even my final form! + * + * @param [out] shared The shared secret established with the other party. + * @param [in] my_privkey My private key. + * @param [in] your_pubkey The other party's public key. + * + * @retval GOLDI_EOK Success. + * @retval GOLDI_ECORRUPT My key is corrupt. + * @retval GOLDI_EINVAL The other party's key is corrupt. + * @retval GOLDI_EUNINIT You must call goldilocks_init() first. + */ +int +goldilocks_shared_secret ( + uint8_t shared[GOLDI_SHARED_SECRET_BYTES], + const struct goldilocks_private_key_t *my_privkey, + const struct goldilocks_public_key_t *your_pubkey +) __attribute__((warn_unused_result,nonnull(1,2,3),visibility ("default"))); + +#if GOLDI_IMPLEMENT_SIGNATURES +/** + * @brief Sign a message. + * + * The signature is deterministic, using the symmetric secret found in the + * secret key to form a nonce. + * + * The technique used in signing is a modified Schnorr system, like EdDSA. + * + * @warning This isn't even my final form! 
+ * + * @param [out] signature_out Space for the output signature. + * @param [in] message The message to be signed. + * @param [in] message_len The length of the message to be signed. + * @param [in] privkey My private key. + * + * @retval GOLDI_EOK Success. + * @retval GOLDI_ECORRUPT My key is corrupt. + * @retval GOLDI_EUNINIT You must call goldilocks_init() first. + */ +int +goldilocks_sign ( + uint8_t signature_out[GOLDI_SIGNATURE_BYTES], + const uint8_t *message, + uint64_t message_len, + const struct goldilocks_private_key_t *privkey +) __attribute__((nonnull(1,2,4),visibility ("default"))); + +/** + * @brief Verify a signature. + * + * This function is fairly strict. It will correctly detect when + * the signature has the wrong cofactor component, or when the sig + * values aren't less than p or q. + * + * Currently this function does not detect when the public key is weird, + * eg 0, has cofactor, etc. As a result, a party with a bogus public + * key could create signatures that succeed on some systems and fail on + * others. + * + * @warning This isn't even my final form! + * + * @param [in] signature The signature. + * @param [in] message The message to be verified. + * @param [in] message_len The length of the message to be verified. + * @param [in] pubkey The signer's public key. + * + * @retval GOLDI_EOK Success. + * @retval GOLDI_EINVAL The public key or signature is corrupt. + * @retval GOLDI_EUNINIT You must call goldilocks_init() first. + */ +int +goldilocks_verify ( + const uint8_t signature[GOLDI_SIGNATURE_BYTES], + const uint8_t *message, + uint64_t message_len, + const struct goldilocks_public_key_t *pubkey +) __attribute__((warn_unused_result,nonnull(1,2,4),visibility ("default"))); +#endif + +#if GOLDI_IMPLEMENT_PRECOMPUTED_KEYS + +/** A public key which has been expanded by precomputation for higher speed. */ +struct goldilocks_precomputed_public_key_t; + +/** + * @brief Expand a public key by precomputation. 
+ * + * @todo Give actual error returns, instead of ambiguous NULL. + * + * @warning This isn't even my final form! + * + * @param [in] pub The public key. + * @retval NULL We ran out of memory, or the public key could not be expanded. + */ +struct goldilocks_precomputed_public_key_t * +goldilocks_precompute_public_key ( + const struct goldilocks_public_key_t *pub +) __attribute__((warn_unused_result,nonnull(1),visibility ("default"))); + +/** + * @brief Overwrite an expanded public key with zeros, then destroy it. + * + * If the input is NULL, this function does nothing. + * + * @param [in] precom The public key. + */ +void +goldilocks_destroy_precomputed_public_key ( + struct goldilocks_precomputed_public_key_t *precom +) __attribute__((visibility ("default"))); + +/** + * @brief Verify a signature. + * + * This function is fairly strict. It will correctly detect when + * the signature has the wrong cofactor component, or when the sig + * values aren't less than p or q. + * + * @warning This isn't even my final form! + * + * @param [in] signature The signature. + * @param [in] message The message to be verified. + * @param [in] message_len The length of the message to be verified. + * @param [in] pubkey The signer's public key, expanded by precomputation. + * + * @retval GOLDI_EOK Success. + * @retval GOLDI_EINVAL The public key or signature is corrupt. + * @retval GOLDI_EUNINIT You must call goldilocks_init() first. + */ +int +goldilocks_verify_precomputed ( + const uint8_t signature[GOLDI_SIGNATURE_BYTES], + const uint8_t *message, + uint64_t message_len, + const struct goldilocks_precomputed_public_key_t *pubkey +) __attribute__((warn_unused_result,nonnull(1,2,4),visibility ("default"))); + +/** + * @brief Generate a Diffie-Hellman shared secret in constant time. + * Uses a precomputation on the other party's public key for efficiency. + * + * This function uses some compile-time flags whose merit remains to + * be decided. 
+ * + * If the flag EXPERIMENT_ECDH_OBLITERATE_CT is set, prepend 40 bytes + * of zeros to the secret before hashing. In the case that the other + * party's key is detectably corrupt, instead the symmetric part + * of the secret key is used to produce a pseudorandom value. + * + * If EXPERIMENT_ECDH_STIR_IN_PUBKEYS is set, the sum and product of + * the two parties' public keys is prepended to the hash. + * + * In the current version, this function can safely be run even without + * goldilocks_init(). But this property is not guaranteed for future + * versions, so call it anyway. + * + * @warning This isn't even my final form! + * + * @param [out] shared The shared secret established with the other party. + * @param [in] my_privkey My private key. + * @param [in] your_pubkey The other party's precomputed public key. + * + * @retval GOLDI_EOK Success. + * @retval GOLDI_ECORRUPT My key is corrupt. + * @retval GOLDI_EINVAL The other party's key is corrupt. + * @retval GOLDI_EUNINIT You must call goldilocks_init() first. + */ +int +goldilocks_shared_secret_precomputed ( + uint8_t shared[GOLDI_SHARED_SECRET_BYTES], + const struct goldilocks_private_key_t *my_privkey, + const struct goldilocks_precomputed_public_key_t *your_pubkey +) __attribute__((warn_unused_result,nonnull(1,2,3),visibility ("default"))); + +#endif /* GOLDI_IMPLEMENT_PRECOMPUTED_KEYS */ + +#ifdef __cplusplus +}; /* extern "C" */ +#endif + +#endif /* __GOLDILOCKS_H__ */ diff --git a/src/ec_point.c b/src/ec_point.c index eabc3a3..0b9c8bf 100644 --- a/src/ec_point.c +++ b/src/ec_point.c @@ -12,7 +12,8 @@ #include "ec_point.h" #include "magic.h" -#define is32 (GOLDI_BITS == 32) +#define is32 (GOLDI_BITS == 32 || FIELD_BITS == 480) +/* TODO XXX PERF FIXME: better detection of overflow conditions */ /* I wanted to just use if (is32) * But clang's -Wunreachable-code flags it. 
@@ -52,60 +53,6 @@ field_mulw_scc_wr ( field_weak_reduce(out); } -static __inline__ void -field_sqrn ( - field_t *__restrict__ y, - const field_t *x, - int n -) { - field_t tmp; - assert(n>0); - if (n&1) { - field_sqr(y,x); - n--; - } else { - field_sqr(&tmp,x); - field_sqr(y,&tmp); - n-=2; - } - for (; n; n-=2) { - field_sqr(&tmp,y); - field_sqr(y,&tmp); - } -} - -void -field_isr ( /* TODO: MAGIC */ - struct field_t* a, - const struct field_t* x -) { - struct field_t L0, L1, L2; - field_sqr ( &L1, x ); - field_mul ( &L2, x, &L1 ); - field_sqr ( &L1, &L2 ); - field_mul ( &L2, x, &L1 ); - field_sqrn ( &L1, &L2, 3 ); - field_mul ( &L0, &L2, &L1 ); - field_sqrn ( &L1, &L0, 3 ); - field_mul ( &L0, &L2, &L1 ); - field_sqrn ( &L2, &L0, 9 ); - field_mul ( &L1, &L0, &L2 ); - field_sqr ( &L0, &L1 ); - field_mul ( &L2, x, &L0 ); - field_sqrn ( &L0, &L2, 18 ); - field_mul ( &L2, &L1, &L0 ); - field_sqrn ( &L0, &L2, 37 ); - field_mul ( &L1, &L2, &L0 ); - field_sqrn ( &L0, &L1, 37 ); - field_mul ( &L1, &L2, &L0 ); - field_sqrn ( &L0, &L1, 111 ); - field_mul ( &L2, &L1, &L0 ); - field_sqr ( &L0, &L2 ); - field_mul ( &L1, x, &L0 ); - field_sqrn ( &L0, &L1, 223 ); - field_mul ( a, &L2, &L0 ); -} - void add_tw_niels_to_tw_extensible ( struct tw_extensible_t* d, @@ -396,7 +343,7 @@ montgomery_step ( field_sqr ( &a->za, &a->zd ); field_sqr ( &a->xd, &L0 ); field_sqr ( &L0, &L1 ); - field_mulw ( &a->zd, &a->xd, 1-EDWARDS_D ); + field_mulw_scc ( &a->zd, &a->xd, 1-EDWARDS_D ); /* FIXME PERF MULW */ field_sub ( &L1, &a->xd, &L0 ); field_bias ( &L1, 2 ); IF32( field_weak_reduce( &L1 ) ); @@ -444,11 +391,9 @@ serialize_montgomery ( field_mul ( &L3, &L1, &L2 ); field_copy ( &L2, &a->z0 ); field_addw ( &L2, 1 ); - field_sqr ( &L1, &L2 ); - field_mulw ( &L2, &L1, 1-EDWARDS_D ); - field_neg ( &L1, &L2 ); + field_sqr ( &L0, &L2 ); + field_mulw_scc_wr ( &L1, &L0, EDWARDS_D-1 ); field_add ( &L2, &a->z0, &a->z0 ); - field_bias ( &L2, 1 ); field_add ( &L0, &L2, &L2 ); field_add ( &L2, &L0, &L1 ); 
IF32( field_weak_reduce( &L2 ) ); @@ -512,13 +457,9 @@ untwist_and_double_and_serialize ( IF32( field_weak_reduce( b ) ); field_sqr ( &L2, &a->z ); field_sqr ( &L1, &L2 ); - field_add ( &L2, b, b ); - field_mulw ( b, &L2, 1-EDWARDS_D ); - field_neg ( &L2, b ); - field_bias ( &L2, 2 ); - field_mulw ( &L0, &L2, 1-EDWARDS_D ); - field_neg ( b, &L0 ); - field_bias ( b, 2 ); + field_add ( b, b, b ); + field_mulw_scc ( &L2, b, EDWARDS_D-1 ); + field_mulw_scc ( b, &L2, EDWARDS_D-1 ); field_mul ( &L0, &L2, &L1 ); field_mul ( &L2, b, &L0 ); field_isr ( &L0, &L2 ); @@ -654,10 +595,8 @@ deserialize_affine ( field_copy ( &L3, &L1 ); field_addw ( &L3, 1 ); field_sqr ( &L2, &L3 ); - field_mulw ( &L3, &L2, 1-EDWARDS_D ); - field_neg ( &a->x, &L3 ); - field_add ( &L3, &L1, &L1 ); - field_bias ( &L3, 1 ); + field_mulw_scc ( &a->x, &L2, EDWARDS_D-1 ); /* PERF MULW */ + field_add ( &L3, &L1, &L1 ); /* FIXME: i adjusted the bias here, was it right? */ field_add ( &a->y, &L3, &L3 ); field_add ( &L3, &a->y, &a->x ); IF32( field_weak_reduce( &L3 ) ); @@ -694,11 +633,9 @@ deserialize_and_twist_approx ( field_sqr ( &a->z, sz ); field_copy ( &a->y, &a->z ); field_addw ( &a->y, 1 ); - field_sqr ( &a->x, &a->y ); - field_mulw ( &a->y, &a->x, 1-EDWARDS_D ); - field_neg ( &a->x, &a->y ); + field_sqr ( &L0, &a->y ); + field_mulw_scc ( &a->x, &L0, EDWARDS_D-1 ); field_add ( &a->y, &a->z, &a->z ); - field_bias ( &a->y, 1 ); field_add ( &a->u, &a->y, &a->y ); field_add ( &a->y, &a->u, &a->x ); IF32( field_weak_reduce( &a->y ) ); diff --git a/src/include/ec_point.h b/src/include/ec_point.h index 657ee88..74bbe91 100644 --- a/src/include/ec_point.h +++ b/src/include/ec_point.h @@ -543,8 +543,6 @@ copy_tw_pniels ( field_copy ( &a->z, &ds->z ); } - - #ifdef __cplusplus }; /* extern "C" */ #endif diff --git a/src/include/field.h b/src/include/field.h index bf36e95..b3160a7 100644 --- a/src/include/field.h +++ b/src/include/field.h @@ -1,40 +1,16 @@ /** * @file field.h - * @brief Field switch code. 
+ * @brief Generic field header. * @copyright * Copyright (c) 2014 Cryptography Research, Inc. \n * Released under the MIT License. See LICENSE.txt for license information. * @author Mike Hamburg */ + #ifndef __FIELD_H__ #define __FIELD_H__ -#include -#include "constant_time.h" - -#include "p448.h" -#define FIELD_BITS 448 -#define field_t p448_t -#define field_mul p448_mul -#define field_sqr p448_sqr -#define field_add p448_add -#define field_sub p448_sub -#define field_mulw p448_mulw -#define field_addw p448_addw -#define field_subw p448_subw -#define field_neg p448_neg -#define field_set_ui p448_set_ui -#define field_bias p448_bias -#define field_cond_neg p448_cond_neg -#define field_inverse p448_inverse -#define field_eq p448_eq -#define field_isr p448_isr -#define field_simultaneous_invert p448_simultaneous_invert -#define field_weak_reduce p448_weak_reduce -#define field_strong_reduce p448_strong_reduce -#define field_serialize p448_serialize -#define field_deserialize p448_deserialize -#define field_is_zero p448_is_zero +#include "f_field.h" /** @brief Bytes in a field element */ #define FIELD_BYTES (1+(FIELD_BITS-1)/8) @@ -42,6 +18,22 @@ /** @brief Words in a field element */ #define FIELD_WORDS (1+(FIELD_BITS-1)/sizeof(word_t)) +/* TODO: standardize notation */ +/** @brief The number of words in the Goldilocks field. */ +#define GOLDI_FIELD_WORDS DIV_CEIL(FIELD_BITS,WORD_BITS) + +/** @brief The number of bits in the Goldilocks curve's cofactor (cofactor=4). */ +#define COFACTOR_BITS 2 + +/** @brief The number of bits in a Goldilocks scalar. */ +#define SCALAR_BITS (FIELD_BITS - COFACTOR_BITS) + +/** @brief The number of bytes in a Goldilocks scalar. */ +#define SCALAR_BYTES (1+(SCALAR_BITS)/8) + +/** @brief The number of words in a Goldilocks scalar. */ +#define SCALAR_WORDS WORDS_FOR_BITS(SCALAR_BITS) + /** * @brief For GMP tests: little-endian representation of the field modulus. 
*/ @@ -119,5 +111,31 @@ field_eq ( const struct field_t *a, const struct field_t *b ); + +/** + * Square x, n times. + */ +static __inline__ void +__attribute__((unused,always_inline)) +field_sqrn ( + field_t *__restrict__ y, + const field_t *x, + int n +) { + field_t tmp; + assert(n>0); + if (n&1) { + field_sqr(y,x); + n--; + } else { + field_sqr(&tmp,x); + field_sqr(y,&tmp); + n-=2; + } + for (; n; n-=2) { + field_sqr(&tmp,y); + field_sqr(y,&tmp); + } +} -#endif /* __FIELD_H__ */ +#endif // __FIELD_H__ diff --git a/src/include/magic.h b/src/include/magic.h index 70be081..c7e296d 100644 --- a/src/include/magic.h +++ b/src/include/magic.h @@ -4,16 +4,24 @@ * Copyright (c) 2014 Cryptography Research, Inc. \n * Released under the MIT License. See LICENSE.txt for license information. * @author Mike Hamburg - * @brief Goldilocks magic numbers (group orders, coefficients, algo params etc). + * @brief Curve-independent declarations of magic numbers. */ - #ifndef __GOLDI_MAGIC_H__ #define __GOLDI_MAGIC_H__ 1 #include "word.h" -#include "p448.h" -#include "ec_point.h" + +/** + * @brief If true, use wider tables for the precomputed combs. + */ +#ifndef USE_BIG_COMBS +#if defined(__ARM_NEON__) +#define USE_BIG_COMBS 1 +#else +#define USE_BIG_COMBS (WORD_BITS==64) +#endif +#endif /* TODO: standardize notation */ @@ -32,16 +40,13 @@ /** @brief The number of words in the Goldilocks field. */ #define SCALAR_WORDS WORDS_FOR_BITS(SCALAR_BITS) +#include "f_magic.h" + /** * @brief sqrt(d-1), used for point formats and twisting. */ extern const struct field_t sqrt_d_minus_1; -/** - * @brief The Edwards "d" term for this curve. - */ -static const int64_t EDWARDS_D = -39081; - /** * @brief The base point for Goldilocks. */ @@ -76,34 +81,10 @@ extern const word_t SCALARMUL_FIXED_WINDOW_ADJUSTMENT[2*SCALAR_WORDS]; */ #define SCALARMUL_WNAF_COMBO_TABLE_BITS 4 -/** - * @brief If true, use wider tables for the precomputed combs. 
- */ -#ifndef USE_BIG_COMBS -#if defined(__ARM_NEON__) -#define USE_BIG_COMBS 1 -#else -#define USE_BIG_COMBS (WORD_BITS==64) -#endif -#endif - -/** @brief The number of combs to use for signed comb algo */ -#define COMB_N (USE_BIG_COMBS ? 5 : 8) - -/** @brief The number of teeth of the combs for signed comb algo */ -#define COMB_T (USE_BIG_COMBS ? 5 : 4) - -/** @brief The spacing the of combs for signed comb algo */ -#define COMB_S (USE_BIG_COMBS ? 18 : 14) - /** * @brief The bit width of the precomputed WNAF tables. Size is 2^this elements. */ #define WNAF_PRECMP_BITS 5 -/** - * @brief crandom magic structure guard constant = "return 4", cf xkcd #221 - */ -#define CRANDOM_MAGIC 0x72657475726e2034ull #endif /* __GOLDI_MAGIC_H__ */ diff --git a/src/include/word.h b/src/include/word.h index ddc8d36..297bb96 100644 --- a/src/include/word.h +++ b/src/include/word.h @@ -37,9 +37,12 @@ typedef int64_t sword_t; typedef __int128_t dsword_t; #define PRIxWORD PRIx64 #define PRIxWORDfull "%016" PRIx64 -#define PRIxWORD58 "%014" PRIx64 +#define PRIxWORD56 "%014" PRIx64 +#define PRIxWORD60 "%015" PRIx64 #define U64LE(x) x##ull #define U58LE(x) x##ull +#define U56LE(x) x##ull +#define U60LE(x) x##ull #define letohWORD letoh64 #define GOLDI_BITS 64 #else @@ -51,9 +54,11 @@ typedef int32_t sword_t; typedef int64_t dsword_t; #define PRIxWORD PRIx32 #define PRIxWORDfull "%08" PRIx32 -#define PRIxWORD58 "%07" PRIx32 +#define PRIxWORD56 "%07" PRIx32 #define U64LE(x) (x##ull)&((1ull<<32)-1), (x##ull)>>32 -#define U58LE(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 +#define U58LE(x) (x##ull)&((1ull<<29)-1), (x##ull)>>29 +#define U56LE(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 +#define U60LE(x) (x##ull)&((1ull<<30)-1), (x##ull)>>30 #define letohWORD letoh32 #define GOLDI_BITS 32 #endif diff --git a/src/arch_32/arch_config.h b/src/p448/arch_32/arch_config.h similarity index 100% rename from src/arch_32/arch_config.h rename to src/p448/arch_32/arch_config.h diff --git a/src/arch_32/p448.c 
b/src/p448/arch_32/p448.c similarity index 100% rename from src/arch_32/p448.c rename to src/p448/arch_32/p448.c diff --git a/src/arch_32/p448.h b/src/p448/arch_32/p448.h similarity index 100% rename from src/arch_32/p448.h rename to src/p448/arch_32/p448.h diff --git a/src/arch_arm_32/arch_config.h b/src/p448/arch_arm_32/arch_config.h similarity index 100% rename from src/arch_arm_32/arch_config.h rename to src/p448/arch_arm_32/arch_config.h diff --git a/src/arch_arm_32/p448.c b/src/p448/arch_arm_32/p448.c similarity index 100% rename from src/arch_arm_32/p448.c rename to src/p448/arch_arm_32/p448.c diff --git a/src/arch_arm_32/p448.h b/src/p448/arch_arm_32/p448.h similarity index 100% rename from src/arch_arm_32/p448.h rename to src/p448/arch_arm_32/p448.h diff --git a/src/arch_neon/arch_config.h b/src/p448/arch_neon/arch_config.h similarity index 100% rename from src/arch_neon/arch_config.h rename to src/p448/arch_neon/arch_config.h diff --git a/src/arch_neon/neon_emulation.h b/src/p448/arch_neon/neon_emulation.h similarity index 100% rename from src/arch_neon/neon_emulation.h rename to src/p448/arch_neon/neon_emulation.h diff --git a/src/arch_neon/p448.c b/src/p448/arch_neon/p448.c similarity index 100% rename from src/arch_neon/p448.c rename to src/p448/arch_neon/p448.c diff --git a/src/arch_neon/p448.h b/src/p448/arch_neon/p448.h similarity index 100% rename from src/arch_neon/p448.h rename to src/p448/arch_neon/p448.h diff --git a/src/arch_neon_experimental/arch_config.h b/src/p448/arch_neon_experimental/arch_config.h similarity index 100% rename from src/arch_neon_experimental/arch_config.h rename to src/p448/arch_neon_experimental/arch_config.h diff --git a/src/arch_neon_experimental/p448.c b/src/p448/arch_neon_experimental/p448.c similarity index 100% rename from src/arch_neon_experimental/p448.c rename to src/p448/arch_neon_experimental/p448.c diff --git a/src/arch_neon_experimental/p448.h b/src/p448/arch_neon_experimental/p448.h similarity index 100% 
rename from src/arch_neon_experimental/p448.h rename to src/p448/arch_neon_experimental/p448.h diff --git a/src/arch_ref64/arch_config.h b/src/p448/arch_ref64/arch_config.h similarity index 100% rename from src/arch_ref64/arch_config.h rename to src/p448/arch_ref64/arch_config.h diff --git a/src/arch_ref64/p448.c b/src/p448/arch_ref64/p448.c similarity index 100% rename from src/arch_ref64/p448.c rename to src/p448/arch_ref64/p448.c diff --git a/src/arch_ref64/p448.h b/src/p448/arch_ref64/p448.h similarity index 100% rename from src/arch_ref64/p448.h rename to src/p448/arch_ref64/p448.h diff --git a/src/arch_x86_64/arch_config.h b/src/p448/arch_x86_64/arch_config.h similarity index 100% rename from src/arch_x86_64/arch_config.h rename to src/p448/arch_x86_64/arch_config.h diff --git a/src/arch_x86_64/p448.c b/src/p448/arch_x86_64/p448.c similarity index 100% rename from src/arch_x86_64/p448.c rename to src/p448/arch_x86_64/p448.c diff --git a/src/arch_x86_64/p448.h b/src/p448/arch_x86_64/p448.h similarity index 100% rename from src/arch_x86_64/p448.h rename to src/p448/arch_x86_64/p448.h diff --git a/src/arch_x86_64/x86-64-arith.h b/src/p448/arch_x86_64/x86-64-arith.h similarity index 100% rename from src/arch_x86_64/x86-64-arith.h rename to src/p448/arch_x86_64/x86-64-arith.h diff --git a/src/p448/f_arithmetic.c b/src/p448/f_arithmetic.c new file mode 100644 index 0000000..82f35b8 --- /dev/null +++ b/src/p448/f_arithmetic.c @@ -0,0 +1,43 @@ +/** + * @cond internal + * @file f_arithmetic.c + * @copyright + * Copyright (c) 2014 Cryptography Research, Inc. \n + * Released under the MIT License. See LICENSE.txt for license information. + * @author Mike Hamburg + * @brief Field-specific arithmetic. 
+ */ + +#include "ec_point.h" + +void +field_isr ( + struct field_t* a, + const struct field_t* x +) { + struct field_t L0, L1, L2; + field_sqr ( &L1, x ); + field_mul ( &L2, x, &L1 ); + field_sqr ( &L1, &L2 ); + field_mul ( &L2, x, &L1 ); + field_sqrn ( &L1, &L2, 3 ); + field_mul ( &L0, &L2, &L1 ); + field_sqrn ( &L1, &L0, 3 ); + field_mul ( &L0, &L2, &L1 ); + field_sqrn ( &L2, &L0, 9 ); + field_mul ( &L1, &L0, &L2 ); + field_sqr ( &L0, &L1 ); + field_mul ( &L2, x, &L0 ); + field_sqrn ( &L0, &L2, 18 ); + field_mul ( &L2, &L1, &L0 ); + field_sqrn ( &L0, &L2, 37 ); + field_mul ( &L1, &L2, &L0 ); + field_sqrn ( &L0, &L1, 37 ); + field_mul ( &L1, &L2, &L0 ); + field_sqrn ( &L0, &L1, 111 ); + field_mul ( &L2, &L1, &L0 ); + field_sqr ( &L0, &L2 ); + field_mul ( &L1, x, &L0 ); + field_sqrn ( &L0, &L1, 223 ); + field_mul ( a, &L2, &L0 ); +} diff --git a/src/p448/f_field.h b/src/p448/f_field.h new file mode 100644 index 0000000..c743c8d --- /dev/null +++ b/src/p448/f_field.h @@ -0,0 +1,39 @@ +/** + * @file f_field.h + * @brief Field-specific code. + * @copyright + * Copyright (c) 2014 Cryptography Research, Inc. \n + * Released under the MIT License. See LICENSE.txt for license information. 
+ * @author Mike Hamburg + */ +#ifndef __F_FIELD_H__ +#define __F_FIELD_H__ 1 + +#include <string.h> +#include "constant_time.h" + +#include "p448.h" +#define FIELD_BITS 448 +#define field_t p448_t +#define field_mul p448_mul +#define field_sqr p448_sqr +#define field_add p448_add +#define field_sub p448_sub +#define field_mulw p448_mulw +#define field_addw p448_addw +#define field_subw p448_subw +#define field_neg p448_neg +#define field_set_ui p448_set_ui +#define field_bias p448_bias +#define field_cond_neg p448_cond_neg +#define field_inverse p448_inverse +#define field_eq p448_eq +#define field_isr p448_isr +#define field_simultaneous_invert p448_simultaneous_invert +#define field_weak_reduce p448_weak_reduce +#define field_strong_reduce p448_strong_reduce +#define field_serialize p448_serialize +#define field_deserialize p448_deserialize +#define field_is_zero p448_is_zero + +#endif /* __F_FIELD_H__ */ diff --git a/src/p448/f_magic.h b/src/p448/f_magic.h new file mode 100644 index 0000000..9e1365a --- /dev/null +++ b/src/p448/f_magic.h @@ -0,0 +1,35 @@ +/** + * @file f_magic.h + * @copyright + * Copyright (c) 2014 Cryptography Research, Inc. \n + * Released under the MIT License. See LICENSE.txt for license information. + * @author Mike Hamburg + * @brief Goldilocks magic numbers (group orders, coefficients, algo params etc). + */ + +#ifndef __GOLDI_F_MAGIC_H__ +#define __GOLDI_F_MAGIC_H__ 1 + +#include "field.h" +#include "ec_point.h" + +/** + * @brief The Edwards "d" term for this curve. + */ +static const int64_t EDWARDS_D = -39081; + +/** @brief The number of combs to use for signed comb algo */ +#define COMB_N (USE_BIG_COMBS ? 5 : 8) + +/** @brief The number of teeth of the combs for signed comb algo */ +#define COMB_T (USE_BIG_COMBS ? 5 : 4) + +/** @brief The spacing of the combs for signed comb algo */ +#define COMB_S (USE_BIG_COMBS ?
18 : 14) + +/** + * @brief crandom magic structure guard constant = "return 4", cf xkcd #221 + */ +#define CRANDOM_MAGIC 0x72657475726e2034ull + +#endif /* __GOLDI_F_MAGIC_H__ */ diff --git a/src/p448/field.h b/src/p448/field.h new file mode 100644 index 0000000..bf36e95 --- /dev/null +++ b/src/p448/field.h @@ -0,0 +1,123 @@ +/** + * @file field.h + * @brief Field switch code. + * @copyright + * Copyright (c) 2014 Cryptography Research, Inc. \n + * Released under the MIT License. See LICENSE.txt for license information. + * @author Mike Hamburg + */ +#ifndef __FIELD_H__ +#define __FIELD_H__ + +#include <string.h> +#include "constant_time.h" + +#include "p448.h" +#define FIELD_BITS 448 +#define field_t p448_t +#define field_mul p448_mul +#define field_sqr p448_sqr +#define field_add p448_add +#define field_sub p448_sub +#define field_mulw p448_mulw +#define field_addw p448_addw +#define field_subw p448_subw +#define field_neg p448_neg +#define field_set_ui p448_set_ui +#define field_bias p448_bias +#define field_cond_neg p448_cond_neg +#define field_inverse p448_inverse +#define field_eq p448_eq +#define field_isr p448_isr +#define field_simultaneous_invert p448_simultaneous_invert +#define field_weak_reduce p448_weak_reduce +#define field_strong_reduce p448_strong_reduce +#define field_serialize p448_serialize +#define field_deserialize p448_deserialize +#define field_is_zero p448_is_zero + +/** @brief Bytes in a field element */ +#define FIELD_BYTES (1+(FIELD_BITS-1)/8) + +/** @brief Words in a field element (FIELD_BITS rounded up to whole word_t's) */ +#define FIELD_WORDS (1+(FIELD_BITS-1)/(8*sizeof(word_t))) + +/** + * @brief For GMP tests: little-endian representation of the field modulus. + */ +extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; + +/** + * Copy one field element to another. + */ +static inline void +__attribute__((unused,always_inline)) +field_copy ( + struct field_t *__restrict__ a, + const struct field_t *__restrict__ b +) { + memcpy(a,b,sizeof(*a)); +} + +/** + * Negate a in place if doNegate.
+ */ +static inline void +__attribute__((unused,always_inline)) +field_cond_neg( + field_t *a, + mask_t doNegate +) { + struct field_t negated; + field_neg(&negated, a); + field_bias(&negated, 2); + constant_time_select(a, &negated, a, sizeof(negated), doNegate); +} + +/** + * Returns 1/sqrt(+- x). + * + * The Legendre symbol of the result is the same as that of the + * input. + * + * If x=0, returns 0. + */ +void +field_isr ( + struct field_t* a, + const struct field_t* x +); + +/** + * Batch inverts out[i] = 1/in[i] + * + * If any input is zero, all the outputs will be zero. + */ +void +field_simultaneous_invert ( + struct field_t *__restrict__ out, + const struct field_t *in, + unsigned int n +); + +/** + * Returns 1/x. + * + * If x=0, returns 0. + */ +void +field_inverse ( + struct field_t* a, + const struct field_t* x +); + +/** + * Returns -1 if a==b, 0 otherwise. + */ +mask_t +field_eq ( + const struct field_t *a, + const struct field_t *b +); + +#endif /* __FIELD_H__ */ diff --git a/src/magic.c b/src/p448/magic.c similarity index 82% rename from src/magic.c rename to src/p448/magic.c index 5157e14..b1e7ca5 100644 --- a/src/magic.c +++ b/src/p448/magic.c @@ -39,10 +39,10 @@ const struct affine_t goldilocks_base_point = { 0x4c63d96,0x4609845,0xf3932d9,0x1b4faff, 0x6147eaa,0xa2692ff,0x9cecfa9,0x297ea0e }}, #else - {{ U58LE(0xf0de840aed939f), U58LE(0xc170033f4ba0c7), - U58LE(0xf3932d94c63d96), U58LE(0x9cecfa96147eaa), - U58LE(0x5f065c3c59d070), U58LE(0x3a6a26adf73324), - U58LE(0x1b4faff4609845), U58LE(0x297ea0ea2692ff) + {{ U56LE(0xf0de840aed939f), U56LE(0xc170033f4ba0c7), + U56LE(0xf3932d94c63d96), U56LE(0x9cecfa96147eaa), + U56LE(0x5f065c3c59d070), U56LE(0x3a6a26adf73324), + U56LE(0x1b4faff4609845), U56LE(0x297ea0ea2692ff) }}, #endif {{ 19 }} @@ -69,13 +69,13 @@ sqrt_d_minus_1 = {{ 0xbdeea38,0x748734a,0x5a189aa,0x49443b8, 0x6f14c06,0x0b25b7a,0x51e65ca,0x12fec0c #else - U58LE(0xd2e21836749f46), - U58LE(0x888db42b4f0179), - U58LE(0x5a189aabdeea38), - 
U58LE(0x51e65ca6f14c06), - U58LE(0xa49f7b424d9770), - U58LE(0xdcac4628c5f656), - U58LE(0x49443b8748734a), - U58LE(0x12fec0c0b25b7a) + U56LE(0xd2e21836749f46), + U56LE(0x888db42b4f0179), + U56LE(0x5a189aabdeea38), + U56LE(0x51e65ca6f14c06), + U56LE(0xa49f7b424d9770), + U56LE(0xdcac4628c5f656), + U56LE(0x49443b8748734a), + U56LE(0x12fec0c0b25b7a) #endif }}; diff --git a/src/p480/arch_x86_64/arch_config.h b/src/p480/arch_x86_64/arch_config.h new file mode 100644 index 0000000..58758cc --- /dev/null +++ b/src/p480/arch_x86_64/arch_config.h @@ -0,0 +1 @@ +#define WORD_BITS 64 diff --git a/src/p480/arch_x86_64/p480.c b/src/p480/arch_x86_64/p480.c new file mode 100644 index 0000000..6110373 --- /dev/null +++ b/src/p480/arch_x86_64/p480.c @@ -0,0 +1,435 @@ +/* Copyright (c) 2014 Cryptography Research, Inc. + * Released under the MIT License. See LICENSE.txt for license information. + */ + +#include "p480.h" +#include "x86-64-arith.h" + +void +p480_mul ( + p480_t *__restrict__ cs, + const p480_t *as, + const p480_t *bs +) { + const uint64_t *a = as->limb, *b = bs->limb; + uint64_t *c = cs->limb; + + __uint128_t accum0 = 0, accum1 = 0, accum2; + uint64_t mask = (1ull<<60) - 1; + + uint64_t aa[4] __attribute__((aligned(32))), bb[4] __attribute__((aligned(32))), bbb[4] __attribute__((aligned(32))); + + /* For some reason clang doesn't vectorize this without prompting? 
*/ + unsigned int i; + for (i=0; i>= 60; + accum1 >>= 60; + + mac(&accum0, &aa[1],&bb[3]); + mac(&accum1, &a[5], &b[7]); + mac(&accum0, &aa[2], &bb[2]); + mac(&accum1, &a[6], &b[6]); + mac(&accum0, &aa[3], &bb[1]); + accum1 += accum0; + + accum2 = widemul(&a[0],&b[0]); + accum1 -= accum2; + accum0 += accum2; + + msb(&accum0, &a[1], &b[3]); + msb(&accum0, &a[2], &b[2]); + mac(&accum1, &a[7], &b[5]); + msb(&accum0, &a[3], &b[1]); + mac(&accum1, &aa[0], &bb[0]); + mac(&accum0, &a[4], &b[4]); + + c[0] = ((uint64_t)(accum0)) & mask; + c[4] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 60; + accum1 >>= 60; + + accum2 = widemul(&a[2],&b[7]); + mac(&accum0, &a[6], &bb[3]); + mac(&accum1, &aa[2], &bbb[3]); + + mac(&accum2, &a[3], &b[6]); + mac(&accum0, &a[7], &bb[2]); + mac(&accum1, &aa[3], &bbb[2]); + + mac(&accum2, &a[0],&b[1]); + mac(&accum1, &aa[0], &bb[1]); + mac(&accum0, &a[4], &b[5]); + + mac(&accum2, &a[1], &b[0]); + mac(&accum1, &aa[1], &bb[0]); + mac(&accum0, &a[5], &b[4]); + + accum1 -= accum2; + accum0 += accum2; + + c[1] = ((uint64_t)(accum0)) & mask; + c[5] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 60; + accum1 >>= 60; + + accum2 = widemul(&a[3],&b[7]); + mac(&accum0, &a[7], &bb[3]); + mac(&accum1, &aa[3], &bbb[3]); + + mac(&accum2, &a[0],&b[2]); + mac(&accum1, &aa[0], &bb[2]); + mac(&accum0, &a[4], &b[6]); + + mac(&accum2, &a[1], &b[1]); + mac(&accum1, &aa[1], &bb[1]); + mac(&accum0, &a[5], &b[5]); + + mac(&accum2, &a[2], &b[0]); + mac(&accum1, &aa[2], &bb[0]); + mac(&accum0, &a[6], &b[4]); + + accum1 -= accum2; + accum0 += accum2; + + c[2] = ((uint64_t)(accum0)) & mask; + c[6] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 60; + accum1 >>= 60; + + accum0 += c[3]; + accum1 += c[7]; + c[3] = ((uint64_t)(accum0)) & mask; + c[7] = ((uint64_t)(accum1)) & mask; + + /* we could almost stop here, but it wouldn't be stable, so... 
*/ + + accum0 >>= 60; + accum1 >>= 60; + c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); + c[0] += ((uint64_t)(accum1)); +} + +void +p480_mulw ( + p480_t *__restrict__ cs, + const p480_t *as, + uint64_t b +) { + const uint64_t *a = as->limb; + uint64_t *c = cs->limb; + + __uint128_t accum0, accum4; + uint64_t mask = (1ull<<60) - 1; + + accum0 = widemul_rm(b, &a[0]); + accum4 = widemul_rm(b, &a[4]); + + c[0] = accum0 & mask; accum0 >>= 60; + c[4] = accum4 & mask; accum4 >>= 60; + + mac_rm(&accum0, b, &a[1]); + mac_rm(&accum4, b, &a[5]); + + c[1] = accum0 & mask; accum0 >>= 60; + c[5] = accum4 & mask; accum4 >>= 60; + + mac_rm(&accum0, b, &a[2]); + mac_rm(&accum4, b, &a[6]); + + c[2] = accum0 & mask; accum0 >>= 60; + c[6] = accum4 & mask; accum4 >>= 60; + + mac_rm(&accum0, b, &a[3]); + mac_rm(&accum4, b, &a[7]); + + c[3] = accum0 & mask; accum0 >>= 60; + c[7] = accum4 & mask; accum4 >>= 60; + + accum0 += accum4 + c[4]; + c[4] = accum0 & mask; + c[5] += accum0 >> 60; + + accum4 += c[0]; + c[0] = accum4 & mask; + c[1] += accum4 >> 60; +} + +void +p480_sqr ( + p480_t *__restrict__ cs, + const p480_t *as +) { + const uint64_t *a = as->limb; + uint64_t *c = cs->limb; + + __uint128_t accum0 = 0, accum1 = 0, accum2; + uint64_t mask = (1ull<<60) - 1; + + uint64_t aa[4] __attribute__((aligned(32))); + + /* For some reason clang doesn't vectorize this without prompting? 
*/ + unsigned int i; + for (i=0; i>= 59; + accum1 >>= 59; + + mac2(&accum0, &aa[1],&aa[3]); + mac2(&accum1, &a[5], &a[7]); + mac(&accum0, &aa[2], &aa[2]); + accum1 += accum0; + + msb2(&accum0, &a[1], &a[3]); + mac(&accum1, &a[6], &a[6]); + + accum2 = widemul(&a[0],&a[0]); + accum1 -= accum2; + accum0 += accum2; + + msb(&accum0, &a[2], &a[2]); + mac(&accum1, &aa[0], &aa[0]); + mac(&accum0, &a[4], &a[4]); + + c[0] = ((uint64_t)(accum0)) & mask; + c[4] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 60; + accum1 >>= 60; + + accum2 = widemul2(&aa[2],&aa[3]); + msb2(&accum0, &a[2], &a[3]); + mac2(&accum1, &a[6], &a[7]); + + accum1 += accum2; + accum0 += accum2; + + accum2 = widemul2(&a[0],&a[1]); + mac2(&accum1, &aa[0], &aa[1]); + mac2(&accum0, &a[4], &a[5]); + + accum1 -= accum2; + accum0 += accum2; + + c[1] = ((uint64_t)(accum0)) & mask; + c[5] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 60; + accum1 >>= 60; + + accum2 = widemul(&aa[3],&aa[3]); + msb(&accum0, &a[3], &a[3]); + mac(&accum1, &a[7], &a[7]); + + accum1 += accum2; + accum0 += accum2; + + accum2 = widemul2(&a[0],&a[2]); + mac2(&accum1, &aa[0], &aa[2]); + mac2(&accum0, &a[4], &a[6]); + + mac(&accum2, &a[1], &a[1]); + mac(&accum1, &aa[1], &aa[1]); + mac(&accum0, &a[5], &a[5]); + + accum1 -= accum2; + accum0 += accum2; + + c[2] = ((uint64_t)(accum0)) & mask; + c[6] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 60; + accum1 >>= 60; + + accum0 += c[3]; + accum1 += c[7]; + c[3] = ((uint64_t)(accum0)) & mask; + c[7] = ((uint64_t)(accum1)) & mask; + + /* we could almost stop here, but it wouldn't be stable, so... 
*/ + + accum0 >>= 60; + accum1 >>= 60; + c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); + c[0] += ((uint64_t)(accum1)); +} + +void +p480_strong_reduce ( + p480_t *a +) { + uint64_t mask = (1ull<<60)-1; + + /* first, clear high */ + a->limb[4] += a->limb[7]>>60; + a->limb[0] += a->limb[7]>>60; + a->limb[7] &= mask; + + /* now the total is less than 2^480 - 2^(480-60) + 2^(480-60+8) < 2p */ + + /* compute total_value - p. No need to reduce mod p. */ + + __int128_t scarry = 0; + int i; + for (i=0; i<8; i++) { + scarry = scarry + a->limb[i] - ((i==4)?mask-1:mask); + a->limb[i] = scarry & mask; + scarry >>= 60; + } + + /* uncommon case: it was >= p, so now scarry = 0 and this = x + * common case: it was < p, so now scarry = -1 and this = x - p + 2^480 + * so let's add back in p. will carry back off the top for 2^480. + */ + + assert(is_zero(scarry) | is_zero(scarry+1)); + + uint64_t scarry_mask = scarry & mask; + __uint128_t carry = 0; + + /* add it back */ + for (i=0; i<8; i++) { + carry = carry + a->limb[i] + ((i==4)?(scarry_mask&~1):scarry_mask); + a->limb[i] = carry & mask; + carry >>= 60; + } + + assert(is_zero(carry + scarry)); +} + +mask_t +p480_is_zero ( + const struct p480_t *a +) { + struct p480_t b; + p480_copy(&b,a); + p480_strong_reduce(&b); + + uint64_t any = 0; + int i; + for (i=0; i<8; i++) { + any |= b.limb[i]; + } + return is_zero(any); +} + +void +p480_serialize ( + uint8_t *serial, + const struct p480_t *x +) { + int i,j,k=0; + p480_t red; + p480_copy(&red, x); + p480_strong_reduce(&red); + word_t r = 0; + for (i=0; i<8; i+=2) { + r = red.limb[i]; + for (j=0; j<7; j++) { + serial[k++] = r; + r >>= 8; + } + assert(r<16); + r += red.limb[i+1]<<4; + for (j=0; j<8; j++) { + serial[k++] = r; + r >>= 8; + } + assert(r==0); + } +} + +mask_t +p480_deserialize ( + p480_t *x, + const uint8_t serial[60] +) { + int i,j,k=0; + + for (i=0; i<8; i+=2) { + word_t r = 0; + for (j=0; j<8; j++) { + r |= ((word_t)serial[k++])<<(8*j); + } + x->limb[i] = r & 
((1ull<<60)-1); + r >>= 60; + for (j=0; j<7; j++) { + r |= ((word_t)serial[k++])<<(8*j+4); + } + x->limb[i+1] = r; + } + + /* Check for reduction. + * + * The idea is to create a variable ge which is all ones (rather, 60 ones) + * if and only if the low $i$ words of $x$ are >= those of p. + * + * Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111) + */ + word_t ge = -1, mask = (1ull<<60)-1; + for (i=0; i<4; i++) { + ge &= x->limb[i]; + } + + /* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */ + ge = (ge & (x->limb[4] + 1)) | is_zero(x->limb[4] ^ mask); + + /* Propagate the rest */ + for (i=5; i<8; i++) { + ge &= x->limb[i]; + } + + return ~is_zero(ge ^ mask); +} + diff --git a/src/p480/arch_x86_64/p480.h b/src/p480/arch_x86_64/p480.h new file mode 100644 index 0000000..a49c6d0 --- /dev/null +++ b/src/p480/arch_x86_64/p480.h @@ -0,0 +1,257 @@ +/* Copyright (c) 2014 Cryptography Research, Inc. + * Released under the MIT License. See LICENSE.txt for license information. 
+ */ +#ifndef __p480_H__ +#define __p480_H__ 1 + +#include +#include + +#include "word.h" + +typedef struct p480_t { + uint64_t limb[8]; +} __attribute__((aligned(32))) p480_t; + +#ifdef __cplusplus +extern "C" { +#endif + +static __inline__ void +p480_set_ui ( + p480_t *out, + uint64_t x +) __attribute__((unused,always_inline)); + +static __inline__ void +p480_add ( + p480_t *out, + const p480_t *a, + const p480_t *b +) __attribute__((unused,always_inline)); + +static __inline__ void +p480_sub ( + p480_t *out, + const p480_t *a, + const p480_t *b +) __attribute__((unused,always_inline)); + +static __inline__ void +p480_neg ( + p480_t *out, + const p480_t *a +) __attribute__((unused,always_inline)); + +static __inline__ void +p480_addw ( + p480_t *a, + uint64_t x +) __attribute__((unused,always_inline)); + +static __inline__ void +p480_subw ( + p480_t *a, + uint64_t x +) __attribute__((unused,always_inline)); + +static __inline__ void +p480_copy ( + p480_t *out, + const p480_t *a +) __attribute__((unused,always_inline)); + +static __inline__ void +p480_weak_reduce ( + p480_t *inout +) __attribute__((unused,always_inline)); + +void +p480_strong_reduce ( + p480_t *inout +); + +mask_t +p480_is_zero ( + const p480_t *in +); + +static __inline__ void +p480_bias ( + p480_t *inout, + int amount +) __attribute__((unused,always_inline)); + +void +p480_mul ( + p480_t *__restrict__ out, + const p480_t *a, + const p480_t *b +); + +void +p480_mulw ( + p480_t *__restrict__ out, + const p480_t *a, + uint64_t b +); + +void +p480_sqr ( + p480_t *__restrict__ out, + const p480_t *a +); + +void +p480_serialize ( + uint8_t *serial, + const struct p480_t *x +); + +mask_t +p480_deserialize ( + p480_t *x, + const uint8_t serial[60] +); + +/* -------------- Inline functions begin here -------------- */ + +void +p480_set_ui ( + p480_t *out, + uint64_t x +) { + int i; + out->limb[0] = x; + for (i=1; i<8; i++) { + out->limb[i] = 0; + } +} + +void +p480_add ( + p480_t *out, + const p480_t *a, 
+ const p480_t *b +) { + unsigned int i; + for (i=0; ilimb[0]); i++) { + out->limb[i] = a->limb[i] + b->limb[i]; + } + */ +} + +void +p480_sub ( + p480_t *out, + const p480_t *a, + const p480_t *b +) { + unsigned int i; + for (i=0; ilimb[0]); i++) { + out->limb[i] = a->limb[i] - b->limb[i]; + } + */ +} + +void +p480_neg ( + struct p480_t *out, + const p480_t *a +) { + unsigned int i; + for (i=0; ilimb[0]); i++) { + out->limb[i] = -a->limb[i]; + } + */ +} + +void +p480_addw ( + p480_t *a, + uint64_t x +) { + a->limb[0] += x; +} + +void +p480_subw ( + p480_t *a, + uint64_t x +) { + a->limb[0] -= x; +} + +void +p480_copy ( + p480_t *out, + const p480_t *a +) { + unsigned int i; + for (i=0; ilimb[i] += (i==4) ? co2 : co1; + } +#endif +} + +void +p480_weak_reduce ( + p480_t *a +) { + /* PERF: use pshufb/palignr if anyone cares about speed of this */ + uint64_t mask = (1ull<<60) - 1; + uint64_t tmp = a->limb[7] >> 60; + int i; + a->limb[4] += tmp; + for (i=7; i>0; i--) { + a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>60); + } + a->limb[0] = (a->limb[0] & mask) + tmp; +} + +#ifdef __cplusplus +}; /* extern "C" */ +#endif + +#endif /* __p480_H__ */ diff --git a/src/p480/arch_x86_64/x86-64-arith.h b/src/p480/arch_x86_64/x86-64-arith.h new file mode 100644 index 0000000..32ee832 --- /dev/null +++ b/src/p480/arch_x86_64/x86-64-arith.h @@ -0,0 +1,279 @@ +/* Copyright (c) 2014 Cryptography Research, Inc. + * Released under the MIT License. See LICENSE.txt for license information. + */ + +#ifndef __X86_64_ARITH_H__ +#define __X86_64_ARITH_H__ + +#include + +/* TODO: non x86-64 versions of these. 
+ * FUTURE: autogenerate + */ + +static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) { + #ifndef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rax;" + "mulq %[b];" + : [c]"=a"(c), [d]"=d"(d) + : [b]"m"(*b), [a]"m"(*a) + : "cc"); + return (((__uint128_t)(d))<<64) | c; + #else + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx;" + "mulx %[b], %[c], %[d];" + : [c]"=r"(c), [d]"=r"(d) + : [b]"m"(*b), [a]"m"(*a) + : "rdx"); + return (((__uint128_t)(d))<<64) | c; + #endif +} + +static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) { + #ifndef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rax;" + "mulq %[b];" + : [c]"=a"(c), [d]"=d"(d) + : [b]"m"(*b), [a]"r"(a) + : "cc"); + return (((__uint128_t)(d))<<64) | c; + #else + uint64_t c,d; + __asm__ volatile + ("mulx %[b], %[c], %[d];" + : [c]"=r"(c), [d]"=r"(d) + : [b]"m"(*b), [a]"d"(a)); + return (((__uint128_t)(d))<<64) | c; + #endif +} + +static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) { + #ifndef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rax; " + "addq %%rax, %%rax; " + "mulq %[b];" + : [c]"=a"(c), [d]"=d"(d) + : [b]"m"(*b), [a]"m"(*a) + : "cc"); + return (((__uint128_t)(d))<<64) | c; + #else + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx;" + "leaq (,%%rdx,2), %%rdx;" + "mulx %[b], %[c], %[d];" + : [c]"=r"(c), [d]"=r"(d) + : [b]"m"(*b), [a]"m"(*a) + : "rdx"); + return (((__uint128_t)(d))<<64) | c; + #endif +} + +static __inline__ void mac(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { + uint64_t lo = *acc, hi = *acc>>64; + + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx; " + "mulx %[b], %[c], %[d]; " + "addq %[c], %[lo]; " + "adcq %[d], %[hi]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rdx", "cc"); + #else + __asm__ volatile + ("movq %[a], %%rax; " + "mulq %[b]; " + "addq %%rax, %[lo]; " + "adcq %%rdx, %[hi]; " 
+ : [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rax", "rdx", "cc"); + #endif + + *acc = (((__uint128_t)(hi))<<64) | lo; +} + +static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2, const uint64_t *a, const uint64_t *b) { + uint64_t lo = *acc, hi = *acc>>64; + uint64_t lo2 = *acc2, hi2 = *acc2>>64; + + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx; " + "mulx %[b], %[c], %[d]; " + "addq %[c], %[lo]; " + "adcq %[d], %[hi]; " + "addq %[c], %[lo2]; " + "adcq %[d], %[hi2]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2) + : [b]"m"(*b), [a]"m"(*a) + : "rdx", "cc"); + #else + __asm__ volatile + ("movq %[a], %%rax; " + "mulq %[b]; " + "addq %%rax, %[lo]; " + "adcq %%rdx, %[hi]; " + "addq %%rax, %[lo2]; " + "adcq %%rdx, %[hi2]; " + : [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2) + : [b]"m"(*b), [a]"m"(*a) + : "rax", "rdx", "cc"); + #endif + + *acc = (((__uint128_t)(hi))<<64) | lo; + *acc2 = (((__uint128_t)(hi2))<<64) | lo2; +} + +static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) { + uint64_t lo = *acc, hi = *acc>>64; + + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("mulx %[b], %[c], %[d]; " + "addq %[c], %[lo]; " + "adcq %[d], %[hi]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"d"(a) + : "cc"); + #else + __asm__ volatile + ("movq %[a], %%rax; " + "mulq %[b]; " + "addq %%rax, %[lo]; " + "adcq %%rdx, %[hi]; " + : [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"r"(a) + : "rax", "rdx", "cc"); + #endif + + *acc = (((__uint128_t)(hi))<<64) | lo; +} + +static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { + uint64_t lo = *acc, hi = *acc>>64; + + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx; " + "addq %%rdx, %%rdx; " + "mulx %[b], %[c], %[d]; " + "addq %[c], %[lo]; " + "adcq %[d], %[hi]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : 
[b]"m"(*b), [a]"m"(*a) + : "rdx", "cc"); + #else + __asm__ volatile + ("movq %[a], %%rax; " + "addq %%rax, %%rax; " + "mulq %[b]; " + "addq %%rax, %[lo]; " + "adcq %%rdx, %[hi]; " + : [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rax", "rdx", "cc"); + #endif + + *acc = (((__uint128_t)(hi))<<64) | lo; +} + +static __inline__ void msb(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { + uint64_t lo = *acc, hi = *acc>>64; + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx; " + "mulx %[b], %[c], %[d]; " + "subq %[c], %[lo]; " + "sbbq %[d], %[hi]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rdx", "cc"); + #else + __asm__ volatile + ("movq %[a], %%rax; " + "mulq %[b]; " + "subq %%rax, %[lo]; " + "sbbq %%rdx, %[hi]; " + : [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rax", "rdx", "cc"); + #endif + *acc = (((__uint128_t)(hi))<<64) | lo; +} + +static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { + uint64_t lo = *acc, hi = *acc>>64; + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx; " + "addq %%rdx, %%rdx; " + "mulx %[b], %[c], %[d]; " + "subq %[c], %[lo]; " + "sbbq %[d], %[hi]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rdx", "cc"); + #else + __asm__ volatile + ("movq %[a], %%rax; " + "addq %%rax, %%rax; " + "mulq %[b]; " + "subq %%rax, %[lo]; " + "sbbq %%rdx, %[hi]; " + : [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rax", "rdx", "cc"); + #endif + *acc = (((__uint128_t)(hi))<<64) | lo; + +} + +static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { + uint64_t c,d, lo = *acc, hi = *acc>>64; + __asm__ volatile + ("movq %[a], %%rdx; " + "mulx %[b], %[c], %[d]; " + "subq %[lo], %[c]; " + "sbbq %[hi], %[d]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rdx", "cc"); + *acc = 
(((__uint128_t)(d))<<64) | c; +} + +static __inline__ __uint128_t widemulu(uint64_t a, uint64_t b) { + return ((__uint128_t)(a)) * b; +} + +static __inline__ __int128_t widemuls(int64_t a, int64_t b) { + return ((__int128_t)(a)) * b; +} + +static __inline__ uint64_t opacify(uint64_t x) { + __asm__ volatile("" : "+r"(x)); + return x; +} + +static __inline__ mask_t is_zero(uint64_t x) { + __asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x)); + return ~x; +} + +#endif /* __X86_64_ARITH_H__ */ diff --git a/src/p480/f_arithmetic.c b/src/p480/f_arithmetic.c new file mode 100644 index 0000000..d616e42 --- /dev/null +++ b/src/p480/f_arithmetic.c @@ -0,0 +1,43 @@ +/** + * @cond internal + * @file f_arithmetic.c + * @copyright + * Copyright (c) 2014 Cryptography Research, Inc. \n + * Released under the MIT License. See LICENSE.txt for license information. + * @author Mike Hamburg + * @brief Field-specific arithmetic. + */ + +#include "ec_point.h" + +void +field_isr ( + struct field_t* a, + const struct field_t* x +) { + struct field_t L0, L1, L2, L3; + field_sqr ( &L2, x ); + field_mul ( &L1, x, &L2 ); + field_sqrn ( &L0, &L1, 2 ); + field_mul ( &L2, &L1, &L0 ); + field_sqrn ( &L0, &L2, 4 ); + field_mul ( &L1, &L2, &L0 ); + field_sqr ( &L0, &L1 ); + field_mul ( &L2, x, &L0 ); + field_sqrn ( &L0, &L2, 8 ); + field_mul ( &L2, &L1, &L0 ); + field_sqrn ( &L0, &L2, 17 ); + field_mul ( &L1, &L2, &L0 ); + field_sqrn ( &L0, &L1, 17 ); + field_mul ( &L1, &L2, &L0 ); + field_sqrn ( &L3, &L1, 17 ); + field_mul ( &L0, &L2, &L3 ); + field_sqrn ( &L2, &L0, 51 ); + field_mul ( &L0, &L1, &L2 ); + field_sqrn ( &L1, &L0, 119 ); + field_mul ( &L2, &L0, &L1 ); + field_sqr ( &L0, &L2 ); + field_mul ( &L1, x, &L0 ); + field_sqrn ( &L0, &L1, 239 ); + field_mul ( a, &L2, &L0 ); +} diff --git a/src/p480/f_field.h b/src/p480/f_field.h new file mode 100644 index 0000000..397f83d --- /dev/null +++ b/src/p480/f_field.h @@ -0,0 +1,39 @@ +/** + * @file f_field.h + * @brief Field-specific code. 
+ * @copyright + * Copyright (c) 2014 Cryptography Research, Inc. \n + * Released under the MIT License. See LICENSE.txt for license information. + * @author Mike Hamburg + */ +#ifndef __F_FIELD_H__ +#define __F_FIELD_H__ 1 + +#include +#include "constant_time.h" + +#include "p480.h" +#define FIELD_BITS 480 +#define field_t p480_t +#define field_mul p480_mul +#define field_sqr p480_sqr +#define field_add p480_add +#define field_sub p480_sub +#define field_mulw p480_mulw +#define field_addw p480_addw +#define field_subw p480_subw +#define field_neg p480_neg +#define field_set_ui p480_set_ui +#define field_bias p480_bias +#define field_cond_neg p480_cond_neg +#define field_inverse p480_inverse +#define field_eq p480_eq +#define field_isr p480_isr +#define field_simultaneous_invert p480_simultaneous_invert +#define field_weak_reduce p480_weak_reduce +#define field_strong_reduce p480_strong_reduce +#define field_serialize p480_serialize +#define field_deserialize p480_deserialize +#define field_is_zero p480_is_zero + +#endif /* __F_FIELD_H__ */ diff --git a/src/p480/f_magic.h b/src/p480/f_magic.h new file mode 100644 index 0000000..d5d095a --- /dev/null +++ b/src/p480/f_magic.h @@ -0,0 +1,35 @@ +/** + * @file f_magic.h + * @copyright + * Copyright (c) 2014 Cryptography Research, Inc. \n + * Released under the MIT License. See LICENSE.txt for license information. + * @author Mike Hamburg + * @brief Goldilocks magic numbers (group orders, coefficients, algo params etc). + */ + +#ifndef __GOLDI_F_MAGIC_H__ +#define __GOLDI_F_MAGIC_H__ 1 + +#include "field.h" +#include "ec_point.h" + +/** + * @brief The Edwards "d" term for this curve. + */ +static const int64_t EDWARDS_D = 53825; + +/** @brief The number of combs to use for signed comb algo */ +#define COMB_N (USE_BIG_COMBS ? 6 : 5) + +/** @brief The number of teeth of the combs for signed comb algo */ +#define COMB_T (USE_BIG_COMBS ? 
5 : 4) + +/** @brief The spacing of the combs for signed comb algo */ +#define COMB_S (USE_BIG_COMBS ? 16 : 24) + +/** + * @brief crandom magic structure guard constant = "return 4", cf xkcd #221 + */ +#define CRANDOM_MAGIC 0x72657475726e2034ull + +#endif /* __GOLDI_F_MAGIC_H__ */ diff --git a/src/p480/magic.c b/src/p480/magic.c new file mode 100644 index 0000000..ee90a0a --- /dev/null +++ b/src/p480/magic.c @@ -0,0 +1,68 @@ +/* Copyright (c) 2014 Cryptography Research, Inc. + * Released under the MIT License. See LICENSE.txt for license information. + */ + +#include "field.h" +#include "magic.h" +#include "barrett_field.h" + +/* FUTURE: automatically generate this file? */ + +const uint8_t FIELD_MODULUS[FIELD_BYTES] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +/*!*/ 0xfe, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +const word_t SCALARMUL_FIXED_WINDOW_ADJUSTMENT[2*SCALAR_WORDS] = { + U64LE(0x58b51bc56ea8f0c4), + U64LE(0xd361f6a2348b50c9), + U64LE(0x08089c139c0002ae), + U64LE(0x0001d2ac3d9503a0), + U64LE(0x0000000000000000), + U64LE(0x0000000000000000), + U64LE(0x0000000000000000), + 0x40000000, + + U64LE(0xcb9c25073e36965b), + U64LE(0x6f2d48d8460f1661), + U64LE(0x0ab6256f7aaaae3e), + U64LE(0x00026e3afcc6af80), + U64LE(0x0000000000000000), + U64LE(0x0000000000000000), + U64LE(0x0000000000000000), + 0x00000000 +}; + +const struct affine_t goldilocks_base_point = { + {{ + U60LE(0x849ff7f845c30d3), + U60LE(0x7dda488553a4c5b), + U60LE(0x1d3a2d9844831ea), + U60LE(0xb33ecf6ade470a2), + U60LE(0x8b3cb95210bd3c3), + U60LE(0xfc955e59aeefa65), + U60LE(0x3ab247cd530013c), + U60LE(0x7ca42af3d564280) + }}, + {{ 5 }} +}; + +static const word_t curve_prime_order_lo[(240+WORD_BITS-1)/WORD_BITS] = { +
U64LE(0x72e70941cf8da597), + U64LE(0x9bcb52361183c598), + U64LE(0x02ad895bdeaaab8f), + U64LE(0x9b8ebf31abe0) +}; +const struct barrett_prime_t curve_prime_order = { + GOLDI_FIELD_WORDS, + 30 % WORD_BITS, + sizeof(curve_prime_order_lo)/sizeof(curve_prime_order_lo[0]), + curve_prime_order_lo +}; + +const struct field_t +sqrt_d_minus_1 = {{ + 232 /* Whoa, it comes out even. */ +}}; diff --git a/src/p521/f_arithmetic.c b/src/p521/f_arithmetic.c new file mode 100644 index 0000000..7fbdfb8 --- /dev/null +++ b/src/p521/f_arithmetic.c @@ -0,0 +1,43 @@ +/** + * @cond internal + * @file f_arithmetic.c + * @copyright + * Copyright (c) 2014 Cryptography Research, Inc. \n + * Released under the MIT License. See LICENSE.txt for license information. + * @author Mike Hamburg + * @brief Field-specific arithmetic. + */ + +#include "ec_point.h" + +void +field_isr ( + struct field_t* a, + const struct field_t* x +) { + struct field_t L0, L1, L2; + field_sqr ( &L1, x ); + field_mul ( &L0, x, &L1 ); + field_sqrn ( &L2, &L0, 2 ); + field_mul ( &L1, &L0, &L2 ); + field_sqrn ( &L2, &L1, 4 ); + field_mul ( &L0, &L1, &L2 ); + field_sqrn ( &L2, &L0, 8 ); + field_mul ( &L1, &L0, &L2 ); + field_sqrn ( &L2, &L1, 16 ); + field_mul ( &L0, &L1, &L2 ); + field_sqrn ( &L2, &L0, 32 ); + field_mul ( &L1, &L0, &L2 ); + field_sqr ( &L2, &L1 ); + field_mul ( &L0, x, &L2 ); + field_sqrn ( &L2, &L0, 64 ); + field_mul ( &L0, &L1, &L2 ); + field_sqrn ( &L2, &L0, 129 ); + field_mul ( &L1, &L0, &L2 ); + field_sqr ( &L2, &L1 ); + field_mul ( &L0, x, &L2 ); + field_sqrn ( &L2, &L0, 259 ); + field_mul ( &L1, &L0, &L2 ); + field_sqr ( &L0, &L1 ); + field_mul ( a, x, &L0 ); +} diff --git a/src/p521/f_field.h b/src/p521/f_field.h new file mode 100644 index 0000000..f17fe3d --- /dev/null +++ b/src/p521/f_field.h @@ -0,0 +1,39 @@ +/** + * @file f_field.h + * @brief Field-specific code. + * @copyright + * Copyright (c) 2014 Cryptography Research, Inc. \n + * Released under the MIT License. 
See LICENSE.txt for license information. + * @author Mike Hamburg + */ +#ifndef __F_FIELD_H__ +#define __F_FIELD_H__ 1 + +#include +#include "constant_time.h" + +#include "p521.h" +#define FIELD_BITS 521 +#define field_t p521_t +#define field_mul p521_mul +#define field_sqr p521_sqr +#define field_add p521_add +#define field_sub p521_sub +#define field_mulw p521_mulw +#define field_addw p521_addw +#define field_subw p521_subw +#define field_neg p521_neg +#define field_set_ui p521_set_ui +#define field_bias p521_bias +#define field_cond_neg p521_cond_neg +#define field_inverse p521_inverse +#define field_eq p521_eq +#define field_isr p521_isr +#define field_simultaneous_invert p521_simultaneous_invert +#define field_weak_reduce p521_weak_reduce +#define field_strong_reduce p521_strong_reduce +#define field_serialize p521_serialize +#define field_deserialize p521_deserialize +#define field_is_zero p521_is_zero + +#endif /* __F_FIELD_H__ */ diff --git a/test/bench.c b/test/bench.c index 028844f..399337d 100644 --- a/test/bench.c +++ b/test/bench.c @@ -39,13 +39,12 @@ static void q448_randomize( struct crandom_state_t *crand, word_t sk[SCALAR_WORD } static void field_print( const char *descr, const struct field_t *a ) { - field_t b; - field_copy(&b, a); - field_strong_reduce(&b); int j; + unsigned char ser[FIELD_BYTES]; + field_serialize(ser,a); printf("%s = 0x", descr); - for (j=sizeof(*a)/sizeof(a->limb[0])-1; j>=0; j--) { - printf(PRIxWORD58, b.limb[j]); + for (j=FIELD_BYTES - 1; j>=0; j--) { + printf("%02x", ser[j]); } printf("\n"); } @@ -58,7 +57,7 @@ field_print_full ( int j; printf("%s = 0x", descr); for (j=15; j>=0; j--) { - printf("%02" PRIxWORD "_" PRIxWORD58 " ", + printf("%02" PRIxWORD "_" PRIxWORD56 " ", a->limb[j]>>28, a->limb[j]&((1<<28)-1)); } printf("\n"); diff --git a/test/test.c b/test/test.c index 3c25700..d3c41b4 100644 --- a/test/test.c +++ b/test/test.c @@ -84,13 +84,12 @@ void field_print ( const char *descr, const struct field_t *a ) { - 
field_t b; - field_copy(&b, a); - field_strong_reduce(&b); int j; + unsigned char ser[FIELD_BYTES]; + field_serialize(ser,a); printf("%s = 0x", descr); - for (j=FIELD_WORDS - 1; j>=0; j--) { - printf(PRIxWORD58, b.limb[LIMBPERM(j)]); + for (j=FIELD_BYTES - 1; j>=0; j--) { + printf("%02x", ser[j]); } printf("\n"); } diff --git a/test/test_arithmetic.c b/test/test_arithmetic.c index a956c03..4d2d7d1 100644 --- a/test/test_arithmetic.c +++ b/test/test_arithmetic.c @@ -22,6 +22,8 @@ static mask_t mpz_to_field ( static mask_t field_assert_eq_gmp( const char *descr, + const struct field_t *a, + const struct field_t *b, const struct field_t *x, const mpz_t y, float lowBound, @@ -40,7 +42,7 @@ static mask_t field_assert_eq_gmp( unsigned int i; for (i=0; ilimb[0]); i++) { - int radix_bits = sizeof(x->limb[0]) * 448 / sizeof(*x); + int radix_bits = sizeof(x->limb[0]) * FIELD_BITS / sizeof(*x); word_t yardstick = (i==sizeof(*x)/sizeof(x->limb[0])/2) ? (1ull<limb[i] < yardstick * lowBound || x->limb[i] > yardstick * highBound) { @@ -54,6 +56,8 @@ static mask_t field_assert_eq_gmp( if (memcmp(xser,yser,FIELD_BYTES)) { youfail(); printf(" Failed arithmetic test %s\n", descr); + field_print(" a", a); + field_print(" b", b); field_print(" goldi", x); printf(" gmp = 0x"); int j; @@ -82,28 +86,30 @@ static mask_t test_add_sub ( field_add(&tt,&xx,&yy); mpz_add(t,x,y); - succ &= field_assert_eq_gmp("add",&tt,t,0,2.1); + succ &= field_assert_eq_gmp("add",&xx,&yy,&tt,t,0,2.1); field_sub(&tt,&xx,&yy); field_bias(&tt,2); mpz_sub(t,x,y); - succ &= field_assert_eq_gmp("sub",&tt,t,0,3.1); + succ &= field_assert_eq_gmp("sub",&xx,&yy,&tt,t,0,3.1); field_copy(&tt,&xx); field_addw(&tt,word); mpz_add_ui(t,x,word); - succ &= field_assert_eq_gmp("addw",&tt,t,0,2.1); + succ &= field_assert_eq_gmp("addw",&xx,&yy,&tt,t,0,2.1); field_copy(&tt,&xx); field_subw(&tt,word); field_bias(&tt,1); mpz_sub_ui(t,x,word); - succ &= field_assert_eq_gmp("subw",&tt,t,0,2.1); - + succ &= 
field_assert_eq_gmp("subw",&xx,&yy,&tt,t,0,2.1); + + /* if (!succ) { field_print(" x", &xx); field_print(" y", &yy); } + */ mpz_clear(t); @@ -124,19 +130,19 @@ static mask_t test_mul_sqr ( field_mul(&tt,&xx,&yy); mpz_mul(t,x,y); - succ &= field_assert_eq_gmp("mul",&tt,t,0,1.1); + succ &= field_assert_eq_gmp("mul",&xx,&yy,&tt,t,0,1.1); field_mulw(&tt,&xx,word); mpz_mul_ui(t,x,word); - succ &= field_assert_eq_gmp("mulw",&tt,t,0,1.1); + succ &= field_assert_eq_gmp("mulw",&xx,&yy,&tt,t,0,1.1); field_sqr(&tt,&xx); mpz_mul(t,x,x); - succ &= field_assert_eq_gmp("sqrx",&tt,t,0,1.1); + succ &= field_assert_eq_gmp("sqrx",&xx,&yy,&tt,t,0,1.1); field_sqr(&tt,&yy); mpz_mul(t,y,y); - succ &= field_assert_eq_gmp("sqy",&tt,t,0,1.1); + succ &= field_assert_eq_gmp("sqy",&xx,&yy,&tt,t,0,1.1); if (!succ) { field_print(" x", &xx); @@ -148,6 +154,36 @@ static mask_t test_mul_sqr ( return succ; } +static mask_t test_isr ( + const mpz_t x +) { + struct field_t xx,yy,ss,tt; + mask_t succ = 0; + succ = mpz_to_field(&xx,x); + + field_isr(&ss,&xx); + field_sqr(&tt,&ss); + field_mul(&yy,&xx,&tt); + + field_addw(&tt,1); + succ |= field_is_zero(&tt); + + field_subw(&tt,2); + field_bias(&tt,1); + succ |= field_is_zero(&tt); + + field_addw(&tt,1); + if (~succ) { + youfail(); + printf("ISR failure.\n"); + field_print(" x", &xx); + field_print(" s", &ss); + field_print(" t", &tt); + } + + return succ; +} + int test_arithmetic (void) { int j, ntests = 100000; @@ -168,8 +204,8 @@ int test_arithmetic (void) { if (j<256) { mpz_set_ui(x,0); mpz_set_ui(y,0); - mpz_setbit(x,(j%16)*28); // FIELD_MAGIC - mpz_setbit(y,(j/16)*28); // FIELD_MAGIC + mpz_setbit(x,(j%16)*28); + mpz_setbit(y,(j/16)*28); } else if (j&1) { mpz_rrandomb(x, state, FIELD_BITS); mpz_rrandomb(y, state, FIELD_BITS); @@ -183,6 +219,9 @@ int test_arithmetic (void) { succ &= test_add_sub(x,y,word); succ &= test_mul_sqr(x,y,word); + if (j < 1000) + succ &= test_isr(x); + // TODO: test neg, cond_neg, set_ui, wrd, srd, inv, ...? 
} diff --git a/test/test_pointops.c b/test/test_pointops.c index 8907608..6d4230d 100644 --- a/test/test_pointops.c +++ b/test/test_pointops.c @@ -3,6 +3,7 @@ #include #include "ec_point.h" +#include "magic.h" #include "field.h" #include "crandom.h" @@ -256,6 +257,15 @@ int test_pointops (void) { struct crandom_state_t crand; crandom_init_from_buffer(&crand, "test_pointops random initializer"); + struct extensible_t ext_base; + if (!validate_affine(&goldilocks_base_point)) { + youfail(); + printf(" Base point isn't on the curve.\n"); + return -1; + } + convert_affine_to_extensible(&ext_base, &goldilocks_base_point); + if (!validate_ext(&ext_base, 2, "base")) return -1; + int i, ret; for (i=0; i<1000; i++) { uint8_t ser[FIELD_BYTES]; diff --git a/test/test_scalarmul.c b/test/test_scalarmul.c index 82989b9..80636cf 100644 --- a/test/test_scalarmul.c +++ b/test/test_scalarmul.c @@ -39,8 +39,14 @@ single_scalarmul_compatibility_test ( if (!succ) { return 1; } - - struct { int n,t,s; } params[] = {{5,5,18},{3,5,30},{4,4,28},{1,2,224}}; // FIELD_MAGIC + +#if FIELD_BITS == 448 + struct { int n,t,s; } params[] = {{5,5,18},{3,5,30},{4,4,28},{1,2,224}}; +#elif FIELD_BITS == 480 + struct { int n,t,s; } params[] = {{5,6,16},{6,5,16},{4,5,24},{4,4,30},{1,2,240}}; +#else + struct { int n,t,s; } params[] = {{5,5,(SCALAR_BITS+24)/25},{1,2,(SCALAR_BITS+1)/2}}; +#endif const int nparams = sizeof(params)/sizeof(params[0]); struct fixed_base_table_t fbt; const int nsizes = 6;