generate f_field.h

hace 8 años · 294eabaec7
--- a/+ 14
+++ b/+ 14
@@ -11,6 +11,7 @@ MACHINE := $(shell uname -m)
 BUILD_ASM = build/obj
 BUILD_OBJ = build/obj
 BUILD_C   = build/obj
 BUILD_H   = build/obj/include
 BUILD_PY  = build/obj
 BUILD_LIB = build/lib
 BUILD_INC = build/include
@@ -40,8 +41,8 @@ endif
 WARNFLAGS = -pedantic -Wall -Wextra -Werror -Wunreachable-code \
 	 -Wmissing-declarations -Wunused-function -Wno-overlength-strings $(EXWARN)

 INCFLAGS = -Isrc/include -Ibuild/include
 PUB_INCFLAGS = -Ibuild/include
 INCFLAGS = -Isrc/include -I$(BUILD_INC) -I$(BUILD_H)
 PUB_INCFLAGS = -I$(BUILD_INC)
 LANGFLAGS = -std=c99 -fno-strict-aliasing
 LANGXXFLAGS = -fno-strict-aliasing
 GENFLAGS = -ffunction-sections -fdata-sections -fvisibility=hidden -fomit-frame-pointer -fPIC
@@ -124,7 +125,7 @@ endif
 # Create all the build subdirectories
 $(BUILD_OBJ)/timestamp:
 	mkdir -p $(BUILD_ASM) $(BUILD_OBJ) $(BUILD_C) $(BUILD_PY) \
 		$(BUILD_LIB) $(BUILD_INC) $(BUILD_BIN) $(BUILD_IBIN) $(BUILD_INC)/decaf
 		$(BUILD_LIB) $(BUILD_INC) $(BUILD_BIN) $(BUILD_IBIN) $(BUILD_H) $(BUILD_INC)/decaf
 	touch $@

 $(BUILD_OBJ)/%.o: $(BUILD_ASM)/%.s
@@ -133,7 +134,7 @@ $(BUILD_OBJ)/%.o: $(BUILD_ASM)/%.s
 gen_headers: $(GEN_HEADERS)
 	
 $(GEN_HEADERS): src/gen_headers/*.py src/public_include/decaf/*
 	python -B src/gen_headers/main.py --hpre=$(BUILD_INC) --cpre=$(BUILD_C)
 	python -B src/gen_headers/main.py --hpre=$(BUILD_INC) --ihpre=$(BUILD_H) --cpre=$(BUILD_C)
 	cp src/public_include/decaf/* $(BUILD_INC)/decaf/

 ################################################################
@@ -145,17 +146,18 @@ COMPONENTS_OF_$(1) = $$(BUILD_OBJ)/$(1)_impl.o $$(BUILD_OBJ)/$(1)_arithmetic.o
 LIBCOMPONENTS += $$(COMPONENTS_OF_$(1))

 $$(BUILD_ASM)/$(1)_arithmetic.s: src/$(1)/f_arithmetic.c $$(HEADERS)
 	$$(CC) $$(CFLAGS) -I src/$(1) -I src/$(1)/$(2) -S -c -o $$@ $$<
 	$$(CC) $$(CFLAGS) -I src/$(1) -I src/$(1)/$(2) -I $(BUILD_H)/$(1) -I $(BUILD_H)/$(1)/$(2) -S -c -o $$@ $$<

 $$(BUILD_ASM)/$(1)_impl.s: src/$(1)/$(2)/f_impl.c $$(HEADERS)
 	$$(CC) $$(CFLAGS) -I src/$(1) -I src/$(1)/$(2) -S -c -o $$@ $$<
 	$$(CC) $$(CFLAGS) -I src/$(1) -I src/$(1)/$(2) -I $(BUILD_H)/$(1) -I $(BUILD_H)/$(1)/$(2) -S -c -o $$@ $$<
 endef

 ################################################################
 # Per-field, per-curve code: call with curve, field
 ################################################################
 define define_curve
 $$(BUILD_IBIN)/decaf_gen_tables_$(1): $$(BUILD_OBJ)/decaf_gen_tables_$(1).o $$(BUILD_OBJ)/decaf_$(1).o $$(BUILD_OBJ)/utils.o \
 $$(BUILD_IBIN)/decaf_gen_tables_$(1): $$(BUILD_OBJ)/decaf_gen_tables_$(1).o \
 		$$(BUILD_OBJ)/decaf_$(1).o $$(BUILD_OBJ)/utils.o \
 		$$(COMPONENTS_OF_$(2))
 	$$(LD) $$(LDFLAGS) -o $$@ $$^

@@ -165,20 +167,24 @@ $$(BUILD_C)/decaf_tables_$(1).c: $$(BUILD_IBIN)/decaf_gen_tables_$(1)
 # Compile the per-curve table source (first argument: curve, second: field)
 # to assembly.  Include paths cover the checked-in curve/field/arch sources
 # and the matching directories under the generated internal-header tree.
 # Every variable is escaped so it expands when the rule runs, not when the
 # template is instantiated.
 $$(BUILD_ASM)/decaf_tables_$(1).s: $$(BUILD_C)/decaf_tables_$(1).c $$(HEADERS)
 	$$(CC) $$(CFLAGS) -S -c -o $$@ $$< \
 		-I src/curve_$(1)/ -I src/$(2) -I src/$(2)/$$(ARCH_FOR_$(2)) \
 		-I $$(BUILD_H)/curve_$(1) -I $$(BUILD_H)/$(2) -I $$(BUILD_H)/$(2)/$$(ARCH_FOR_$(2))

 $$(BUILD_ASM)/decaf_gen_tables_$(1).s: src/decaf_gen_tables.c $$(HEADERS)
 	$$(CC) $$(CFLAGS) \
 		-I src/curve_$(1)/ -I src/$(2) -I src/$(2)/$$(ARCH_FOR_$(2)) \
 		-I src/curve_$(1) -I src/$(2) -I src/$(2)/$$(ARCH_FOR_$(2)) \
 		-I $(BUILD_H)/curve_$(1) -I $(BUILD_H)/$(2) -I $(BUILD_H)/$(2)/$$(ARCH_FOR_$(2)) \
 		-S -c -o $$@ $$<

 # Compile src/decaf.c once per curve (first argument: curve, second: field).
 # The per-curve, per-field and per-arch include directories are searched in
 # both the source tree and the generated internal-header tree.
 # Every variable is escaped so it expands when the rule runs, not when the
 # template is instantiated.
 $$(BUILD_ASM)/decaf_$(1).s: src/decaf.c $$(HEADERS)
 	$$(CC) $$(CFLAGS) \
 		-I src/curve_$(1)/ -I src/$(2) -I src/$(2)/$$(ARCH_FOR_$(2)) \
 		-I $$(BUILD_H)/curve_$(1) -I $$(BUILD_H)/$(2) -I $$(BUILD_H)/$(2)/$$(ARCH_FOR_$(2)) \
 		-S -c -o $$@ $$<

 # Compile src/decaf_crypto.c once per curve (first argument: curve).
 # Only the per-curve include directories are added here: one in the source
 # tree and one in the generated internal-header tree.
 # Every variable is escaped so it expands when the rule runs, not when the
 # template is instantiated.
 $$(BUILD_ASM)/decaf_crypto_$(1).s: src/decaf_crypto.c $$(HEADERS)
 	$$(CC) $$(CFLAGS) \
 		-I src/curve_$(1)/ \
 		-I $$(BUILD_H)/curve_$(1) \
 		-S -c -o $$@ $$<

 LIBCOMPONENTS += $$(BUILD_OBJ)/decaf_$(1).o $$(BUILD_OBJ)/decaf_tables_$(1).o
--- a/src/gen_headers/crypto_h.py
+++ b/src/gen_headers/crypto_h.py
@@ -1,6 +1,8 @@
 from gen_file import gen_file

 crypto_h = gen_file(
    public = True,
    per = "curve",
    name = "decaf/crypto_%(shortname)s.h",
    doc = """
        Example Decaf crypto routines.
--- a/src/gen_headers/crypto_hxx.py
+++ b/src/gen_headers/crypto_hxx.py
@@ -1,6 +1,8 @@
 from gen_file import gen_file

 crypto_hxx = gen_file(
    public = True,
    per = "curve",
    name = "decaf/crypto_%(shortname)s.hxx",
    doc = """
        Example Decaf cyrpto routines, C++ wrapper.
--- a/src/gen_headers/curve_data.py
+++ b/src/gen_headers/curve_data.py
@@ -1,3 +1,20 @@
 # Per-field parameters, keyed by field name (the "field" entry of each curve
 # in curve_data refers to one of these keys).  Keys of each entry:
 #   gf_desc          - human-readable description of the modulus; substituted
 #                      into the generated header's @brief line.
 #   modulus          - the field modulus as a Python integer; "gf_bits" is
 #                      derived from it with ceil_log2 when not given here.
 #   gf_shortname     - short tag used to build generated file paths and the
 #                      gf_<shortname>_* C symbol names.
 #   gf_impl_bits     - substituted into the <C_NS>_LIMBS macro of the public
 #                      header (divided by DECAF_WORD_BITS there).
 #   gf_lit_limb_bits - emitted verbatim as GF_LIT_LIMB_BITS in f_field.h.
 # Any key a curve entry does not define itself is copied from its field.
 field_data = {
    "p25519" : {
        "gf_desc" : "2^255 - 19",
        "modulus" : 2**255 - 19,
        "gf_shortname" : "25519",
        "gf_impl_bits" : 320,
        "gf_lit_limb_bits" : 51
    },
    "p448" : {
        "gf_desc" : "2^448 - 2^224 - 1",
        "modulus" : 2**448 - 2**224 - 1,
        "gf_shortname" : "448",
        "gf_impl_bits" : 512,
        "gf_lit_limb_bits" : 56
    }
 }

 curve_data = {
    "Curve25519" : {
        "iso_to" : "Curve25519",
@@ -7,9 +24,8 @@ curve_data = {
        "longnum" : "25519",
        "c_ns" : "decaf_255",
        "cofactor" : 8,
        "modulus" : 2**255 - 19,
        "scalar_bits" : 253,
        "gf_bits" : 320
        "field" : "p25519",
        "scalar_bits" : 253
    },
    "Ed448" : {
        "iso_to" : "Ed448-Goldilocks",
@@ -19,9 +35,8 @@ curve_data = {
        "longnum" : "448",
        "c_ns" : "decaf_448",
        "cofactor" : 4,
        "modulus" : 2**448 - 2**224 - 1,
        "scalar_bits" : 446,
        "gf_bits" : 512
        "field" : "p448",
        "scalar_bits" : 446
    }
 }

@@ -33,7 +48,15 @@ def ceil_log2(x):
        out += 1
    return out

 # Give every field a "gf_bits" entry: an explicit value is kept as-is,
 # otherwise it is computed from the field's modulus with ceil_log2.
 for field,data in field_data.iteritems():
    if "gf_bits" in data:
        continue
    data["gf_bits"] = ceil_log2(data["modulus"])

 for curve,data in curve_data.iteritems():
    for key in field_data[data["field"]]:
        if key not in data:
            data[key] = field_data[data["field"]][key]
    
    if "modulus_type" not in data:
        mod = data["modulus"]
        ptwo = 2
--- a/src/gen_headers/decaf_h.py
+++ b/src/gen_headers/decaf_h.py
@@ -1,6 +1,8 @@
 from gen_file import gen_file

 decaf_h = gen_file(
    public = True,
    per = "curve",
    name = "decaf/%(c_ns)s.h",
    doc = """@brief A group of prime order p, based on %(iso_to)s.""",
    code = """
@@ -11,7 +13,7 @@ extern "C" {
 #endif

 /** @cond internal */
 #define %(C_NS)s_LIMBS (%(gf_bits)d/DECAF_WORD_BITS)
 #define %(C_NS)s_LIMBS (%(gf_impl_bits)d/DECAF_WORD_BITS)
 #define %(C_NS)s_SCALAR_LIMBS ((%(scalar_bits)d-1)/DECAF_WORD_BITS+1)
 /** @endcond */

--- a/src/gen_headers/decaf_hxx.py
+++ b/src/gen_headers/decaf_hxx.py
@@ -1,6 +1,8 @@
 from gen_file import gen_file

 decaf_hxx = gen_file(
    public = True,
    per = "curve",
    name = "decaf/%(c_ns)s.hxx",
    doc = """
        A group of prime order p, C++ wrapper.
--- a/src/gen_headers/f_field_h.py
+++ b/src/gen_headers/f_field_h.py
@@ -0,0 +1,30 @@
 from gen_file import gen_file

 # Template for the per-field internal header f_field.h.
 # It is instantiated once per entry of field_data (per = "field"); the
 # %(...)s / %(...)d placeholders are filled from that entry.  The generated
 # header maps the generic gf_* names onto the gf_<shortname>_* symbols of the
 # selected field.
 f_field_h = gen_file(
    # Not a public header: main.py writes it under the --ihpre prefix.
    public = False,
    per = "field",
    # Output path relative to the prefix, e.g. one directory per field.
    name = "p%(gf_shortname)s/f_field.h",
    doc = """@brief Field-specific code for %(gf_desc)s.""",
    # Body of the header.  The text below is emitted verbatim after
    # substitution, so its whitespace is significant.
    code = """
 #include "constant_time.h"
 #include <string.h>

 #include "f_impl.h"
 #define GF_LIT_LIMB_BITS  %(gf_lit_limb_bits)d
 #define GF_BITS           %(gf_bits)d
 #define gf                gf_%(gf_shortname)s_t
 #define gf_s              gf_%(gf_shortname)s_s
 #define gf_mul            gf_%(gf_shortname)s_mul
 #define gf_sqr            gf_%(gf_shortname)s_sqr
 #define gf_add_RAW        gf_%(gf_shortname)s_add_RAW
 #define gf_sub_RAW        gf_%(gf_shortname)s_sub_RAW
 #define gf_mulw           gf_%(gf_shortname)s_mulw
 #define gf_bias           gf_%(gf_shortname)s_bias
 #define gf_isr            gf_%(gf_shortname)s_isr
 #define gf_weak_reduce    gf_%(gf_shortname)s_weak_reduce
 #define gf_strong_reduce  gf_%(gf_shortname)s_strong_reduce
 #define gf_serialize      gf_%(gf_shortname)s_serialize
 #define gf_deserialize    gf_%(gf_shortname)s_deserialize

 #define SQRT_MINUS_ONE    P%(gf_shortname)s_SQRT_MINUS_ONE /* might not be defined */
 """)
--- a/src/gen_headers/gen_file.py
+++ b/src/gen_headers/gen_file.py
@@ -1,4 +1,4 @@
 from curve_data import curve_data
 from curve_data import curve_data, field_data
 from textwrap import dedent

 def redoc(filename,doc,author):
@@ -22,10 +22,12 @@ def redoc(filename,doc,author):

 gend_files = {}

 def gen_file(name,doc,code,author="Mike Hamburg"):
 per_map = {"field":field_data, "curve":curve_data, "global":{"global":{}} }

 def gen_file(public,name,doc,code,per="global",author="Mike Hamburg"):
    is_header = name.endswith(".h") or name.endswith(".hxx") or name.endswith(".h++")
    
    for curve,data in curve_data.iteritems():
    for curve,data in per_map[per].iteritems():
        ns_name = name % data
        
        _,_,name_base = ns_name.rpartition("/")
@@ -44,8 +46,5 @@ def gen_file(name,doc,code,author="Mike Hamburg"):
                #endif /* %(header_guard)s */
                """) % { "header_guard" : header_guard, "code": ns_code }
        ret += ns_code[1:-1]
        gend_files[ns_name] = ret
        
        if ns_name == name:
            # It's not namespaced
            break
        gend_files[ns_name] = (public,ret)
--- a/src/gen_headers/main.py
+++ b/src/gen_headers/main.py
@@ -5,16 +5,18 @@ import argparse
 import re

 parser = argparse.ArgumentParser(description='Generate Decaf headers and other such files.')
 parser.add_argument('--hpre', required = True, help = "Where to put the header files")
 parser.add_argument('--hpre', required = True, help = "Where to put the public header files")
 parser.add_argument('--ihpre', required = True, help = "Where to put the internal header files")
 parser.add_argument('--cpre', required = True, help = "Where to put the C/C++ implementation files")
 args = parser.parse_args()

 prefixes = { "h" : args.hpre, "hxx" : args.hpre, "c" : args.cpre }
 prefixes = { (True,"h") : args.hpre, (True,"hxx") : args.hpre, (False,"c") : args.cpre, (False,"h") : args.ihpre }

 from decaf_hxx import decaf_hxx
 from decaf_h import decaf_h
 from crypto_h import crypto_h
 from crypto_hxx import crypto_hxx
 from f_field_h import f_field_h

 root_hxx_code = "\n".join((
    "#include <%s>" % name
@@ -22,6 +24,8 @@ root_hxx_code = "\n".join((
    if re.match("^decaf/decaf_\d+.hxx$",name)
 ))
 decaf_root_hxx = gen_file(
    public = True,
    per = "global",
    name = "decaf.hxx",
    doc = """@brief Decaf curve metaheader.""",
    code = "\n"+root_hxx_code+"\n"
@@ -33,6 +37,8 @@ crypto_h_code = "\n".join((
    if re.match("^decaf/crypto_\d+.h$",name)
 ))
 crypto_h = gen_file(
    public = True,
    per = "global",
    name = "decaf/crypto.h",
    doc = """
        Example Decaf crypto routines, metaheader.
@@ -49,6 +55,8 @@ crypto_hxx_code = "\n".join((
    if re.match("^decaf/crypto_\d+.hxx$",name)
 ))
 crypto_hxx = gen_file(
    public = True,
    per = "global",
    name = "decaf/crypto.hxx",
    doc = """
        Example Decaf crypto routines, C++, metaheader.
@@ -65,6 +73,8 @@ root_h_code = "\n".join((
    if re.match("^decaf/decaf_\d+.h$",name)
 ))
 decaf_root_hxx = gen_file(
    public = True,
    per = "global",
    name = "decaf.h",
    doc = """
        Master header for Decaf library.
@@ -84,9 +94,9 @@ decaf_root_hxx = gen_file(
 )


 for name,code in gend_files.iteritems():        
 for name,(public,code) in gend_files.iteritems():        
    _,_,name_suffix = name.partition(".")
    prefix = prefixes[name_suffix]
    prefix = prefixes[(public,name_suffix)]
    if not os.path.exists(os.path.dirname(prefix + "/" + name)):
        os.makedirs(os.path.dirname(prefix + "/" + name))
    with open(prefix + "/" + name,"w") as f:
--- a/src/p25519/f_field.h
+++ b/src/p25519/f_field.h
@@ -1,33 +0,0 @@
 /**
 * @file f_field.h
 * @brief Field-specific code.
 * @copyright
 *   Copyright (c) 2014 Cryptography Research, Inc.  \n
 *   Released under the MIT License.  See LICENSE.txt for license information.
 * @author Mike Hamburg
 */
 /* Include guard.  NOTE(review): identifiers starting with a double
  * underscore are reserved for the implementation in C. */
 #ifndef __F_FIELD_H__
 #define __F_FIELD_H__ 1

 #include "constant_time.h"
 #include <string.h>

 /* f_impl.h is resolved through the include path, so the architecture
  * directory passed with -I decides which implementation is picked up. */
 #include "f_impl.h"
 /* Numeric parameters of this field (2^255 - 19 per the p25519 directory
  * name -- presumably limb width and field size in bits; confirm against
  * the users of these macros). */
 #define GF_LIT_LIMB_BITS  51
 #define GF_BITS           255
 /* Generic gf / gf_* names aliased to the 25519-specific types and
  * functions. */
 #define gf                gf_25519_t
 #define gf_s              gf_25519_s
 #define gf_mul            gf_25519_mul
 #define gf_sqr            gf_25519_sqr
 #define gf_add_RAW        gf_25519_add_RAW
 #define gf_sub_RAW        gf_25519_sub_RAW
 #define gf_mulw           gf_25519_mulw
 #define gf_bias           gf_25519_bias
 #define gf_isr            gf_25519_isr
 #define gf_weak_reduce    gf_25519_weak_reduce
 #define gf_strong_reduce  gf_25519_strong_reduce
 #define gf_serialize      gf_25519_serialize
 #define gf_deserialize    gf_25519_deserialize
 /* Alias for a constant expected to be provided elsewhere under the
  * P25519_ prefix. */
 #define SQRT_MINUS_ONE    P25519_SQRT_MINUS_ONE

 #endif /* __F_FIELD_H__ */
--- a/src/p448/arch_32/f_impl.c
+++ b/src/p448/arch_32/f_impl.c
@@ -22,7 +22,7 @@ static uint64_t widemul_32 (
 }

 void
 p448_mul (
 gf_448_mul (
    gf_448_s *__restrict__ cs,
    const gf_448_t as,
    const gf_448_t bs
@@ -83,7 +83,7 @@ p448_mul (
 }

 void
 p448_mulw (
 gf_448_mulw (
    gf_448_s *__restrict__ cs,
    const gf_448_t as,
    uint64_t b
@@ -127,15 +127,15 @@ p448_mulw (
 }

 void
 p448_sqr (
 gf_448_sqr (
    gf_448_s *__restrict__ cs,
    const gf_448_t as
 ) {
    p448_mul(cs,as,as); /* PERF */
    gf_448_mul(cs,as,as); /* PERF */
 }

 void
 p448_strong_reduce (
 gf_448_strong_reduce (
    gf_448_t a
 ) {
    word_t mask = (1ull<<28)-1;
@@ -178,14 +178,14 @@ p448_strong_reduce (
 }

 void
 p448_serialize (
 gf_448_serialize (
    uint8_t *serial,
    const gf_448_t x
 ) {
    int i,j;
    gf_448_t red;
    p448_copy(red, x);
    p448_strong_reduce(red);
    gf_448_copy(red, x);
    gf_448_strong_reduce(red);
    for (i=0; i<8; i++) {
        uint64_t limb = red->limb[2*i] + (((uint64_t)red->limb[2*i+1])<<28);
        for (j=0; j<7; j++) {
@@ -197,7 +197,7 @@ p448_serialize (
 }

 mask_t
 p448_deserialize (
 gf_448_deserialize (
    gf_448_t x,
    const uint8_t serial[56]
 ) {
--- a/src/p448/arch_32/f_impl.h
+++ b/src/p448/arch_32/f_impl.h
@@ -23,69 +23,69 @@ extern "C" {
 #endif

 static __inline__ void
 p448_add_RAW (
 gf_448_add_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
 ) __attribute__((unused,always_inline));
             
 static __inline__ void
 p448_sub_RAW (
 gf_448_sub_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
 ) __attribute__((unused,always_inline));
             
 static __inline__ void
 p448_copy (
 gf_448_copy (
    gf_448_t out,
    const gf_448_t a
 ) __attribute__((unused,always_inline));
             
 static __inline__ void
 p448_weak_reduce (
 gf_448_weak_reduce (
    gf_448_t inout
 ) __attribute__((unused,always_inline));
             
 void
 p448_strong_reduce (
 gf_448_strong_reduce (
    gf_448_t inout
 );
             
 static __inline__ void
 p448_bias (
 gf_448_bias (
    gf_448_t inout,
    int amount
 ) __attribute__((unused,always_inline));

 void
 p448_mul (
 gf_448_mul (
    gf_448_s *__restrict__ out,
    const gf_448_t a,
    const gf_448_t b
 );

 void
 p448_mulw (
 gf_448_mulw (
    gf_448_s *__restrict__ out,
    const gf_448_t a,
    uint64_t b
 );

 void
 p448_sqr (
 gf_448_sqr (
    gf_448_s *__restrict__ out,
    const gf_448_t a
 );

 void
 p448_serialize (
 gf_448_serialize (
    uint8_t *serial,
    const gf_448_t x
 );

 mask_t
 p448_deserialize (
 gf_448_deserialize (
    gf_448_t x,
    const uint8_t serial[56]
 );
@@ -93,7 +93,7 @@ p448_deserialize (
 /* -------------- Inline functions begin here -------------- */

 void
 p448_add_RAW (
 gf_448_add_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
@@ -111,7 +111,7 @@ p448_add_RAW (
 }

 void
 p448_sub_RAW (
 gf_448_sub_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
@@ -129,7 +129,7 @@ p448_sub_RAW (
 }

 void
 p448_copy (
 gf_448_copy (
    gf_448_t out,
    const gf_448_t a
 ) {
@@ -137,7 +137,7 @@ p448_copy (
 }

 void
 p448_bias (
 gf_448_bias (
    gf_448_t a,
    int amt
 ) {
@@ -151,7 +151,7 @@ p448_bias (
 }

 void
 p448_weak_reduce (
 gf_448_weak_reduce (
    gf_448_t a
 ) {
    uint64_t mask = (1ull<<28) - 1;
--- a/src/p448/arch_arm_32/f_impl.c
+++ b/src/p448/arch_arm_32/f_impl.c
@@ -99,7 +99,7 @@ smull2 (
 }

 void
 p448_mul (
 gf_448_mul (
    gf_448_s *__restrict__ cs,
    const gf_448_t as,
    const gf_448_t bs
@@ -450,7 +450,7 @@ p448_mul (
 }

 void
 p448_sqr (
 gf_448_sqr (
    gf_448_s *__restrict__ cs,
    const gf_448_t as
 ) {
@@ -748,7 +748,7 @@ p448_sqr (
 }

 void
 p448_mulw (
 gf_448_mulw (
    gf_448_s *__restrict__ cs,
    const gf_448_t as,
    uint64_t b
@@ -862,7 +862,7 @@ p448_mulw (
 }

 void
 p448_strong_reduce (
 gf_448_strong_reduce (
    gf_448_t a
 ) {
    word_t mask = (1ull<<28)-1;
@@ -905,14 +905,14 @@ p448_strong_reduce (
 }

 void
 p448_serialize (
 gf_448_serialize (
    uint8_t *serial,
    const gf_448_t x
 ) {
    int i,j;
    gf_448_t red;
    p448_copy(red, x);
    p448_strong_reduce(red);
    gf_448_copy(red, x);
    gf_448_strong_reduce(red);
    for (i=0; i<8; i++) {
        uint64_t limb = red->limb[2*i] + (((uint64_t)red->limb[2*i+1])<<28);
        for (j=0; j<7; j++) {
@@ -924,7 +924,7 @@ p448_serialize (
 }

 mask_t
 p448_deserialize (
 gf_448_deserialize (
    gf_448_t x,
    const uint8_t serial[56]
 ) {
--- a/src/p448/arch_arm_32/f_impl.h
+++ b/src/p448/arch_arm_32/f_impl.h
@@ -23,69 +23,69 @@ extern "C" {
 #endif

 static __inline__ void
 p448_add_RAW (
 gf_448_add_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
 ) __attribute__((unused,always_inline));
             
 static __inline__ void
 p448_sub_RAW (
 gf_448_sub_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
 ) __attribute__((unused,always_inline));
             
 static __inline__ void
 p448_copy (
 gf_448_copy (
    gf_448_t out,
    const gf_448_t a
 ) __attribute__((unused,always_inline));
             
 static __inline__ void
 p448_weak_reduce (
 gf_448_weak_reduce (
    gf_448_t inout
 ) __attribute__((unused,always_inline));
             
 void
 p448_strong_reduce (
 gf_448_strong_reduce (
    gf_448_t inout
 );
             
 static __inline__ void
 p448_bias (
 gf_448_bias (
    gf_448_t inout,
    int amount
 ) __attribute__((unused,always_inline));

 void
 p448_mul (
 gf_448_mul (
    gf_448_s *__restrict__ out,
    const gf_448_t a,
    const gf_448_t b
 );

 void
 p448_mulw (
 gf_448_mulw (
    gf_448_s *__restrict__ out,
    const gf_448_t a,
    uint64_t b
 );

 void
 p448_sqr (
 gf_448_sqr (
    gf_448_s *__restrict__ out,
    const gf_448_t a
 );

 void
 p448_serialize (
 gf_448_serialize (
    uint8_t *serial,
    const gf_448_t x
 );

 mask_t
 p448_deserialize (
 gf_448_deserialize (
    gf_448_t x,
    const uint8_t serial[56]
 );
@@ -93,7 +93,7 @@ p448_deserialize (
 /* -------------- Inline functions begin here -------------- */

 void
 p448_add_RAW (
 gf_448_add_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
@@ -111,7 +111,7 @@ p448_add_RAW (
 }

 void
 p448_sub_RAW (
 gf_448_sub_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
@@ -129,7 +129,7 @@ p448_sub_RAW (
 }

 void
 p448_copy (
 gf_448_copy (
    gf_448_t out,
    const gf_448_t a
 ) {
@@ -137,7 +137,7 @@ p448_copy (
 }

 void
 p448_bias (
 gf_448_bias (
    gf_448_t a,
    int amt
 ) {
@@ -151,7 +151,7 @@ p448_bias (
 }

 void
 p448_weak_reduce (
 gf_448_weak_reduce (
    gf_448_t a
 ) {
    uint64_t mask = (1ull<<28) - 1;
--- a/src/p448/arch_neon_experimental/f_impl.c
+++ b/src/p448/arch_neon_experimental/f_impl.c
@@ -69,7 +69,7 @@ smull2 (
 }

 void
 p448_mul (
 gf_448_mul (
    gf_448_s *__restrict__ cs,
    const gf_448_t as,
    const gf_448_t bs
@@ -368,7 +368,7 @@ p448_mul (
 }

 void
 p448_sqr (
 gf_448_sqr (
    gf_448_s *__restrict__ cs,
    const gf_448_t bs
 ) {
@@ -569,7 +569,7 @@ p448_sqr (
 }

 void
 p448_mulw (
 gf_448_mulw (
    gf_448_s *__restrict__ cs,
    const gf_448_t as,
    uint64_t b
@@ -620,7 +620,7 @@ p448_mulw (

 /* PERF: vectorize? */
 void
 p448_strong_reduce (
 gf_448_strong_reduce (
    gf_448_t a
 ) { 
    word_t mask = (1ull<<28)-1;
@@ -663,14 +663,14 @@ p448_strong_reduce (
 }

 void
 p448_serialize (
 gf_448_serialize (
    uint8_t *serial,
    const gf_448_t x
 ) {
    int i,j;
    gf_448_t red;
    p448_copy(red, x);
    p448_strong_reduce(red);
    gf_448_copy(red, x);
    gf_448_strong_reduce(red);
    
    for (i=0; i<8; i++) {
        uint64_t limb = red->limb[LIMBPERM(2*i)] + (((uint64_t)red->limb[LIMBPERM(2*i+1)])<<28);
@@ -683,7 +683,7 @@ p448_serialize (
 }

 mask_t
 p448_deserialize (
 gf_448_deserialize (
    gf_448_t x,
    const uint8_t serial[56]
 ) {
--- a/src/p448/arch_neon_experimental/f_impl.h
+++ b/src/p448/arch_neon_experimental/f_impl.h
@@ -29,69 +29,69 @@ extern "C" {
 #endif

 static __inline__ void
 p448_add_RAW (
 gf_448_add_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
 ) __attribute__((unused,always_inline));
             
 static __inline__ void
 p448_sub_RAW (
 gf_448_sub_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
 ) __attribute__((unused,always_inline));
             
 static __inline__ void
 p448_copy (
 gf_448_copy (
    gf_448_t out,
    const gf_448_t a
 ) __attribute__((unused,always_inline));
             
 static __inline__ void
 p448_weak_reduce (
 gf_448_weak_reduce (
    gf_448_t inout
 ) __attribute__((unused,always_inline));
             
 void
 p448_strong_reduce (
 gf_448_strong_reduce (
    gf_448_t inout
 );
             
 static __inline__ void
 p448_bias (
 gf_448_bias (
    gf_448_t inout,
    int amount
 ) __attribute__((unused,always_inline));

 void
 p448_mul (
 gf_448_mul (
    gf_448_s *__restrict__ out,
    const gf_448_t a,
    const gf_448_t b
 );

 void
 p448_mulw (
 gf_448_mulw (
    gf_448_s *__restrict__ out,
    const gf_448_t a,
    uint64_t b
 );

 void
 p448_sqr (
 gf_448_sqr (
    gf_448_s *__restrict__ out,
    const gf_448_t a
 );

 void
 p448_serialize (
 gf_448_serialize (
    uint8_t *serial,
    const gf_448_t x
 );

 mask_t
 p448_deserialize (
 gf_448_deserialize (
    gf_448_t x,
    const uint8_t serial[56]
 );
@@ -99,7 +99,7 @@ p448_deserialize (
 /* -------------- Inline functions begin here -------------- */

 void
 p448_add_RAW (
 gf_448_add_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
@@ -111,7 +111,7 @@ p448_add_RAW (
 }

 void
 p448_sub_RAW (
 gf_448_sub_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
@@ -129,7 +129,7 @@ p448_sub_RAW (
 }

 void
 p448_copy (
 gf_448_copy (
    gf_448_t out,
    const gf_448_t a
 ) {
@@ -137,7 +137,7 @@ p448_copy (
 }

 void
 p448_bias (
 gf_448_bias (
    gf_448_t a,
    int amt
 ) {
@@ -151,7 +151,7 @@ p448_bias (
 }

 void
 p448_weak_reduce (
 gf_448_weak_reduce (
    gf_448_t a
 ) {

--- a/src/p448/arch_ref64/f_impl.c
+++ b/src/p448/arch_ref64/f_impl.c
@@ -17,7 +17,7 @@ static __inline__ uint64_t is_zero(uint64_t a) {
 }

 void
 p448_mul (
 gf_448_mul (
    gf_448_s *__restrict__ cs,
    const gf_448_t as,
    const gf_448_t bs
@@ -183,7 +183,7 @@ p448_mul (
 }

 void
 p448_mulw (
 gf_448_mulw (
    gf_448_s *__restrict__ cs,
    const gf_448_t as,
    uint64_t b
@@ -212,7 +212,7 @@ p448_mulw (
 }

 void
 p448_sqr (
 gf_448_sqr (
    gf_448_s *__restrict__ cs,
    const gf_448_t as
 ) {
@@ -327,7 +327,7 @@ p448_sqr (
 }

 void
 p448_strong_reduce (
 gf_448_strong_reduce (
    gf_448_t a
 ) {
    uint64_t mask = (1ull<<56)-1;
@@ -370,14 +370,14 @@ p448_strong_reduce (
 }

 void
 p448_serialize (
 gf_448_serialize (
    uint8_t *serial,
    const gf_448_t x
 ) {
    int i,j;
    gf_448_t red;
    p448_copy(red, x);
    p448_strong_reduce(red);
    gf_448_copy(red, x);
    gf_448_strong_reduce(red);
    for (i=0; i<8; i++) {
        for (j=0; j<7; j++) {
            serial[7*i+j] = red->limb[i];
@@ -388,7 +388,7 @@ p448_serialize (
 }

 mask_t
 p448_deserialize (
 gf_448_deserialize (
    gf_448_t x,
    const uint8_t serial[56]
 ) {
--- a/src/p448/arch_ref64/f_impl.h
+++ b/src/p448/arch_ref64/f_impl.h
@@ -22,69 +22,69 @@ extern "C" {
 #endif

 static __inline__ void
 p448_add_RAW (
 gf_448_add_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
 ) __attribute__((unused));
             
 static __inline__ void
 p448_sub_RAW (
 gf_448_sub_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
 ) __attribute__((unused));
             
 static __inline__ void
 p448_copy (
 gf_448_copy (
    gf_448_t out,
    const gf_448_t a
 ) __attribute__((unused));
             
 static __inline__ void
 p448_weak_reduce (
 gf_448_weak_reduce (
    gf_448_t inout
 ) __attribute__((unused));
             
 void
 p448_strong_reduce (
 gf_448_strong_reduce (
    gf_448_t inout
 );

 static __inline__ void
 p448_bias (
 gf_448_bias (
    gf_448_t inout,
    int amount
 ) __attribute__((unused));
         
 void
 p448_mul (
 gf_448_mul (
    gf_448_s *__restrict__ out,
    const gf_448_t a,
    const gf_448_t b
 );

 void
 p448_mulw (
 gf_448_mulw (
    gf_448_s *__restrict__ out,
    const gf_448_t a,
    uint64_t b
 );

 void
 p448_sqr (
 gf_448_sqr (
    gf_448_s *__restrict__ out,
    const gf_448_t a
 );

 void
 p448_serialize (
 gf_448_serialize (
    uint8_t *serial,
    const gf_448_t x
 );

 mask_t
 p448_deserialize (
 gf_448_deserialize (
    gf_448_t x,
    const uint8_t serial[56]
 );
@@ -92,7 +92,7 @@ p448_deserialize (
 /* -------------- Inline functions begin here -------------- */

 void
 p448_add_RAW (
 gf_448_add_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
@@ -101,11 +101,11 @@ p448_add_RAW (
    for (i=0; i<8; i++) {
        out->limb[i] = a->limb[i] + b->limb[i];
    }
    p448_weak_reduce(out);
    gf_448_weak_reduce(out);
 }

 void
 p448_sub_RAW (
 gf_448_sub_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
@@ -115,11 +115,11 @@ p448_sub_RAW (
    for (i=0; i<8; i++) {
        out->limb[i] = a->limb[i] - b->limb[i] + ((i==4) ? co2 : co1);
    }
    p448_weak_reduce(out);
    gf_448_weak_reduce(out);
 }

 void
 p448_copy (
 gf_448_copy (
    gf_448_t out,
    const gf_448_t a
 ) {
@@ -127,7 +127,7 @@ p448_copy (
 }

 void
 p448_bias (
 gf_448_bias (
    gf_448_t a,
    int amt
 ) {
@@ -136,7 +136,7 @@ p448_bias (
 }

 void
 p448_weak_reduce (
 gf_448_weak_reduce (
    gf_448_t a
 ) {
    uint64_t mask = (1ull<<56) - 1;
--- a/src/p448/arch_x86_64/f_impl.c
+++ b/src/p448/arch_x86_64/f_impl.c
@@ -6,7 +6,7 @@
 #include "x86-64-arith.h"

 void
 p448_mul (
 gf_448_mul (
    gf_448_s *__restrict__ cs,
    const gf_448_t as,
    const gf_448_t bs
@@ -146,7 +146,7 @@ p448_mul (
 }

 void
 p448_mulw (
 gf_448_mulw (
    gf_448_s *__restrict__ cs,
    const gf_448_t as,
    uint64_t b
@@ -191,7 +191,7 @@ p448_mulw (
 }

 void
 p448_sqr (
 gf_448_sqr (
    gf_448_s *__restrict__ cs,
    const gf_448_t as
 ) {
@@ -306,7 +306,7 @@ p448_sqr (
 }

 void
 p448_strong_reduce (
 gf_448_strong_reduce (
    gf_448_t a
 ) {
    uint64_t mask = (1ull<<56)-1;
@@ -349,14 +349,14 @@ p448_strong_reduce (
 }

 void
 p448_serialize (
 gf_448_serialize (
    uint8_t *serial,
    const gf_448_t x
 ) {
    int i,j;
    gf_448_t red;
    p448_copy(red, x);
    p448_strong_reduce(red);
    gf_448_copy(red, x);
    gf_448_strong_reduce(red);
    for (i=0; i<8; i++) {
        for (j=0; j<7; j++) {
            serial[7*i+j] = red->limb[i];
@@ -367,7 +367,7 @@ p448_serialize (
 }

 mask_t
 p448_deserialize (
 gf_448_deserialize (
    gf_448_t x,
    const uint8_t serial[56]
 ) {
--- a/src/p448/arch_x86_64/f_impl.h
+++ b/src/p448/arch_x86_64/f_impl.h
@@ -18,69 +18,69 @@ extern "C" {
 #endif

 static __inline__ void
 p448_add_RAW (
 gf_448_add_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
 ) __attribute__((unused,always_inline));
             
 static __inline__ void
 p448_sub_RAW (
 gf_448_sub_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
 ) __attribute__((unused,always_inline));
             
 static __inline__ void
 p448_copy (
 gf_448_copy (
    gf_448_t out,
    const gf_448_t a
 ) __attribute__((unused,always_inline));
             
 static __inline__ void
 p448_weak_reduce (
 gf_448_weak_reduce (
    gf_448_t inout
 ) __attribute__((unused,always_inline));
             
 void
 p448_strong_reduce (
 gf_448_strong_reduce (
    gf_448_t inout
 );

 static __inline__ void
 p448_bias (
 gf_448_bias (
    gf_448_t inout,
    int amount
 ) __attribute__((unused,always_inline));
         
 void
 p448_mul (
 gf_448_mul (
    gf_448_s *__restrict__ out,
    const gf_448_t a,
    const gf_448_t b
 );

 void
 p448_mulw (
 gf_448_mulw (
    gf_448_s *__restrict__ out,
    const gf_448_t a,
    uint64_t b
 );

 void
 p448_sqr (
 gf_448_sqr (
    gf_448_s *__restrict__ out,
    const gf_448_t a
 );

 void
 p448_serialize (
 gf_448_serialize (
    uint8_t *serial,
    const gf_448_t x
 );

 mask_t
 p448_deserialize (
 gf_448_deserialize (
    gf_448_t x,
    const uint8_t serial[56]
 );
@@ -88,7 +88,7 @@ p448_deserialize (
 /* -------------- Inline functions begin here -------------- */

 void
 p448_add_RAW (
 gf_448_add_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
@@ -106,7 +106,7 @@ p448_add_RAW (
 }

 void
 p448_sub_RAW (
 gf_448_sub_RAW (
    gf_448_t out,
    const gf_448_t a,
    const gf_448_t b
@@ -124,7 +124,7 @@ p448_sub_RAW (
 }

 void
 p448_copy (
 gf_448_copy (
    gf_448_t out,
    const gf_448_t a
 ) {
@@ -135,7 +135,7 @@ p448_copy (
 }

 void
 p448_bias (
 gf_448_bias (
    gf_448_t a,
    int amt
 ) {
@@ -162,7 +162,7 @@ p448_bias (
 }

 void
 p448_weak_reduce (
 gf_448_weak_reduce (
    gf_448_t a
 ) {
    /* PERF: use pshufb/palignr if anyone cares about speed of this */
--- a/src/p448/f_field.h
+++ b/src/p448/f_field.h
@@ -1,32 +0,0 @@
 /**
 * @file f_field.h
 * @brief Field-specific code.
 * @copyright
 *   Copyright (c) 2014 Cryptography Research, Inc.  \n
 *   Released under the MIT License.  See LICENSE.txt for license information.
 * @author Mike Hamburg
 */
 /* Include guard.  NOTE(review): identifiers starting with a double
  * underscore are reserved for the implementation in C. */
 #ifndef __F_FIELD_H__
 #define __F_FIELD_H__ 1

 #include "constant_time.h"
 #include <string.h>

 /* f_impl.h is resolved through the include path, so the architecture
  * directory passed with -I decides which implementation is picked up. */
 #include "f_impl.h"
 /* Numeric parameters of this field (presumably limb width and field size
  * in bits; confirm against the users of these macros). */
 #define GF_LIT_LIMB_BITS  56
 #define GF_BITS           448
 /* Generic gf / gf_* names aliased to the 448-specific symbols.  The types
  * use the gf_448_ prefix while the functions use the p448_ prefix. */
 #define gf                gf_448_t
 #define gf_s              gf_448_s
 #define gf_mul            p448_mul
 #define gf_sqr            p448_sqr
 #define gf_add_RAW        p448_add_RAW
 #define gf_sub_RAW        p448_sub_RAW
 #define gf_mulw           p448_mulw
 #define gf_bias           p448_bias
 #define gf_isr            p448_isr
 #define gf_weak_reduce    p448_weak_reduce
 #define gf_strong_reduce  p448_strong_reduce
 #define gf_serialize      p448_serialize
 #define gf_deserialize    p448_deserialize

 #endif /* __F_FIELD_H__ */
--- a/src/p480/f_field.h
+++ b/src/p480/f_field.h
@@ -1,31 +0,0 @@
 /**
 * @file f_field.h
 * @brief Field-specific code.
 * @copyright
 *   Copyright (c) 2014 Cryptography Research, Inc.  \n
 *   Released under the MIT License.  See LICENSE.txt for license information.
 * @author Mike Hamburg
 */
 /* Include guard.  NOTE(review): identifiers starting with a double
  * underscore are reserved for the implementation in C. */
 #ifndef __F_FIELD_H__
 #define __F_FIELD_H__ 1

 #include "constant_time.h"
 #include <string.h>

 /* f_impl.h is resolved through the include path, so the architecture
  * directory passed with -I decides which implementation is picked up. */
 #include "f_impl.h"
 /* Numeric parameters of this field (presumably limb width and field size
  * in bits; confirm against the users of these macros). */
 #define GF_LIT_LIMB_BITS  60
 #define GF_BITS           480
 /* Generic gf / gf_* names aliased to the p480_* symbols.  No gf_s alias
  * is defined in this header. */
 #define gf              p480_t
 #define gf_mul            p480_mul
 #define gf_sqr            p480_sqr
 #define gf_add_RAW        p480_add_RAW
 #define gf_sub_RAW        p480_sub_RAW
 #define gf_mulw           p480_mulw
 #define gf_bias           p480_bias
 #define gf_isr            p480_isr
 #define gf_weak_reduce    p480_weak_reduce
 #define gf_strong_reduce  p480_strong_reduce
 #define gf_serialize      p480_serialize
 #define gf_deserialize    p480_deserialize

 #endif /* __F_FIELD_H__ */
--- a/src/p521/f_field.h
+++ b/src/p521/f_field.h
@@ -1,31 +0,0 @@
 /**
 * @file f_field.h
 * @brief Field-specific code.
 * @copyright
 *   Copyright (c) 2014 Cryptography Research, Inc.  \n
 *   Released under the MIT License.  See LICENSE.txt for license information.
 * @author Mike Hamburg
 */
 /* Include guard.  NOTE(review): identifiers starting with a double
  * underscore are reserved for the implementation in C. */
 #ifndef __F_FIELD_H__
 #define __F_FIELD_H__ 1

 #include <string.h>
 #include "constant_time.h"

 /* f_impl.h is resolved through the include path, so the architecture
  * directory passed with -I decides which implementation is picked up. */
 #include "f_impl.h"
 /* Numeric parameters of this field (presumably limb width and field size
  * in bits; confirm against the users of these macros). */
 #define GF_LIT_LIMB_BITS  58
 #define GF_BITS           521
 /* Generic gf / gf_* names aliased to the p521_* symbols.  No gf_s alias
  * is defined in this header. */
 #define gf              p521_t
 #define gf_mul            p521_mul
 #define gf_sqr            p521_sqr
 #define gf_add_RAW        p521_add_RAW
 #define gf_sub_RAW        p521_sub_RAW
 #define gf_mulw           p521_mulw
 #define gf_bias           p521_bias
 #define gf_isr            p521_isr
 #define gf_weak_reduce    p521_weak_reduce
 #define gf_strong_reduce  p521_strong_reduce
 #define gf_serialize      p521_serialize
 #define gf_deserialize    p521_deserialize

 #endif /* __F_FIELD_H__ */