further reduce the code in f_impl.h

8 lat temu · 233f8453c7
--- a/src/gen_headers/f_field_h.py
+++ b/src/gen_headers/f_field_h.py
@@ -8,7 +8,7 @@ f_field_h = gen_file(
    code = """
 #include "constant_time.h"
 #include <string.h>

 #include <assert.h>

 #include "decaf/decaf_%(gf_bits)s.h" /* HACK in genheader */
 #include "word.h"
--- a/src/p25519/arch_ref64/f_impl.h
+++ b/src/p25519/arch_ref64/f_impl.h
@@ -1,23 +1,9 @@
 /* Copyright (c) 2014 Cryptography Research, Inc.
 /* Copyright (c) 2014-2016 Cryptography Research, Inc.
 * Released under the MIT License.  See LICENSE.txt for license information.
 */
 #ifndef __P25519_H__
 #define __P25519_H__ 1

 #include "f_field.h"

 #include <stdint.h>
 #include <assert.h>
 #include <string.h>

 #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}

 #ifdef __cplusplus
 extern "C" {
 #endif

 /* -------------- Inline functions begin here -------------- */

 void gf_add_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<5; i++) {
        out->limb[i] = a->limb[i] + b->limb[i];
@@ -47,8 +33,3 @@ void gf_weak_reduce (gf a) {
    a->limb[0] = (a->limb[0] & mask) + tmp*19;
 }

 #ifdef __cplusplus
 }; /* extern "C" */
 #endif

 #endif /* __P25519_H__ */
--- a/src/p25519/arch_x86_64/f_impl.h
+++ b/src/p25519/arch_x86_64/f_impl.h
@@ -1,19 +1,9 @@
 /* Copyright (c) 2014 Cryptography Research, Inc.
 /* Copyright (c) 2014-2016 Cryptography Research, Inc.
 * Released under the MIT License.  See LICENSE.txt for license information.
 */
 #ifndef __P25519_H__
 #define __P25519_H__ 1

 #include "f_field.h"

 #include <stdint.h>
 #include <assert.h>
 #include <string.h>

 #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}

 /* -------------- Inline functions begin here -------------- */

 void gf_add_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<5; i++) {
        out->limb[i] = a->limb[i] + b->limb[i];
@@ -41,9 +31,3 @@ void gf_weak_reduce (gf a) {
    }
    a->limb[0] = (a->limb[0] & mask) + tmp*19;
 }

 #ifdef __cplusplus
 }; /* extern "C" */
 #endif

 #endif /* __P25519_H__ */
--- a/src/p448/arch_32/f_impl.h
+++ b/src/p448/arch_32/f_impl.h
@@ -1,24 +1,11 @@
 /* Copyright (c) 2014 Cryptography Research, Inc.
 /* Copyright (c) 2014-2016 Cryptography Research, Inc.
 * Released under the MIT License.  See LICENSE.txt for license information.
 */
 #ifndef __P448_H__
 #define __P448_H__ 1

 #include "f_field.h"

 #include <stdint.h>
 #include <assert.h>

 #define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
 #define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
    {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}

 #ifdef __cplusplus
 extern "C" {
 #endif

 /* -------------- Inline functions begin here -------------- */

 void gf_add_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
        ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
@@ -63,8 +50,3 @@ void gf_weak_reduce (gf a) {
    a->limb[0] = (a->limb[0] & mask) + tmp;
 }

 #ifdef __cplusplus
 }; /* extern "C" */
 #endif

 #endif /* __P448_H__ */
--- a/src/p448/arch_arm_32/f_impl.h
+++ b/src/p448/arch_arm_32/f_impl.h
@@ -1,24 +1,11 @@
 /* Copyright (c) 2014 Cryptography Research, Inc.
 /* Copyright (c) 2014-2016 Cryptography Research, Inc.
 * Released under the MIT License.  See LICENSE.txt for license information.
 */
 #ifndef __P448_H__
 #define __P448_H__ 1

 #include "f_field.h"

 #include <stdint.h>
 #include <assert.h>

 #define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
 #define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
    {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}

 #ifdef __cplusplus
 extern "C" {
 #endif

 /* -------------- Inline functions begin here -------------- */

 void gf_add_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
        ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
@@ -61,8 +48,3 @@ void gf_weak_reduce (gf a) {
    a->limb[0] = (a->limb[0] & mask) + tmp;
 }

 #ifdef __cplusplus
 }; /* extern "C" */
 #endif

 #endif /* __P448_H__ */
--- a/src/p448/arch_neon_experimental/f_impl.h
+++ b/src/p448/arch_neon_experimental/f_impl.h
@@ -1,13 +1,6 @@
 /* Copyright (c) 2014 Cryptography Research, Inc.
 /* Copyright (c) 2014-2016 Cryptography Research, Inc.
 * Released under the MIT License.  See LICENSE.txt for license information.
 */
 #ifndef __P448_H__
 #define __P448_H__ 1

 #include "f_field.h"

 #include <stdint.h>
 #include <assert.h>

 #define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15)
 #define USE_NEON_PERM 1
@@ -19,12 +12,6 @@
      LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \
      LIMBLO(d),LIMBLO(h), LIMBHI(d),LIMBHI(h)}}

 #ifdef __cplusplus
 extern "C" {
 #endif
    
 /* -------------- Inline functions begin here -------------- */

 void gf_add_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
        ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
@@ -64,8 +51,3 @@ void gf_weak_reduce (gf a) {
    aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp&vm2);
 }

 #ifdef __cplusplus
 }; /* extern "C" */
 #endif

 #endif /* __P448_H__ */
--- a/src/p448/arch_ref64/f_impl.h
+++ b/src/p448/arch_ref64/f_impl.h
@@ -1,31 +1,17 @@
 /* Copyright (c) 2014 Cryptography Research, Inc.
 /* Copyright (c) 2014-2016 Cryptography Research, Inc.
 * Released under the MIT License.  See LICENSE.txt for license information.
 */
 #ifndef __P448_H__
 #define __P448_H__ 1

 #include "f_field.h"

 #include <stdint.h>
 #include <assert.h>
 #include <string.h>

 #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}

 #ifdef __cplusplus
 extern "C" {
 #endif

 /* -------------- Inline functions begin here -------------- */

 void gf_add_RAW (gf  out, const gf  a, const gf  b) {
 void gf_add_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<8; i++) {
        out->limb[i] = a->limb[i] + b->limb[i];
    }
    gf_weak_reduce(out);
 }

 void gf_sub_RAW (gf  out, const gf  a, const gf  b) {
 void gf_sub_RAW (gf out, const gf a, const gf b) {
    uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2;
    for (unsigned int i=0; i<8; i++) {
        out->limb[i] = a->limb[i] - b->limb[i] + ((i==4) ? co2 : co1);
@@ -33,12 +19,12 @@ void gf_sub_RAW (gf  out, const gf  a, const gf  b) {
    gf_weak_reduce(out);
 }

 void gf_bias (gf  a, int amt) {
 void gf_bias (gf a, int amt) {
    (void) a;
    (void) amt;
 }

 void gf_weak_reduce (gf  a) {
 void gf_weak_reduce (gf a) {
    uint64_t mask = (1ull<<56) - 1;
    uint64_t tmp = a->limb[7] >> 56;
    a->limb[4] += tmp;
@@ -47,9 +33,3 @@ void gf_weak_reduce (gf  a) {
    }
    a->limb[0] = (a->limb[0] & mask) + tmp;
 }

 #ifdef __cplusplus
 }; /* extern "C" */
 #endif

 #endif /* __P448_H__ */
--- a/src/p448/arch_x86_64/f_impl.h
+++ b/src/p448/arch_x86_64/f_impl.h
@@ -1,23 +1,10 @@
 /* Copyright (c) 2014 Cryptography Research, Inc.
 /* Copyright (c) 2014-2016 Cryptography Research, Inc.
 * Released under the MIT License.  See LICENSE.txt for license information.
 */
 #ifndef __P448_H__
 #define __P448_H__ 1

 #include "f_field.h"

 #include <stdint.h>
 #include <assert.h>

 #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}

 #ifdef __cplusplus
 extern "C" {
 #endif

 /* -------------- Inline functions begin here -------------- */

 void gf_add_RAW (gf  out, const gf  a, const gf  b) {
 void gf_add_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
        ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
    }
@@ -29,7 +16,7 @@ void gf_add_RAW (gf  out, const gf  a, const gf  b) {
    */
 }

 void gf_sub_RAW (gf  out, const gf  a, const gf  b) {
 void gf_sub_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
        ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
    }
@@ -41,7 +28,7 @@ void gf_sub_RAW (gf  out, const gf  a, const gf  b) {
    */
 }

 void gf_bias (gf  a, int amt) {
 void gf_bias (gf a, int amt) {
    uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt;
    
 #if __AVX2__
@@ -63,7 +50,7 @@ void gf_bias (gf  a, int amt) {
 #endif
 }

 void gf_weak_reduce (gf  a) {
 void gf_weak_reduce (gf a) {
    /* PERF: use pshufb/palignr if anyone cares about speed of this */
    uint64_t mask = (1ull<<56) - 1;
    uint64_t tmp = a->limb[7] >> 56;
@@ -74,8 +61,3 @@ void gf_weak_reduce (gf  a) {
    a->limb[0] = (a->limb[0] & mask) + tmp;
 }

 #ifdef __cplusplus
 }; /* extern "C" */
 #endif

 #endif /* __P448_H__ */
--- a/src/p480/arch_x86_64/f_impl.h
+++ b/src/p480/arch_x86_64/f_impl.h
@@ -1,23 +1,8 @@
 /* Copyright (c) 2014 Cryptography Research, Inc.
 /* Copyright (c) 2014-2016 Cryptography Research, Inc.
 * Released under the MIT License.  See LICENSE.txt for license information.
 */
 #ifndef __gf_H__
 #define __gf_H__ 1

 #include "f_field.h"

 #include <stdint.h>
 #include <assert.h>

 #include "word.h"

 #ifdef __cplusplus
 extern "C" {
 #endif

 /* -------------- Inline functions begin here -------------- */

 void gf_add_RAW (gf  *out, const gf  *a, const gf  *b) {
 void gf_add_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
        ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
    }
@@ -29,7 +14,7 @@ void gf_add_RAW (gf  *out, const gf  *a, const gf  *b) {
    */
 }

 void gf_sub_RAW (gf  *out, const gf  *a, const gf  *b) {
 void gf_sub_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
        ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
    }
@@ -41,14 +26,14 @@ void gf_sub_RAW (gf  *out, const gf  *a, const gf  *b) {
    */
 }

 void gf_copy (gf  *out, const gf  *a) {
 void gf_copy (gf out, const gf a) {
    for (unsigned int i=0; i<sizeof(*out)/sizeof(big_register_t); i++) {
        ((big_register_t *)out)[i] = ((const big_register_t *)a)[i];
    }
 }

 void gf_bias (
    gf  *a, int amt
    gf a, int amt
 ) {
    uint64_t co1 = ((1ull<<60)-1)*amt, co2 = co1-amt;
    
@@ -71,7 +56,7 @@ void gf_bias (
 #endif
 }

 void gf_weak_reduce (gf  *a) {
 void gf_weak_reduce (gf a) {
    /* PERF: use pshufb/palignr if anyone cares about speed of this */
    uint64_t mask = (1ull<<60) - 1;
    uint64_t tmp = a->limb[7] >> 60;
@@ -81,9 +66,3 @@ void gf_weak_reduce (gf  *a) {
    }
    a->limb[0] = (a->limb[0] & mask) + tmp;
 }

 #ifdef __cplusplus
 }; /* extern "C" */
 #endif

 #endif /* __gf_H__ */
--- a/src/p521/arch_ref64/f_impl.h
+++ b/src/p521/arch_ref64/f_impl.h
@@ -1,29 +1,15 @@
 /* Copyright (c) 2014 Cryptography Research, Inc.
 /* Copyright (c) 2014-2016 Cryptography Research, Inc.
 * Released under the MIT License.  See LICENSE.txt for license information.
 */
 #ifndef __P521_H__
 #define __P521_H__ 1

 #include "f_field.h"

 #include <stdint.h>
 #include <assert.h>
 #include <string.h>

 #ifdef __cplusplus
 extern "C" {
 #endif

 /* -------------- Inline functions begin here -------------- */

 void gf_add_RAW (gf  *out, const gf  *a, const gf  *b) {
 void gf_add_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<9; i++) {
        out->limb[i] = a->limb[i] + b->limb[i];
    }
    gf_weak_reduce(out);
 }

 void gf_sub_RAW (gf  *out, const gf  *a, const gf  *b) {
 void gf_sub_RAW (gf out, const gf a, const gf b) {
    uint64_t co1 = ((1ull<<58)-1)*4, co2 = ((1ull<<57)-1)*4;
    for (unsigned int i=0; i<9; i++) {
        out->limb[i] = a->limb[i] - b->limb[i] + ((i==8) ? co2 : co1);
@@ -31,12 +17,12 @@ void gf_sub_RAW (gf  *out, const gf  *a, const gf  *b) {
    gf_weak_reduce(out);
 }

 void gf_bias (gf *a, int amt) {
 void gf_bias (gf a, int amt) {
    (void) a;
    (void) amt;
 }

 void gf_weak_reduce (gf  *a) {
 void gf_weak_reduce (gf a) {
    uint64_t mask = (1ull<<58) - 1;
    uint64_t tmp = a->limb[8] >> 57;
    for (unsigned int i=8; i>0; i--) {
@@ -44,9 +30,3 @@ void gf_weak_reduce (gf  *a) {
    }
    a->limb[0] = (a->limb[0] & mask) + tmp;
 }

 #ifdef __cplusplus
 }; /* extern "C" */
 #endif

 #endif /* __P521_H__ */
--- a/src/p521/arch_x86_64_r12/f_impl.h
+++ b/src/p521/arch_x86_64_r12/f_impl.h
@@ -1,27 +1,11 @@
 /* Copyright (c) 2014 Cryptography Research, Inc.
 /* Copyright (c) 2014-2016 Cryptography Research, Inc.
 * Released under the MIT License.  See LICENSE.txt for license information.
 */
 #ifndef __P521_H__
 #define __P521_H__ 1

 #include "f_field.h"

 #include <stdint.h>
 #include <assert.h>
 #include <string.h>

 #include "constant_time.h"

 /* FIXME: Currently doesn't work at all, because the struct is declared [9] and not [12] */
 /* FIXME: Currently this file doesn't work at all, because the struct is declared [9] and not [12] */
 #define LIMBPERM(x) (((x)%3)*4 + (x)/3)
 #define USE_P521_3x3_TRANSPOSE

 #ifdef __cplusplus
 extern "C" {
 #endif

 /* -------------- Inline functions begin here -------------- */

 typedef uint64x4_t uint64x3_t; /* fit it in a vector register */

 static const uint64x3_t mask58 = { (1ull<<58) - 1, (1ull<<58) - 1, (1ull<<58) - 1, 0 };
@@ -31,19 +15,19 @@ static inline uint64x3_t timesW (uint64x3_t u) {
    return u.zxyw + u.zwww;
 }

 void gf_add_RAW (gf  *out, const gf  *a, const gf  *b) {
 void gf_add_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
        ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
    }
 }

 void gf_sub_RAW (gf  *out, const gf  *a, const gf  *b) {
 void gf_sub_RAW (gf out, const gf a, const gf b) {
    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
        ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
    }
 }

 void gf_bias (gf  *a, int amt) {
 void gf_bias (gf a, int amt) {
    uint64_t co0 = ((1ull<<58)-2)*amt, co1 = ((1ull<<58)-1)*amt;
    uint64x4_t vlo = { co0, co1, co1, 0 }, vhi = { co1, co1, co1, 0 };
    ((uint64x4_t*)a)[0] += vlo;
@@ -51,7 +35,7 @@ void gf_bias (gf  *a, int amt) {
    ((uint64x4_t*)a)[2] += vhi;
 }

 void gf_weak_reduce (gf  *a) {
 void gf_weak_reduce (gf a) {
 #if 0
    int i;
    assert(a->limb[3] == 0 && a->limb[7] == 0 && a->limb[11] == 0);
@@ -72,9 +56,3 @@ void gf_weak_reduce (gf  *a) {
    ((uint64x4_t*)a)[1] = out1;
    ((uint64x4_t*)a)[2] = out2;
 }

 #ifdef __cplusplus
 }; /* extern "C" */
 #endif

 #endif /* __P521_H__ */