@@ -4,14 +4,12 @@ | |||||
#ifndef __P25519_H__ | #ifndef __P25519_H__ | ||||
#define __P25519_H__ 1 | #define __P25519_H__ 1 | ||||
#include "f_field.h" | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#include <assert.h> | #include <assert.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include "decaf/decaf_255.h" | |||||
#include "word.h" | |||||
#define LBITS 51 | |||||
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} | #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} | ||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
@@ -20,54 +18,29 @@ extern "C" { | |||||
/* -------------- Inline functions begin here -------------- */ | /* -------------- Inline functions begin here -------------- */ | ||||
void | |||||
gf_25519_add_RAW ( | |||||
gf_25519_t out, | |||||
const gf_25519_t a, | |||||
const gf_25519_t b | |||||
) { | |||||
void gf_add_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<5; i++) { | for (i=0; i<5; i++) { | ||||
out->limb[i] = a->limb[i] + b->limb[i]; | out->limb[i] = a->limb[i] + b->limb[i]; | ||||
} | } | ||||
gf_25519_weak_reduce(out); | |||||
gf_weak_reduce(out); | |||||
} | } | ||||
void | |||||
gf_25519_sub_RAW ( | |||||
gf_25519_t out, | |||||
const gf_25519_t a, | |||||
const gf_25519_t b | |||||
) { | |||||
void gf_sub_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36; | uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36; | ||||
for (i=0; i<5; i++) { | for (i=0; i<5; i++) { | ||||
out->limb[i] = a->limb[i] - b->limb[i] + ((i==0) ? co2 : co1); | out->limb[i] = a->limb[i] - b->limb[i] + ((i==0) ? co2 : co1); | ||||
} | } | ||||
gf_25519_weak_reduce(out); | |||||
} | |||||
void | |||||
gf_25519_copy ( | |||||
gf_25519_t out, | |||||
const gf_25519_t a | |||||
) { | |||||
memcpy(out,a,sizeof(*a)); | |||||
gf_weak_reduce(out); | |||||
} | } | ||||
void | |||||
gf_25519_bias ( | |||||
gf_25519_t a, | |||||
int amt | |||||
) { | |||||
void gf_bias (gf a, int amt) { | |||||
(void) a; | (void) a; | ||||
(void) amt; | (void) amt; | ||||
} | } | ||||
void | |||||
gf_25519_weak_reduce ( | |||||
gf_25519_t a | |||||
) { | |||||
void gf_weak_reduce (gf a) { | |||||
uint64_t mask = (1ull<<51) - 1; | uint64_t mask = (1ull<<51) - 1; | ||||
uint64_t tmp = a->limb[4] >> 51; | uint64_t tmp = a->limb[4] >> 51; | ||||
int i; | int i; | ||||
@@ -4,36 +4,24 @@ | |||||
#ifndef __P25519_H__ | #ifndef __P25519_H__ | ||||
#define __P25519_H__ 1 | #define __P25519_H__ 1 | ||||
#include "f_field.h" | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#include <assert.h> | #include <assert.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include "decaf/decaf_255.h" | |||||
#include "word.h" | |||||
#define DECAF_255_LIMB_BITS 51 | |||||
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} | #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} | ||||
/* -------------- Inline functions begin here -------------- */ | /* -------------- Inline functions begin here -------------- */ | ||||
void | |||||
gf_25519_add_RAW ( | |||||
gf_25519_t out, | |||||
const gf_25519_t a, | |||||
const gf_25519_t b | |||||
) { | |||||
void gf_add_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<5; i++) { | for (i=0; i<5; i++) { | ||||
out->limb[i] = a->limb[i] + b->limb[i]; | out->limb[i] = a->limb[i] + b->limb[i]; | ||||
} | } | ||||
} | } | ||||
void | |||||
gf_25519_sub_RAW ( | |||||
gf_25519_t out, | |||||
const gf_25519_t a, | |||||
const gf_25519_t b | |||||
) { | |||||
void gf_sub_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36; | uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36; | ||||
for (i=0; i<5; i++) { | for (i=0; i<5; i++) { | ||||
@@ -41,11 +29,7 @@ gf_25519_sub_RAW ( | |||||
} | } | ||||
} | } | ||||
void | |||||
gf_25519_bias ( | |||||
gf_25519_t a, | |||||
int amt | |||||
) { | |||||
void gf_bias (gf a, int amt) { | |||||
a->limb[0] += ((uint64_t)(amt)<<52) - 38*amt; | a->limb[0] += ((uint64_t)(amt)<<52) - 38*amt; | ||||
int i; | int i; | ||||
for (i=1; i<5; i++) { | for (i=1; i<5; i++) { | ||||
@@ -53,10 +37,7 @@ gf_25519_bias ( | |||||
} | } | ||||
} | } | ||||
void | |||||
gf_25519_weak_reduce ( | |||||
gf_25519_t a | |||||
) { | |||||
void gf_weak_reduce (gf a) { | |||||
uint64_t mask = (1ull<<51) - 1; | uint64_t mask = (1ull<<51) - 1; | ||||
uint64_t tmp = a->limb[4] >> 51; | uint64_t tmp = a->limb[4] >> 51; | ||||
int i; | int i; | ||||
@@ -4,17 +4,12 @@ | |||||
#ifndef __P448_H__ | #ifndef __P448_H__ | ||||
#define __P448_H__ 1 | #define __P448_H__ 1 | ||||
#include "word.h" | |||||
#include "f_field.h" | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#include <assert.h> | #include <assert.h> | ||||
typedef struct gf_448_s { | |||||
uint32_t limb[16]; | |||||
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; | |||||
#define LBITS 28 | |||||
#define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS | |||||
#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 | |||||
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | #define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | ||||
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} | {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} | ||||
@@ -24,12 +19,7 @@ extern "C" { | |||||
/* -------------- Inline functions begin here -------------- */ | /* -------------- Inline functions begin here -------------- */ | ||||
void | |||||
gf_448_add_RAW ( | |||||
gf_448_t out, | |||||
const gf_448_t a, | |||||
const gf_448_t b | |||||
) { | |||||
void gf_add_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | ||||
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i]; | ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i]; | ||||
@@ -42,12 +32,7 @@ gf_448_add_RAW ( | |||||
*/ | */ | ||||
} | } | ||||
void | |||||
gf_448_sub_RAW ( | |||||
gf_448_t out, | |||||
const gf_448_t a, | |||||
const gf_448_t b | |||||
) { | |||||
void gf_sub_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | ||||
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i]; | ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i]; | ||||
@@ -60,11 +45,7 @@ gf_448_sub_RAW ( | |||||
*/ | */ | ||||
} | } | ||||
void | |||||
gf_448_bias ( | |||||
gf_448_t a, | |||||
int amt | |||||
) { | |||||
void gf_bias (gf a, int amt) { | |||||
uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; | uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; | ||||
uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; | uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; | ||||
uint32x4_t *aa = (uint32x4_t*) a; | uint32x4_t *aa = (uint32x4_t*) a; | ||||
@@ -74,10 +55,7 @@ gf_448_bias ( | |||||
aa[3] += lo; | aa[3] += lo; | ||||
} | } | ||||
void | |||||
gf_448_weak_reduce ( | |||||
gf_448_t a | |||||
) { | |||||
void gf_weak_reduce (gf a) { | |||||
uint64_t mask = (1ull<<28) - 1; | uint64_t mask = (1ull<<28) - 1; | ||||
uint64_t tmp = a->limb[15] >> 28; | uint64_t tmp = a->limb[15] >> 28; | ||||
int i; | int i; | ||||
@@ -4,17 +4,12 @@ | |||||
#ifndef __P448_H__ | #ifndef __P448_H__ | ||||
#define __P448_H__ 1 | #define __P448_H__ 1 | ||||
#include "word.h" | |||||
#include "f_field.h" | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#include <assert.h> | #include <assert.h> | ||||
typedef struct gf_448_s { | |||||
uint32_t limb[16]; | |||||
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; | |||||
#define LBITS 28 | |||||
#define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS | |||||
#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 | |||||
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | #define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | ||||
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} | {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} | ||||
@@ -24,12 +19,7 @@ extern "C" { | |||||
/* -------------- Inline functions begin here -------------- */ | /* -------------- Inline functions begin here -------------- */ | ||||
void | |||||
gf_448_add_RAW ( | |||||
gf_448_t out, | |||||
const gf_448_t a, | |||||
const gf_448_t b | |||||
) { | |||||
void gf_add_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | ||||
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i]; | ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i]; | ||||
@@ -42,12 +32,7 @@ gf_448_add_RAW ( | |||||
*/ | */ | ||||
} | } | ||||
void | |||||
gf_448_sub_RAW ( | |||||
gf_448_t out, | |||||
const gf_448_t a, | |||||
const gf_448_t b | |||||
) { | |||||
void gf_sub_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | ||||
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i]; | ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i]; | ||||
@@ -60,11 +45,7 @@ gf_448_sub_RAW ( | |||||
*/ | */ | ||||
} | } | ||||
void | |||||
gf_448_bias ( | |||||
gf_448_t a, | |||||
int amt | |||||
) { | |||||
void gf_bias (gf a, int amt) { | |||||
uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; | uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; | ||||
uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; | uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; | ||||
uint32x4_t *aa = (uint32x4_t*) a; | uint32x4_t *aa = (uint32x4_t*) a; | ||||
@@ -74,10 +55,7 @@ gf_448_bias ( | |||||
aa[3] += lo; | aa[3] += lo; | ||||
} | } | ||||
void | |||||
gf_448_weak_reduce ( | |||||
gf_448_t a | |||||
) { | |||||
void gf_weak_reduce (gf a) { | |||||
uint64_t mask = (1ull<<28) - 1; | uint64_t mask = (1ull<<28) - 1; | ||||
uint64_t tmp = a->limb[15] >> 28; | uint64_t tmp = a->limb[15] >> 28; | ||||
int i; | int i; | ||||
@@ -4,20 +4,15 @@ | |||||
#ifndef __P448_H__ | #ifndef __P448_H__ | ||||
#define __P448_H__ 1 | #define __P448_H__ 1 | ||||
#include "word.h" | |||||
#include "f_field.h" | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#include <assert.h> | #include <assert.h> | ||||
typedef struct gf_448_s { | |||||
uint32_t limb[16]; | |||||
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; | |||||
#define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15) | #define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15) | ||||
#define USE_NEON_PERM 1 | #define USE_NEON_PERM 1 | ||||
#define LBITS 28 | |||||
#define LIMBHI(x) ((x##ull)>>LBITS) | |||||
#define LIMBLO(x) ((x##ull)&((1ull<<LBITS)-1)) | |||||
#define LIMBHI(x) ((x##ull)>>28) | |||||
#define LIMBLO(x) ((x##ull)&((1ull<<28)-1)) | |||||
# define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | # define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | ||||
{{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \ | {{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \ | ||||
LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \ | LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \ | ||||
@@ -30,24 +25,14 @@ extern "C" { | |||||
/* -------------- Inline functions begin here -------------- */ | /* -------------- Inline functions begin here -------------- */ | ||||
void | |||||
gf_448_add_RAW ( | |||||
gf_448_t out, | |||||
const gf_448_t a, | |||||
const gf_448_t b | |||||
) { | |||||
void gf_add_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | ||||
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i]; | ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i]; | ||||
} | } | ||||
} | } | ||||
void | |||||
gf_448_sub_RAW ( | |||||
gf_448_t out, | |||||
const gf_448_t a, | |||||
const gf_448_t b | |||||
) { | |||||
void gf_sub_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | ||||
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i]; | ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i]; | ||||
@@ -60,11 +45,7 @@ gf_448_sub_RAW ( | |||||
*/ | */ | ||||
} | } | ||||
void | |||||
gf_448_bias ( | |||||
gf_448_t a, | |||||
int amt | |||||
) { | |||||
void gf_bias (gf a, int amt) { | |||||
uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; | uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; | ||||
uint32x4_t lo = {co1,co2,co1,co1}, hi = {co1,co1,co1,co1}; | uint32x4_t lo = {co1,co2,co1,co1}, hi = {co1,co1,co1,co1}; | ||||
uint32x4_t *aa = (uint32x4_t*) a; | uint32x4_t *aa = (uint32x4_t*) a; | ||||
@@ -74,10 +55,7 @@ gf_448_bias ( | |||||
aa[3] += hi; | aa[3] += hi; | ||||
} | } | ||||
void | |||||
gf_448_weak_reduce ( | |||||
gf_448_t a | |||||
) { | |||||
void gf_weak_reduce (gf a) { | |||||
uint32x2_t *aa = (uint32x2_t*) a, vmask = {(1ull<<28)-1, (1ull<<28)-1}, vm2 = {0,-1}, | uint32x2_t *aa = (uint32x2_t*) a, vmask = {(1ull<<28)-1, (1ull<<28)-1}, vm2 = {0,-1}, | ||||
tmp = vshr_n_u32(aa[7],28); | tmp = vshr_n_u32(aa[7],28); | ||||
@@ -4,17 +4,12 @@ | |||||
#ifndef __P448_H__ | #ifndef __P448_H__ | ||||
#define __P448_H__ 1 | #define __P448_H__ 1 | ||||
#include "f_field.h" | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#include <assert.h> | #include <assert.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include "word.h" | |||||
typedef struct gf_448_s { | |||||
uint64_t limb[8]; | |||||
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; | |||||
#define LBITS 56 | |||||
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} | #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} | ||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
@@ -23,46 +18,29 @@ extern "C" { | |||||
/* -------------- Inline functions begin here -------------- */ | /* -------------- Inline functions begin here -------------- */ | ||||
void | |||||
gf_448_add_RAW ( | |||||
gf_448_t out, | |||||
const gf_448_t a, | |||||
const gf_448_t b | |||||
) { | |||||
void gf_add_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<8; i++) { | for (i=0; i<8; i++) { | ||||
out->limb[i] = a->limb[i] + b->limb[i]; | out->limb[i] = a->limb[i] + b->limb[i]; | ||||
} | } | ||||
gf_448_weak_reduce(out); | |||||
gf_weak_reduce(out); | |||||
} | } | ||||
void | |||||
gf_448_sub_RAW ( | |||||
gf_448_t out, | |||||
const gf_448_t a, | |||||
const gf_448_t b | |||||
) { | |||||
void gf_sub_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2; | uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2; | ||||
for (i=0; i<8; i++) { | for (i=0; i<8; i++) { | ||||
out->limb[i] = a->limb[i] - b->limb[i] + ((i==4) ? co2 : co1); | out->limb[i] = a->limb[i] - b->limb[i] + ((i==4) ? co2 : co1); | ||||
} | } | ||||
gf_448_weak_reduce(out); | |||||
gf_weak_reduce(out); | |||||
} | } | ||||
void | |||||
gf_448_bias ( | |||||
gf_448_t a, | |||||
int amt | |||||
) { | |||||
void gf_bias (gf a, int amt) { | |||||
(void) a; | (void) a; | ||||
(void) amt; | (void) amt; | ||||
} | } | ||||
void | |||||
gf_448_weak_reduce ( | |||||
gf_448_t a | |||||
) { | |||||
void gf_weak_reduce (gf a) { | |||||
uint64_t mask = (1ull<<56) - 1; | uint64_t mask = (1ull<<56) - 1; | ||||
uint64_t tmp = a->limb[7] >> 56; | uint64_t tmp = a->limb[7] >> 56; | ||||
int i; | int i; | ||||
@@ -4,13 +4,11 @@ | |||||
#ifndef __P448_H__ | #ifndef __P448_H__ | ||||
#define __P448_H__ 1 | #define __P448_H__ 1 | ||||
#include "f_field.h" | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#include <assert.h> | #include <assert.h> | ||||
#include "decaf/decaf_448.h" | |||||
#include "word.h" | |||||
#define LBITS 56 | |||||
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} | #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} | ||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
@@ -19,12 +17,7 @@ extern "C" { | |||||
/* -------------- Inline functions begin here -------------- */ | /* -------------- Inline functions begin here -------------- */ | ||||
void | |||||
gf_448_add_RAW ( | |||||
gf_448_t out, | |||||
const gf_448_t a, | |||||
const gf_448_t b | |||||
) { | |||||
void gf_add_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | ||||
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | ||||
@@ -37,12 +30,7 @@ gf_448_add_RAW ( | |||||
*/ | */ | ||||
} | } | ||||
void | |||||
gf_448_sub_RAW ( | |||||
gf_448_t out, | |||||
const gf_448_t a, | |||||
const gf_448_t b | |||||
) { | |||||
void gf_sub_RAW (gf out, const gf a, const gf b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | ||||
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; | ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; | ||||
@@ -55,11 +43,7 @@ gf_448_sub_RAW ( | |||||
*/ | */ | ||||
} | } | ||||
void | |||||
gf_448_bias ( | |||||
gf_448_t a, | |||||
int amt | |||||
) { | |||||
void gf_bias (gf a, int amt) { | |||||
uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt; | uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt; | ||||
#if __AVX2__ | #if __AVX2__ | ||||
@@ -82,10 +66,7 @@ gf_448_bias ( | |||||
#endif | #endif | ||||
} | } | ||||
void | |||||
gf_448_weak_reduce ( | |||||
gf_448_t a | |||||
) { | |||||
void gf_weak_reduce (gf a) { | |||||
/* PERF: use pshufb/palignr if anyone cares about speed of this */ | /* PERF: use pshufb/palignr if anyone cares about speed of this */ | ||||
uint64_t mask = (1ull<<56) - 1; | uint64_t mask = (1ull<<56) - 1; | ||||
uint64_t tmp = a->limb[7] >> 56; | uint64_t tmp = a->limb[7] >> 56; | ||||
@@ -1,78 +1,23 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | /* Copyright (c) 2014 Cryptography Research, Inc. | ||||
* Released under the MIT License. See LICENSE.txt for license information. | * Released under the MIT License. See LICENSE.txt for license information. | ||||
*/ | */ | ||||
#ifndef __gf_480_H__ | |||||
#define __gf_480_H__ 1 | |||||
#ifndef __gf_H__ | |||||
#define __gf_H__ 1 | |||||
#include "f_field.h" | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#include <assert.h> | #include <assert.h> | ||||
#include "word.h" | #include "word.h" | ||||
typedef struct gf_480_t { | |||||
uint64_t limb[8]; | |||||
} __attribute__((aligned(32))) gf_480_t; | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
static __inline__ void | |||||
gf_480_weak_reduce ( | |||||
gf_480_t *inout | |||||
) __attribute__((unused,always_inline)); | |||||
void | |||||
gf_480_strong_reduce ( | |||||
gf_480_t *inout | |||||
); | |||||
static __inline__ void | |||||
gf_480_bias ( | |||||
gf_480_t *inout, | |||||
int amount | |||||
) __attribute__((unused,always_inline)); | |||||
void | |||||
gf_480_mul ( | |||||
gf_480_t *__restrict__ out, | |||||
const gf_480_t *a, | |||||
const gf_480_t *b | |||||
); | |||||
void | |||||
gf_480_mulw ( | |||||
gf_480_t *__restrict__ out, | |||||
const gf_480_t *a, | |||||
uint64_t b | |||||
); | |||||
void | |||||
gf_480_sqr ( | |||||
gf_480_t *__restrict__ out, | |||||
const gf_480_t *a | |||||
); | |||||
void | |||||
gf_480_serialize ( | |||||
uint8_t *serial, | |||||
const struct gf_480_t *x | |||||
); | |||||
mask_t | |||||
gf_480_deserialize ( | |||||
gf_480_t *x, | |||||
const uint8_t serial[60] | |||||
); | |||||
/* -------------- Inline functions begin here -------------- */ | /* -------------- Inline functions begin here -------------- */ | ||||
void | |||||
gf_480_add_RAW ( | |||||
gf_480_t *out, | |||||
const gf_480_t *a, | |||||
const gf_480_t *b | |||||
) { | |||||
void gf_add_RAW (gf *out, const gf *a, const gf *b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | ||||
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | ||||
@@ -85,12 +30,7 @@ gf_480_add_RAW ( | |||||
*/ | */ | ||||
} | } | ||||
void | |||||
gf_480_sub_RAW ( | |||||
gf_480_t *out, | |||||
const gf_480_t *a, | |||||
const gf_480_t *b | |||||
) { | |||||
void gf_sub_RAW (gf *out, const gf *a, const gf *b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | ||||
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; | ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; | ||||
@@ -103,21 +43,15 @@ gf_480_sub_RAW ( | |||||
*/ | */ | ||||
} | } | ||||
void | |||||
gf_480_copy ( | |||||
gf_480_t *out, | |||||
const gf_480_t *a | |||||
) { | |||||
void gf_copy (gf *out, const gf *a) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(big_register_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(big_register_t); i++) { | ||||
((big_register_t *)out)[i] = ((const big_register_t *)a)[i]; | ((big_register_t *)out)[i] = ((const big_register_t *)a)[i]; | ||||
} | } | ||||
} | } | ||||
void | |||||
gf_480_bias ( | |||||
gf_480_t *a, | |||||
int amt | |||||
void gf_bias ( | |||||
gf *a, int amt | |||||
) { | ) { | ||||
uint64_t co1 = ((1ull<<60)-1)*amt, co2 = co1-amt; | uint64_t co1 = ((1ull<<60)-1)*amt, co2 = co1-amt; | ||||
@@ -141,10 +75,7 @@ gf_480_bias ( | |||||
#endif | #endif | ||||
} | } | ||||
void | |||||
gf_480_weak_reduce ( | |||||
gf_480_t *a | |||||
) { | |||||
void gf_weak_reduce (gf *a) { | |||||
/* PERF: use pshufb/palignr if anyone cares about speed of this */ | /* PERF: use pshufb/palignr if anyone cares about speed of this */ | ||||
uint64_t mask = (1ull<<60) - 1; | uint64_t mask = (1ull<<60) - 1; | ||||
uint64_t tmp = a->limb[7] >> 60; | uint64_t tmp = a->limb[7] >> 60; | ||||
@@ -160,4 +91,4 @@ gf_480_weak_reduce ( | |||||
}; /* extern "C" */ | }; /* extern "C" */ | ||||
#endif | #endif | ||||
#endif /* __gf_480_H__ */ | |||||
#endif /* __gf_H__ */ |
@@ -4,118 +4,41 @@ | |||||
#ifndef __P521_H__ | #ifndef __P521_H__ | ||||
#define __P521_H__ 1 | #define __P521_H__ 1 | ||||
#include "f_field.h" | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#include <assert.h> | #include <assert.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include "word.h" | |||||
typedef struct gf_521_t { | |||||
uint64_t limb[9]; | |||||
} gf_521_t; | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
static __inline__ void | |||||
gf_521_weak_reduce ( | |||||
gf_521_t *inout | |||||
) __attribute__((unused)); | |||||
void | |||||
gf_521_strong_reduce ( | |||||
gf_521_t *inout | |||||
); | |||||
static __inline__ void | |||||
gf_521_bias ( | |||||
gf_521_t *inout, | |||||
int amount | |||||
) __attribute__((unused)); | |||||
void | |||||
gf_521_mul ( | |||||
gf_521_t *__restrict__ out, | |||||
const gf_521_t *a, | |||||
const gf_521_t *b | |||||
); | |||||
void | |||||
gf_521_mulw ( | |||||
gf_521_t *__restrict__ out, | |||||
const gf_521_t *a, | |||||
uint64_t b | |||||
); | |||||
void | |||||
gf_521_sqr ( | |||||
gf_521_t *__restrict__ out, | |||||
const gf_521_t *a | |||||
); | |||||
void | |||||
gf_521_serialize ( | |||||
uint8_t *serial, | |||||
const struct gf_521_t *x | |||||
); | |||||
mask_t | |||||
gf_521_deserialize ( | |||||
gf_521_t *x, | |||||
const uint8_t serial[66] | |||||
); | |||||
/* -------------- Inline functions begin here -------------- */ | /* -------------- Inline functions begin here -------------- */ | ||||
void | |||||
gf_521_add_RAW ( | |||||
gf_521_t *out, | |||||
const gf_521_t *a, | |||||
const gf_521_t *b | |||||
) { | |||||
void gf_add_RAW (gf *out, const gf *a, const gf *b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<9; i++) { | for (i=0; i<9; i++) { | ||||
out->limb[i] = a->limb[i] + b->limb[i]; | out->limb[i] = a->limb[i] + b->limb[i]; | ||||
} | } | ||||
gf_521_weak_reduce(out); | |||||
gf_weak_reduce(out); | |||||
} | } | ||||
void | |||||
gf_521_sub_RAW ( | |||||
gf_521_t *out, | |||||
const gf_521_t *a, | |||||
const gf_521_t *b | |||||
) { | |||||
void gf_sub_RAW (gf *out, const gf *a, const gf *b) { | |||||
unsigned int i; | unsigned int i; | ||||
uint64_t co1 = ((1ull<<58)-1)*4, co2 = ((1ull<<57)-1)*4; | uint64_t co1 = ((1ull<<58)-1)*4, co2 = ((1ull<<57)-1)*4; | ||||
for (i=0; i<9; i++) { | for (i=0; i<9; i++) { | ||||
out->limb[i] = a->limb[i] - b->limb[i] + ((i==8) ? co2 : co1); | out->limb[i] = a->limb[i] - b->limb[i] + ((i==8) ? co2 : co1); | ||||
} | } | ||||
gf_521_weak_reduce(out); | |||||
} | |||||
void | |||||
gf_521_copy ( | |||||
gf_521_t *out, | |||||
const gf_521_t *a | |||||
) { | |||||
memcpy(out,a,sizeof(*a)); | |||||
gf_weak_reduce(out); | |||||
} | } | ||||
void | |||||
gf_521_bias ( | |||||
gf_521_t *a, | |||||
int amt | |||||
) { | |||||
void gf_bias (gf *a, int amt) { | |||||
(void) a; | (void) a; | ||||
(void) amt; | (void) amt; | ||||
} | } | ||||
void | |||||
gf_521_weak_reduce ( | |||||
gf_521_t *a | |||||
) { | |||||
void gf_weak_reduce (gf *a) { | |||||
uint64_t mask = (1ull<<58) - 1; | uint64_t mask = (1ull<<58) - 1; | ||||
uint64_t tmp = a->limb[8] >> 57; | uint64_t tmp = a->limb[8] >> 57; | ||||
int i; | int i; | ||||
@@ -4,20 +4,18 @@ | |||||
#ifndef __P521_H__ | #ifndef __P521_H__ | ||||
#define __P521_H__ 1 | #define __P521_H__ 1 | ||||
#include "f_field.h" | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#include <assert.h> | #include <assert.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include "word.h" | |||||
#include "constant_time.h" | #include "constant_time.h" | ||||
/* FIXME: Currenmtlty desn't work at all, because the struct is declared [9] and not [12] */ | |||||
#define LIMBPERM(x) (((x)%3)*4 + (x)/3) | #define LIMBPERM(x) (((x)%3)*4 + (x)/3) | ||||
#define USE_P521_3x3_TRANSPOSE | #define USE_P521_3x3_TRANSPOSE | ||||
typedef struct gf_521_s { | |||||
uint64_t limb[12]; | |||||
} __attribute__((aligned(32))) gf_521_t; | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
@@ -29,43 +27,25 @@ typedef uint64x4_t uint64x3_t; /* fit it in a vector register */ | |||||
static const uint64x3_t mask58 = { (1ull<<58) - 1, (1ull<<58) - 1, (1ull<<58) - 1, 0 }; | static const uint64x3_t mask58 = { (1ull<<58) - 1, (1ull<<58) - 1, (1ull<<58) - 1, 0 }; | ||||
/* Currently requires CLANG. Sorry. */ | /* Currently requires CLANG. Sorry. */ | ||||
static inline uint64x3_t | |||||
__attribute__((unused)) | |||||
timesW ( | |||||
uint64x3_t u | |||||
) { | |||||
return u.zxyw + u.zwww; | |||||
static inline uint64x3_t timesW (uint64x3_t u) { | |||||
return u.zxyw + u.zwww; | |||||
} | } | ||||
void | |||||
gf_521_add_RAW ( | |||||
gf_521_t *out, | |||||
const gf_521_t *a, | |||||
const gf_521_t *b | |||||
) { | |||||
void gf_add_RAW (gf *out, const gf *a, const gf *b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | ||||
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | ||||
} | } | ||||
} | } | ||||
void | |||||
gf_521_sub_RAW ( | |||||
gf_521_t *out, | |||||
const gf_521_t *a, | |||||
const gf_521_t *b | |||||
) { | |||||
void gf_sub_RAW (gf *out, const gf *a, const gf *b) { | |||||
unsigned int i; | unsigned int i; | ||||
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | ||||
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; | ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; | ||||
} | } | ||||
} | } | ||||
void | |||||
gf_521_bias ( | |||||
gf_521_t *a, | |||||
int amt | |||||
) { | |||||
void gf_bias (gf *a, int amt) { | |||||
uint64_t co0 = ((1ull<<58)-2)*amt, co1 = ((1ull<<58)-1)*amt; | uint64_t co0 = ((1ull<<58)-2)*amt, co1 = ((1ull<<58)-1)*amt; | ||||
uint64x4_t vlo = { co0, co1, co1, 0 }, vhi = { co1, co1, co1, 0 }; | uint64x4_t vlo = { co0, co1, co1, 0 }, vhi = { co1, co1, co1, 0 }; | ||||
((uint64x4_t*)a)[0] += vlo; | ((uint64x4_t*)a)[0] += vlo; | ||||
@@ -73,10 +53,7 @@ gf_521_bias ( | |||||
((uint64x4_t*)a)[2] += vhi; | ((uint64x4_t*)a)[2] += vhi; | ||||
} | } | ||||
void | |||||
gf_521_weak_reduce ( | |||||
gf_521_t *a | |||||
) { | |||||
void gf_weak_reduce (gf *a) { | |||||
#if 0 | #if 0 | ||||
int i; | int i; | ||||
assert(a->limb[3] == 0 && a->limb[7] == 0 && a->limb[11] == 0); | assert(a->limb[3] == 0 && a->limb[7] == 0 && a->limb[11] == 0); | ||||
@@ -84,7 +61,6 @@ gf_521_weak_reduce ( | |||||
assert(a->limb[i] < 3ull<<61); | assert(a->limb[i] < 3ull<<61); | ||||
} | } | ||||
#endif | #endif | ||||
uint64x3_t | uint64x3_t | ||||
ot0 = ((uint64x4_t*)a)[0], | ot0 = ((uint64x4_t*)a)[0], | ||||
ot1 = ((uint64x4_t*)a)[1], | ot1 = ((uint64x4_t*)a)[1], | ||||