Browse Source

further reduce the code in f_impl.h

master
Michael Hamburg 9 years ago
parent
commit
2402788996
10 changed files with 75 additions and 398 deletions
  1. +8
    -35
      src/p25519/arch_ref64/f_impl.h
  2. +6
    -25
      src/p25519/arch_x86_64/f_impl.h
  3. +6
    -28
      src/p448/arch_32/f_impl.h
  4. +6
    -28
      src/p448/arch_arm_32/f_impl.h
  5. +7
    -29
      src/p448/arch_neon_experimental/f_impl.h
  6. +8
    -30
      src/p448/arch_ref64/f_impl.h
  7. +6
    -25
      src/p448/arch_x86_64/f_impl.h
  8. +11
    -80
      src/p480/arch_x86_64/f_impl.h
  9. +8
    -85
      src/p521/arch_ref64/f_impl.h
  10. +9
    -33
      src/p521/arch_x86_64_r12/f_impl.h

+ 8
- 35
src/p25519/arch_ref64/f_impl.h View File

@@ -4,14 +4,12 @@
#ifndef __P25519_H__ #ifndef __P25519_H__
#define __P25519_H__ 1 #define __P25519_H__ 1


#include "f_field.h"

#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>
#include <string.h> #include <string.h>


#include "decaf/decaf_255.h"
#include "word.h"

#define LBITS 51
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}


#ifdef __cplusplus #ifdef __cplusplus
@@ -20,54 +18,29 @@ extern "C" {


/* -------------- Inline functions begin here -------------- */ /* -------------- Inline functions begin here -------------- */


void
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
for (i=0; i<5; i++) { for (i=0; i<5; i++) {
out->limb[i] = a->limb[i] + b->limb[i]; out->limb[i] = a->limb[i] + b->limb[i];
} }
gf_25519_weak_reduce(out);
gf_weak_reduce(out);
} }


void
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36; uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36;
for (i=0; i<5; i++) { for (i=0; i<5; i++) {
out->limb[i] = a->limb[i] - b->limb[i] + ((i==0) ? co2 : co1); out->limb[i] = a->limb[i] - b->limb[i] + ((i==0) ? co2 : co1);
} }
gf_25519_weak_reduce(out);
}

void
gf_25519_copy (
gf_25519_t out,
const gf_25519_t a
) {
memcpy(out,a,sizeof(*a));
gf_weak_reduce(out);
} }


void
gf_25519_bias (
gf_25519_t a,
int amt
) {
void gf_bias (gf a, int amt) {
(void) a; (void) a;
(void) amt; (void) amt;
} }


void
gf_25519_weak_reduce (
gf_25519_t a
) {
void gf_weak_reduce (gf a) {
uint64_t mask = (1ull<<51) - 1; uint64_t mask = (1ull<<51) - 1;
uint64_t tmp = a->limb[4] >> 51; uint64_t tmp = a->limb[4] >> 51;
int i; int i;


+ 6
- 25
src/p25519/arch_x86_64/f_impl.h View File

@@ -4,36 +4,24 @@
#ifndef __P25519_H__ #ifndef __P25519_H__
#define __P25519_H__ 1 #define __P25519_H__ 1


#include "f_field.h"

#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>
#include <string.h> #include <string.h>


#include "decaf/decaf_255.h"
#include "word.h"

#define DECAF_255_LIMB_BITS 51
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}


/* -------------- Inline functions begin here -------------- */ /* -------------- Inline functions begin here -------------- */


void
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
for (i=0; i<5; i++) { for (i=0; i<5; i++) {
out->limb[i] = a->limb[i] + b->limb[i]; out->limb[i] = a->limb[i] + b->limb[i];
} }
} }


void
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36; uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36;
for (i=0; i<5; i++) { for (i=0; i<5; i++) {
@@ -41,11 +29,7 @@ gf_25519_sub_RAW (
} }
} }


void
gf_25519_bias (
gf_25519_t a,
int amt
) {
void gf_bias (gf a, int amt) {
a->limb[0] += ((uint64_t)(amt)<<52) - 38*amt; a->limb[0] += ((uint64_t)(amt)<<52) - 38*amt;
int i; int i;
for (i=1; i<5; i++) { for (i=1; i<5; i++) {
@@ -53,10 +37,7 @@ gf_25519_bias (
} }
} }


void
gf_25519_weak_reduce (
gf_25519_t a
) {
void gf_weak_reduce (gf a) {
uint64_t mask = (1ull<<51) - 1; uint64_t mask = (1ull<<51) - 1;
uint64_t tmp = a->limb[4] >> 51; uint64_t tmp = a->limb[4] >> 51;
int i; int i;


+ 6
- 28
src/p448/arch_32/f_impl.h View File

@@ -4,17 +4,12 @@
#ifndef __P448_H__ #ifndef __P448_H__
#define __P448_H__ 1 #define __P448_H__ 1


#include "word.h"
#include "f_field.h"


#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>


typedef struct gf_448_s {
uint32_t limb[16];
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];

#define LBITS 28
#define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS
#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ #define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}


@@ -24,12 +19,7 @@ extern "C" {


/* -------------- Inline functions begin here -------------- */ /* -------------- Inline functions begin here -------------- */


void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i]; ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
@@ -42,12 +32,7 @@ gf_448_add_RAW (
*/ */
} }


void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i]; ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i];
@@ -60,11 +45,7 @@ gf_448_sub_RAW (
*/ */
} }


void
gf_448_bias (
gf_448_t a,
int amt
) {
void gf_bias (gf a, int amt) {
uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
uint32x4_t *aa = (uint32x4_t*) a; uint32x4_t *aa = (uint32x4_t*) a;
@@ -74,10 +55,7 @@ gf_448_bias (
aa[3] += lo; aa[3] += lo;
} }


void
gf_448_weak_reduce (
gf_448_t a
) {
void gf_weak_reduce (gf a) {
uint64_t mask = (1ull<<28) - 1; uint64_t mask = (1ull<<28) - 1;
uint64_t tmp = a->limb[15] >> 28; uint64_t tmp = a->limb[15] >> 28;
int i; int i;


+ 6
- 28
src/p448/arch_arm_32/f_impl.h View File

@@ -4,17 +4,12 @@
#ifndef __P448_H__ #ifndef __P448_H__
#define __P448_H__ 1 #define __P448_H__ 1


#include "word.h"
#include "f_field.h"


#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>


typedef struct gf_448_s {
uint32_t limb[16];
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];

#define LBITS 28
#define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS
#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ #define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}


@@ -24,12 +19,7 @@ extern "C" {


/* -------------- Inline functions begin here -------------- */ /* -------------- Inline functions begin here -------------- */


void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i]; ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
@@ -42,12 +32,7 @@ gf_448_add_RAW (
*/ */
} }


void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i]; ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i];
@@ -60,11 +45,7 @@ gf_448_sub_RAW (
*/ */
} }


void
gf_448_bias (
gf_448_t a,
int amt
) {
void gf_bias (gf a, int amt) {
uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
uint32x4_t *aa = (uint32x4_t*) a; uint32x4_t *aa = (uint32x4_t*) a;
@@ -74,10 +55,7 @@ gf_448_bias (
aa[3] += lo; aa[3] += lo;
} }


void
gf_448_weak_reduce (
gf_448_t a
) {
void gf_weak_reduce (gf a) {
uint64_t mask = (1ull<<28) - 1; uint64_t mask = (1ull<<28) - 1;
uint64_t tmp = a->limb[15] >> 28; uint64_t tmp = a->limb[15] >> 28;
int i; int i;


+ 7
- 29
src/p448/arch_neon_experimental/f_impl.h View File

@@ -4,20 +4,15 @@
#ifndef __P448_H__ #ifndef __P448_H__
#define __P448_H__ 1 #define __P448_H__ 1


#include "word.h"
#include "f_field.h"


#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>


typedef struct gf_448_s {
uint32_t limb[16];
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];

#define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15) #define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15)
#define USE_NEON_PERM 1 #define USE_NEON_PERM 1
#define LBITS 28
#define LIMBHI(x) ((x##ull)>>LBITS)
#define LIMBLO(x) ((x##ull)&((1ull<<LBITS)-1))
#define LIMBHI(x) ((x##ull)>>28)
#define LIMBLO(x) ((x##ull)&((1ull<<28)-1))
# define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ # define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
{{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \ {{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \
LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \ LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \
@@ -30,24 +25,14 @@ extern "C" {
/* -------------- Inline functions begin here -------------- */ /* -------------- Inline functions begin here -------------- */


void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i]; ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
} }
} }


void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i]; ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i];
@@ -60,11 +45,7 @@ gf_448_sub_RAW (
*/ */
} }


void
gf_448_bias (
gf_448_t a,
int amt
) {
void gf_bias (gf a, int amt) {
uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
uint32x4_t lo = {co1,co2,co1,co1}, hi = {co1,co1,co1,co1}; uint32x4_t lo = {co1,co2,co1,co1}, hi = {co1,co1,co1,co1};
uint32x4_t *aa = (uint32x4_t*) a; uint32x4_t *aa = (uint32x4_t*) a;
@@ -74,10 +55,7 @@ gf_448_bias (
aa[3] += hi; aa[3] += hi;
} }


void
gf_448_weak_reduce (
gf_448_t a
) {
void gf_weak_reduce (gf a) {


uint32x2_t *aa = (uint32x2_t*) a, vmask = {(1ull<<28)-1, (1ull<<28)-1}, vm2 = {0,-1}, uint32x2_t *aa = (uint32x2_t*) a, vmask = {(1ull<<28)-1, (1ull<<28)-1}, vm2 = {0,-1},
tmp = vshr_n_u32(aa[7],28); tmp = vshr_n_u32(aa[7],28);


+ 8
- 30
src/p448/arch_ref64/f_impl.h View File

@@ -4,17 +4,12 @@
#ifndef __P448_H__ #ifndef __P448_H__
#define __P448_H__ 1 #define __P448_H__ 1


#include "f_field.h"

#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>
#include <string.h> #include <string.h>


#include "word.h"

typedef struct gf_448_s {
uint64_t limb[8];
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];

#define LBITS 56
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}


#ifdef __cplusplus #ifdef __cplusplus
@@ -23,46 +18,29 @@ extern "C" {


/* -------------- Inline functions begin here -------------- */ /* -------------- Inline functions begin here -------------- */


void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
for (i=0; i<8; i++) { for (i=0; i<8; i++) {
out->limb[i] = a->limb[i] + b->limb[i]; out->limb[i] = a->limb[i] + b->limb[i];
} }
gf_448_weak_reduce(out);
gf_weak_reduce(out);
} }


void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2; uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2;
for (i=0; i<8; i++) { for (i=0; i<8; i++) {
out->limb[i] = a->limb[i] - b->limb[i] + ((i==4) ? co2 : co1); out->limb[i] = a->limb[i] - b->limb[i] + ((i==4) ? co2 : co1);
} }
gf_448_weak_reduce(out);
gf_weak_reduce(out);
} }


void
gf_448_bias (
gf_448_t a,
int amt
) {
void gf_bias (gf a, int amt) {
(void) a; (void) a;
(void) amt; (void) amt;
} }


void
gf_448_weak_reduce (
gf_448_t a
) {
void gf_weak_reduce (gf a) {
uint64_t mask = (1ull<<56) - 1; uint64_t mask = (1ull<<56) - 1;
uint64_t tmp = a->limb[7] >> 56; uint64_t tmp = a->limb[7] >> 56;
int i; int i;


+ 6
- 25
src/p448/arch_x86_64/f_impl.h View File

@@ -4,13 +4,11 @@
#ifndef __P448_H__ #ifndef __P448_H__
#define __P448_H__ 1 #define __P448_H__ 1


#include "f_field.h"

#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>


#include "decaf/decaf_448.h"
#include "word.h"

#define LBITS 56
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}


#ifdef __cplusplus #ifdef __cplusplus
@@ -19,12 +17,7 @@ extern "C" {


/* -------------- Inline functions begin here -------------- */ /* -------------- Inline functions begin here -------------- */


void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
@@ -37,12 +30,7 @@ gf_448_add_RAW (
*/ */
} }


void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
@@ -55,11 +43,7 @@ gf_448_sub_RAW (
*/ */
} }


void
gf_448_bias (
gf_448_t a,
int amt
) {
void gf_bias (gf a, int amt) {
uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt; uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt;
#if __AVX2__ #if __AVX2__
@@ -82,10 +66,7 @@ gf_448_bias (
#endif #endif
} }


void
gf_448_weak_reduce (
gf_448_t a
) {
void gf_weak_reduce (gf a) {
/* PERF: use pshufb/palignr if anyone cares about speed of this */ /* PERF: use pshufb/palignr if anyone cares about speed of this */
uint64_t mask = (1ull<<56) - 1; uint64_t mask = (1ull<<56) - 1;
uint64_t tmp = a->limb[7] >> 56; uint64_t tmp = a->limb[7] >> 56;


+ 11
- 80
src/p480/arch_x86_64/f_impl.h View File

@@ -1,78 +1,23 @@
/* Copyright (c) 2014 Cryptography Research, Inc. /* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information. * Released under the MIT License. See LICENSE.txt for license information.
*/ */
#ifndef __gf_480_H__
#define __gf_480_H__ 1
#ifndef __gf_H__
#define __gf_H__ 1

#include "f_field.h"


#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>


#include "word.h" #include "word.h"


typedef struct gf_480_t {
uint64_t limb[8];
} __attribute__((aligned(32))) gf_480_t;

#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
static __inline__ void
gf_480_weak_reduce (
gf_480_t *inout
) __attribute__((unused,always_inline));
void
gf_480_strong_reduce (
gf_480_t *inout
);
static __inline__ void
gf_480_bias (
gf_480_t *inout,
int amount
) __attribute__((unused,always_inline));
void
gf_480_mul (
gf_480_t *__restrict__ out,
const gf_480_t *a,
const gf_480_t *b
);

void
gf_480_mulw (
gf_480_t *__restrict__ out,
const gf_480_t *a,
uint64_t b
);

void
gf_480_sqr (
gf_480_t *__restrict__ out,
const gf_480_t *a
);

void
gf_480_serialize (
uint8_t *serial,
const struct gf_480_t *x
);

mask_t
gf_480_deserialize (
gf_480_t *x,
const uint8_t serial[60]
);


/* -------------- Inline functions begin here -------------- */ /* -------------- Inline functions begin here -------------- */


void
gf_480_add_RAW (
gf_480_t *out,
const gf_480_t *a,
const gf_480_t *b
) {
void gf_add_RAW (gf *out, const gf *a, const gf *b) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
@@ -85,12 +30,7 @@ gf_480_add_RAW (
*/ */
} }


void
gf_480_sub_RAW (
gf_480_t *out,
const gf_480_t *a,
const gf_480_t *b
) {
void gf_sub_RAW (gf *out, const gf *a, const gf *b) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
@@ -103,21 +43,15 @@ gf_480_sub_RAW (
*/ */
} }


void
gf_480_copy (
gf_480_t *out,
const gf_480_t *a
) {
void gf_copy (gf *out, const gf *a) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(big_register_t); i++) { for (i=0; i<sizeof(*out)/sizeof(big_register_t); i++) {
((big_register_t *)out)[i] = ((const big_register_t *)a)[i]; ((big_register_t *)out)[i] = ((const big_register_t *)a)[i];
} }
} }


void
gf_480_bias (
gf_480_t *a,
int amt
void gf_bias (
gf *a, int amt
) { ) {
uint64_t co1 = ((1ull<<60)-1)*amt, co2 = co1-amt; uint64_t co1 = ((1ull<<60)-1)*amt, co2 = co1-amt;
@@ -141,10 +75,7 @@ gf_480_bias (
#endif #endif
} }


void
gf_480_weak_reduce (
gf_480_t *a
) {
void gf_weak_reduce (gf *a) {
/* PERF: use pshufb/palignr if anyone cares about speed of this */ /* PERF: use pshufb/palignr if anyone cares about speed of this */
uint64_t mask = (1ull<<60) - 1; uint64_t mask = (1ull<<60) - 1;
uint64_t tmp = a->limb[7] >> 60; uint64_t tmp = a->limb[7] >> 60;
@@ -160,4 +91,4 @@ gf_480_weak_reduce (
}; /* extern "C" */ }; /* extern "C" */
#endif #endif


#endif /* __gf_480_H__ */
#endif /* __gf_H__ */

+ 8
- 85
src/p521/arch_ref64/f_impl.h View File

@@ -4,118 +4,41 @@
#ifndef __P521_H__ #ifndef __P521_H__
#define __P521_H__ 1 #define __P521_H__ 1


#include "f_field.h"

#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>
#include <string.h> #include <string.h>


#include "word.h"

typedef struct gf_521_t {
uint64_t limb[9];
} gf_521_t;

#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
static __inline__ void
gf_521_weak_reduce (
gf_521_t *inout
) __attribute__((unused));
void
gf_521_strong_reduce (
gf_521_t *inout
);

static __inline__ void
gf_521_bias (
gf_521_t *inout,
int amount
) __attribute__((unused));
void
gf_521_mul (
gf_521_t *__restrict__ out,
const gf_521_t *a,
const gf_521_t *b
);

void
gf_521_mulw (
gf_521_t *__restrict__ out,
const gf_521_t *a,
uint64_t b
);

void
gf_521_sqr (
gf_521_t *__restrict__ out,
const gf_521_t *a
);

void
gf_521_serialize (
uint8_t *serial,
const struct gf_521_t *x
);

mask_t
gf_521_deserialize (
gf_521_t *x,
const uint8_t serial[66]
);


/* -------------- Inline functions begin here -------------- */ /* -------------- Inline functions begin here -------------- */


void
gf_521_add_RAW (
gf_521_t *out,
const gf_521_t *a,
const gf_521_t *b
) {
void gf_add_RAW (gf *out, const gf *a, const gf *b) {
unsigned int i; unsigned int i;
for (i=0; i<9; i++) { for (i=0; i<9; i++) {
out->limb[i] = a->limb[i] + b->limb[i]; out->limb[i] = a->limb[i] + b->limb[i];
} }
gf_521_weak_reduce(out);
gf_weak_reduce(out);
} }


void
gf_521_sub_RAW (
gf_521_t *out,
const gf_521_t *a,
const gf_521_t *b
) {
void gf_sub_RAW (gf *out, const gf *a, const gf *b) {
unsigned int i; unsigned int i;
uint64_t co1 = ((1ull<<58)-1)*4, co2 = ((1ull<<57)-1)*4; uint64_t co1 = ((1ull<<58)-1)*4, co2 = ((1ull<<57)-1)*4;
for (i=0; i<9; i++) { for (i=0; i<9; i++) {
out->limb[i] = a->limb[i] - b->limb[i] + ((i==8) ? co2 : co1); out->limb[i] = a->limb[i] - b->limb[i] + ((i==8) ? co2 : co1);
} }
gf_521_weak_reduce(out);
}

void
gf_521_copy (
gf_521_t *out,
const gf_521_t *a
) {
memcpy(out,a,sizeof(*a));
gf_weak_reduce(out);
} }


void
gf_521_bias (
gf_521_t *a,
int amt
) {
void gf_bias (gf *a, int amt) {
(void) a; (void) a;
(void) amt; (void) amt;
} }


void
gf_521_weak_reduce (
gf_521_t *a
) {
void gf_weak_reduce (gf *a) {
uint64_t mask = (1ull<<58) - 1; uint64_t mask = (1ull<<58) - 1;
uint64_t tmp = a->limb[8] >> 57; uint64_t tmp = a->limb[8] >> 57;
int i; int i;


+ 9
- 33
src/p521/arch_x86_64_r12/f_impl.h View File

@@ -4,20 +4,18 @@
#ifndef __P521_H__ #ifndef __P521_H__
#define __P521_H__ 1 #define __P521_H__ 1


#include "f_field.h"

#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>
#include <string.h> #include <string.h>


#include "word.h"
#include "constant_time.h" #include "constant_time.h"


/* FIXME: Currently doesn't work at all, because the struct is declared [9] and not [12] */
#define LIMBPERM(x) (((x)%3)*4 + (x)/3) #define LIMBPERM(x) (((x)%3)*4 + (x)/3)
#define USE_P521_3x3_TRANSPOSE #define USE_P521_3x3_TRANSPOSE


typedef struct gf_521_s {
uint64_t limb[12];
} __attribute__((aligned(32))) gf_521_t;

#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
@@ -29,43 +27,25 @@ typedef uint64x4_t uint64x3_t; /* fit it in a vector register */
static const uint64x3_t mask58 = { (1ull<<58) - 1, (1ull<<58) - 1, (1ull<<58) - 1, 0 }; static const uint64x3_t mask58 = { (1ull<<58) - 1, (1ull<<58) - 1, (1ull<<58) - 1, 0 };


/* Currently requires CLANG. Sorry. */ /* Currently requires CLANG. Sorry. */
static inline uint64x3_t
__attribute__((unused))
timesW (
uint64x3_t u
) {
return u.zxyw + u.zwww;
static inline uint64x3_t timesW (uint64x3_t u) {
return u.zxyw + u.zwww;
} }


void
gf_521_add_RAW (
gf_521_t *out,
const gf_521_t *a,
const gf_521_t *b
) {
void gf_add_RAW (gf *out, const gf *a, const gf *b) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
} }
} }


void
gf_521_sub_RAW (
gf_521_t *out,
const gf_521_t *a,
const gf_521_t *b
) {
void gf_sub_RAW (gf *out, const gf *a, const gf *b) {
unsigned int i; unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
} }
} }


void
gf_521_bias (
gf_521_t *a,
int amt
) {
void gf_bias (gf *a, int amt) {
uint64_t co0 = ((1ull<<58)-2)*amt, co1 = ((1ull<<58)-1)*amt; uint64_t co0 = ((1ull<<58)-2)*amt, co1 = ((1ull<<58)-1)*amt;
uint64x4_t vlo = { co0, co1, co1, 0 }, vhi = { co1, co1, co1, 0 }; uint64x4_t vlo = { co0, co1, co1, 0 }, vhi = { co1, co1, co1, 0 };
((uint64x4_t*)a)[0] += vlo; ((uint64x4_t*)a)[0] += vlo;
@@ -73,10 +53,7 @@ gf_521_bias (
((uint64x4_t*)a)[2] += vhi; ((uint64x4_t*)a)[2] += vhi;
} }


void
gf_521_weak_reduce (
gf_521_t *a
) {
void gf_weak_reduce (gf *a) {
#if 0 #if 0
int i; int i;
assert(a->limb[3] == 0 && a->limb[7] == 0 && a->limb[11] == 0); assert(a->limb[3] == 0 && a->limb[7] == 0 && a->limb[11] == 0);
@@ -84,7 +61,6 @@ gf_521_weak_reduce (
assert(a->limb[i] < 3ull<<61); assert(a->limb[i] < 3ull<<61);
} }
#endif #endif
uint64x3_t uint64x3_t
ot0 = ((uint64x4_t*)a)[0], ot0 = ((uint64x4_t*)a)[0],
ot1 = ((uint64x4_t*)a)[1], ot1 = ((uint64x4_t*)a)[1],


Loading…
Cancel
Save