소스 검색

generate most of f_impl.h. Not tested on most arches yet :-(

master
Michael Hamburg 8 년 전
부모
커밋
4218223dd7
21개의 변경된 파일, 196개의 추가 및 837개의 삭제
  1. +35
    -5
      src/gen_headers/f_field_h.py
  2. +1
    -1
      src/p25519/arch_ref64/f_impl.c
  3. +0
    -78
      src/p25519/arch_ref64/f_impl.h
  4. +1
    -1
      src/p25519/arch_x86_64/f_impl.c
  5. +0
    -90
      src/p25519/arch_x86_64/f_impl.h
  6. +1
    -2
      src/p448/arch_32/f_impl.c
  7. +0
    -76
      src/p448/arch_32/f_impl.h
  8. +1
    -2
      src/p448/arch_arm_32/f_impl.c
  9. +0
    -76
      src/p448/arch_arm_32/f_impl.h
  10. +1
    -2
      src/p448/arch_neon_experimental/f_impl.c
  11. +1
    -77
      src/p448/arch_neon_experimental/f_impl.h
  12. +1
    -1
      src/p448/arch_ref64/f_impl.c
  13. +0
    -76
      src/p448/arch_ref64/f_impl.h
  14. +1
    -1
      src/p448/arch_x86_64/f_impl.c
  15. +0
    -79
      src/p448/arch_x86_64/f_impl.h
  16. +20
    -21
      src/p480/arch_x86_64/f_impl.c
  17. +40
    -60
      src/p480/arch_x86_64/f_impl.h
  18. +20
    -20
      src/p521/arch_ref64/f_impl.c
  19. +39
    -59
      src/p521/arch_ref64/f_impl.h
  20. +20
    -20
      src/p521/arch_x86_64_r12/f_impl.c
  21. +14
    -90
      src/p521/arch_x86_64_r12/f_impl.h

+ 35
- 5
src/gen_headers/f_field_h.py 파일 보기

@@ -9,22 +9,52 @@ f_field_h = gen_file(
#include "constant_time.h"
#include <string.h>

#include "f_impl.h"

#include "decaf/decaf_%(gf_bits)s.h" /* HACK in genheader */
#include "word.h"

#define GF_LIT_LIMB_BITS %(gf_lit_limb_bits)d
#define GF_BITS %(gf_bits)d
#define gf gf_%(gf_shortname)s_t
#define gf_s gf_%(gf_shortname)s_s
#define gf_mul gf_%(gf_shortname)s_mul
#define gf_sqr gf_%(gf_shortname)s_sqr
#define gf_copy gf_%(gf_shortname)s_copy
#define gf_add_RAW gf_%(gf_shortname)s_add_RAW
#define gf_sub_RAW gf_%(gf_shortname)s_sub_RAW
#define gf_mulw gf_%(gf_shortname)s_mulw
#define gf_bias gf_%(gf_shortname)s_bias
#define gf_isr gf_%(gf_shortname)s_isr
#define gf_weak_reduce gf_%(gf_shortname)s_weak_reduce
#define gf_strong_reduce gf_%(gf_shortname)s_strong_reduce
#define gf_mul gf_%(gf_shortname)s_mul
#define gf_sqr gf_%(gf_shortname)s_sqr
#define gf_mulw gf_%(gf_shortname)s_mulw
#define gf_isr gf_%(gf_shortname)s_isr
#define gf_serialize gf_%(gf_shortname)s_serialize
#define gf_deserialize gf_%(gf_shortname)s_deserialize

#define SQRT_MINUS_ONE P%(gf_shortname)s_SQRT_MINUS_ONE /* might not be defined */

#define INLINE_UNUSED __inline__ __attribute__((unused,always_inline))

#ifdef __cplusplus
extern "C" {
#endif

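/* gf_copy is defined inline right here; the other inline helpers are only declared here and are defined in f_impl.h, which is included below */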
static INLINE_UNUSED void gf_copy (gf out, const gf a) { *out = *a; }
static INLINE_UNUSED void gf_add_RAW (gf out, const gf a, const gf b);
static INLINE_UNUSED void gf_sub_RAW (gf out, const gf a, const gf b);
static INLINE_UNUSED void gf_bias (gf inout, int amount);
static INLINE_UNUSED void gf_weak_reduce (gf inout);

void gf_strong_reduce (gf inout);
void gf_mul (gf_s *__restrict__ out, const gf a, const gf b);
void gf_mulw (gf_s *__restrict__ out, const gf a, uint64_t b);
void gf_sqr (gf_s *__restrict__ out, const gf a);
void gf_serialize (uint8_t *serial, const gf x);
mask_t gf_deserialize (gf x, const uint8_t serial[(GF_BITS-1)/8+1]);

#ifdef __cplusplus
} /* extern "C" */
#endif

#include "f_impl.h" /* Bring in the inline implementations */
""")

+ 1
- 1
src/p25519/arch_ref64/f_impl.c 파일 보기

@@ -2,7 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#include "f_impl.h"
#include "f_field.h"

static __inline__ __uint128_t widemul(
const uint64_t a,


+ 0
- 78
src/p25519/arch_ref64/f_impl.h 파일 보기

@@ -14,88 +14,10 @@
#define LBITS 51
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}

/*
#define FIELD_LITERAL(a,b,c,d) {{ \
(a##ull) & LMASK, \
((a##ull)>>51 | (b##ull)<<13) & LMASK, \
((b##ull)>>38 | (c##ull)<<26) & LMASK, \
((c##ull)>>25 | (d##ull)<<39) & LMASK, \
(d##ull)>>12 \
}}
*/

#ifdef __cplusplus
extern "C" {
#endif

static __inline__ void
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) __attribute__((unused));
static __inline__ void
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) __attribute__((unused));
static __inline__ void
gf_25519_copy (
gf_25519_t out,
const gf_25519_t a
) __attribute__((unused));
static __inline__ void
gf_25519_weak_reduce (
gf_25519_t inout
) __attribute__((unused));
void
gf_25519_strong_reduce (
gf_25519_t inout
);

static __inline__ void
gf_25519_bias (
gf_25519_t inout,
int amount
) __attribute__((unused));
void
gf_25519_mul (
gf_25519_s *__restrict__ out,
const gf_25519_t a,
const gf_25519_t b
);

void
gf_25519_mulw (
gf_25519_s *__restrict__ out,
const gf_25519_t a,
uint64_t b
);

void
gf_25519_sqr (
gf_25519_s *__restrict__ out,
const gf_25519_t a
);

void
gf_25519_serialize (
uint8_t serial[32],
const gf_25519_t x
);

mask_t
gf_25519_deserialize (
gf_25519_t x,
const uint8_t serial[32]
);

/* -------------- Inline functions begin here -------------- */

void


+ 1
- 1
src/p25519/arch_x86_64/f_impl.c 파일 보기

@@ -2,7 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#include "f_impl.h"
#include "f_field.h"
#include "x86-64-arith.h"

static inline uint64_t shr(__uint128_t x, int n) {


+ 0
- 90
src/p25519/arch_x86_64/f_impl.h 파일 보기

@@ -14,88 +14,6 @@
#define DECAF_255_LIMB_BITS 51
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}

/*
#define FIELD_LITERAL(a,b,c,d) {{ \
(a##ull) & LMASK, \
((a##ull)>>51 | (b##ull)<<13) & LMASK, \
((b##ull)>>38 | (c##ull)<<26) & LMASK, \
((c##ull)>>25 | (d##ull)<<39) & LMASK, \
(d##ull)>>12 \
}}
*/

#ifdef __cplusplus
extern "C" {
#endif

static __inline__ void
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) __attribute__((unused));
static __inline__ void
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) __attribute__((unused));
static __inline__ void
gf_25519_copy (
gf_25519_t out,
const gf_25519_t a
) __attribute__((unused));
static __inline__ void
gf_25519_weak_reduce (
gf_25519_t inout
) __attribute__((unused));
void
gf_25519_strong_reduce (
gf_25519_t inout
);

static __inline__ void
gf_25519_bias (
gf_25519_t inout,
int amount
) __attribute__((unused));
void
gf_25519_mul (
gf_25519_s *__restrict__ out,
const gf_25519_t a,
const gf_25519_t b
);

void
gf_25519_mulw (
gf_25519_s *__restrict__ out,
const gf_25519_t a,
uint64_t b
);

void
gf_25519_sqr (
gf_25519_s *__restrict__ out,
const gf_25519_t a
);

void
gf_25519_serialize (
uint8_t serial[32],
const gf_25519_t x
);

mask_t
gf_25519_deserialize (
gf_25519_t x,
const uint8_t serial[32]
);

/* -------------- Inline functions begin here -------------- */

void
@@ -123,14 +41,6 @@ gf_25519_sub_RAW (
}
}

/* Copy the contents of a into out: sizeof(*a) bytes, via memcpy. */
void
gf_25519_copy (
    gf_25519_t out,
    const gf_25519_t a
) {
    const size_t nbytes = sizeof(*a);
    memcpy(out, a, nbytes);
}

void
gf_25519_bias (
gf_25519_t a,


+ 1
- 2
src/p448/arch_32/f_impl.c 파일 보기

@@ -2,8 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#include "word.h"
#include "f_impl.h"
#include "f_field.h"

static inline mask_t __attribute__((always_inline))
is_zero (


+ 0
- 76
src/p448/arch_32/f_impl.h 파일 보기

@@ -22,74 +22,6 @@ typedef struct gf_448_s {
extern "C" {
#endif

static __inline__ void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
gf_448_copy (
gf_448_t out,
const gf_448_t a
) __attribute__((unused,always_inline));
static __inline__ void
gf_448_weak_reduce (
gf_448_t inout
) __attribute__((unused,always_inline));
void
gf_448_strong_reduce (
gf_448_t inout
);
static __inline__ void
gf_448_bias (
gf_448_t inout,
int amount
) __attribute__((unused,always_inline));

void
gf_448_mul (
gf_448_s *__restrict__ out,
const gf_448_t a,
const gf_448_t b
);

void
gf_448_mulw (
gf_448_s *__restrict__ out,
const gf_448_t a,
uint64_t b
);

void
gf_448_sqr (
gf_448_s *__restrict__ out,
const gf_448_t a
);

void
gf_448_serialize (
uint8_t *serial,
const gf_448_t x
);

mask_t
gf_448_deserialize (
gf_448_t x,
const uint8_t serial[56]
);

/* -------------- Inline functions begin here -------------- */

void
@@ -128,14 +60,6 @@ gf_448_sub_RAW (
*/
}

/* Copy the element a refers to into the one out refers to by plain assignment. */
void
gf_448_copy (
    gf_448_t out,
    const gf_448_t a
) {
    out[0] = a[0];
}

void
gf_448_bias (
gf_448_t a,


+ 1
- 2
src/p448/arch_arm_32/f_impl.c 파일 보기

@@ -2,8 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#include "word.h"
#include "f_impl.h"
#include "f_field.h"

static inline mask_t __attribute__((always_inline))
is_zero (


+ 0
- 76
src/p448/arch_arm_32/f_impl.h 파일 보기

@@ -22,74 +22,6 @@ typedef struct gf_448_s {
extern "C" {
#endif

static __inline__ void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
gf_448_copy (
gf_448_t out,
const gf_448_t a
) __attribute__((unused,always_inline));
static __inline__ void
gf_448_weak_reduce (
gf_448_t inout
) __attribute__((unused,always_inline));
void
gf_448_strong_reduce (
gf_448_t inout
);
static __inline__ void
gf_448_bias (
gf_448_t inout,
int amount
) __attribute__((unused,always_inline));

void
gf_448_mul (
gf_448_s *__restrict__ out,
const gf_448_t a,
const gf_448_t b
);

void
gf_448_mulw (
gf_448_s *__restrict__ out,
const gf_448_t a,
uint64_t b
);

void
gf_448_sqr (
gf_448_s *__restrict__ out,
const gf_448_t a
);

void
gf_448_serialize (
uint8_t *serial,
const gf_448_t x
);

mask_t
gf_448_deserialize (
gf_448_t x,
const uint8_t serial[56]
);

/* -------------- Inline functions begin here -------------- */

void
@@ -128,14 +60,6 @@ gf_448_sub_RAW (
*/
}

/* Copy the element a refers to into the one out refers to by plain assignment. */
void
gf_448_copy (
    gf_448_t out,
    const gf_448_t a
) {
    out[0] = a[0];
}

void
gf_448_bias (
gf_448_t a,


+ 1
- 2
src/p448/arch_neon_experimental/f_impl.c 파일 보기

@@ -2,8 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#include "word.h"
#include "f_impl.h"
#include "f_field.h"

static inline mask_t __attribute__((always_inline))
is_zero (


+ 1
- 77
src/p448/arch_neon_experimental/f_impl.h 파일 보기

@@ -27,75 +27,7 @@ typedef struct gf_448_s {
#ifdef __cplusplus
extern "C" {
#endif

static __inline__ void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
gf_448_copy (
gf_448_t out,
const gf_448_t a
) __attribute__((unused,always_inline));
static __inline__ void
gf_448_weak_reduce (
gf_448_t inout
) __attribute__((unused,always_inline));
void
gf_448_strong_reduce (
gf_448_t inout
);
static __inline__ void
gf_448_bias (
gf_448_t inout,
int amount
) __attribute__((unused,always_inline));

void
gf_448_mul (
gf_448_s *__restrict__ out,
const gf_448_t a,
const gf_448_t b
);

void
gf_448_mulw (
gf_448_s *__restrict__ out,
const gf_448_t a,
uint64_t b
);

void
gf_448_sqr (
gf_448_s *__restrict__ out,
const gf_448_t a
);

void
gf_448_serialize (
uint8_t *serial,
const gf_448_t x
);

mask_t
gf_448_deserialize (
gf_448_t x,
const uint8_t serial[56]
);

/* -------------- Inline functions begin here -------------- */

void
@@ -128,14 +60,6 @@ gf_448_sub_RAW (
*/
}

/* Copy the element a refers to into the one out refers to by plain assignment. */
void
gf_448_copy (
    gf_448_t out,
    const gf_448_t a
) {
    out[0] = a[0];
}

void
gf_448_bias (
gf_448_t a,


+ 1
- 1
src/p448/arch_ref64/f_impl.c 파일 보기

@@ -2,7 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#include "f_impl.h"
#include "f_field.h"

static __inline__ __uint128_t widemul(
const uint64_t a,


+ 0
- 76
src/p448/arch_ref64/f_impl.h 파일 보기

@@ -21,74 +21,6 @@ typedef struct gf_448_s {
extern "C" {
#endif

static __inline__ void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused));
static __inline__ void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused));
static __inline__ void
gf_448_copy (
gf_448_t out,
const gf_448_t a
) __attribute__((unused));
static __inline__ void
gf_448_weak_reduce (
gf_448_t inout
) __attribute__((unused));
void
gf_448_strong_reduce (
gf_448_t inout
);

static __inline__ void
gf_448_bias (
gf_448_t inout,
int amount
) __attribute__((unused));
void
gf_448_mul (
gf_448_s *__restrict__ out,
const gf_448_t a,
const gf_448_t b
);

void
gf_448_mulw (
gf_448_s *__restrict__ out,
const gf_448_t a,
uint64_t b
);

void
gf_448_sqr (
gf_448_s *__restrict__ out,
const gf_448_t a
);

void
gf_448_serialize (
uint8_t *serial,
const gf_448_t x
);

mask_t
gf_448_deserialize (
gf_448_t x,
const uint8_t serial[56]
);

/* -------------- Inline functions begin here -------------- */

void
@@ -118,14 +50,6 @@ gf_448_sub_RAW (
gf_448_weak_reduce(out);
}

/* Copy the contents of a into out: sizeof(*a) bytes, via memcpy. */
void
gf_448_copy (
    gf_448_t out,
    const gf_448_t a
) {
    const size_t nbytes = sizeof(*a);
    memcpy(out, a, nbytes);
}

void
gf_448_bias (
gf_448_t a,


+ 1
- 1
src/p448/arch_x86_64/f_impl.c 파일 보기

@@ -2,7 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#include "f_impl.h"
#include "f_field.h"
#include "x86-64-arith.h"

void


+ 0
- 79
src/p448/arch_x86_64/f_impl.h 파일 보기

@@ -17,74 +17,6 @@
extern "C" {
#endif

static __inline__ void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
gf_448_copy (
gf_448_t out,
const gf_448_t a
) __attribute__((unused,always_inline));
static __inline__ void
gf_448_weak_reduce (
gf_448_t inout
) __attribute__((unused,always_inline));
void
gf_448_strong_reduce (
gf_448_t inout
);

static __inline__ void
gf_448_bias (
gf_448_t inout,
int amount
) __attribute__((unused,always_inline));
void
gf_448_mul (
gf_448_s *__restrict__ out,
const gf_448_t a,
const gf_448_t b
);

void
gf_448_mulw (
gf_448_s *__restrict__ out,
const gf_448_t a,
uint64_t b
);

void
gf_448_sqr (
gf_448_s *__restrict__ out,
const gf_448_t a
);

void
gf_448_serialize (
uint8_t *serial,
const gf_448_t x
);

mask_t
gf_448_deserialize (
gf_448_t x,
const uint8_t serial[56]
);

/* -------------- Inline functions begin here -------------- */

void
@@ -123,17 +55,6 @@ gf_448_sub_RAW (
*/
}

/*
 * Copy a into out one big_register_t at a time.
 * The number of chunks copied is sizeof(*out)/sizeof(big_register_t);
 * NOTE(review): any remainder bytes would be skipped, so this presumably
 * relies on the element size being a multiple of the register size -- confirm.
 */
void
gf_448_copy (
    gf_448_t out,
    const gf_448_t a
) {
    big_register_t *dst = (big_register_t *)out;
    const big_register_t *src = (const big_register_t *)a;
    unsigned int idx;
    for (idx=0; idx<sizeof(*out)/sizeof(big_register_t); idx++) {
        dst[idx] = src[idx];
    }
}

void
gf_448_bias (
gf_448_t a,


+ 20
- 21
src/p480/arch_x86_64/f_impl.c 파일 보기

@@ -2,14 +2,13 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#include "f_impl.h"
#include "x86-64-arith.h"
#include "f_field.h"

void
p480_mul (
p480_t *__restrict__ cs,
const p480_t *as,
const p480_t *bs
gf_480_mul (
gf_480_t *__restrict__ cs,
const gf_480_t *as,
const gf_480_t *bs
) {
const uint64_t *a = as->limb, *b = bs->limb;
uint64_t *c = cs->limb;
@@ -146,9 +145,9 @@ p480_mul (
}

void
p480_mulw (
p480_t *__restrict__ cs,
const p480_t *as,
gf_480_mulw (
gf_480_t *__restrict__ cs,
const gf_480_t *as,
uint64_t b
) {
const uint64_t *a = as->limb;
@@ -191,9 +190,9 @@ p480_mulw (
}

void
p480_sqr (
p480_t *__restrict__ cs,
const p480_t *as
gf_480_sqr (
gf_480_t *__restrict__ cs,
const gf_480_t *as
) {
const uint64_t *a = as->limb;
uint64_t *c = cs->limb;
@@ -306,8 +305,8 @@ p480_sqr (
}

void
p480_strong_reduce (
p480_t *a
gf_480_strong_reduce (
gf_480_t *a
) {
uint64_t mask = (1ull<<60)-1;

@@ -349,14 +348,14 @@ p480_strong_reduce (
}

void
p480_serialize (
gf_480_serialize (
uint8_t *serial,
const struct p480_t *x
const struct gf_480_t *x
) {
int i,j,k=0;
p480_t red;
p480_copy(&red, x);
p480_strong_reduce(&red);
gf_480_t red;
gf_480_copy(&red, x);
gf_480_strong_reduce(&red);
word_t r = 0;
for (i=0; i<8; i+=2) {
r = red.limb[i];
@@ -375,8 +374,8 @@ p480_serialize (
}

mask_t
p480_deserialize (
p480_t *x,
gf_480_deserialize (
gf_480_t *x,
const uint8_t serial[60]
) {
int i,j,k=0;


+ 40
- 60
src/p480/arch_x86_64/f_impl.h 파일 보기

@@ -1,97 +1,77 @@
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/
#ifndef __p480_H__
#define __p480_H__ 1
#ifndef __gf_480_H__
#define __gf_480_H__ 1

#include <stdint.h>
#include <assert.h>

#include "word.h"

typedef struct p480_t {
typedef struct gf_480_t {
uint64_t limb[8];
} __attribute__((aligned(32))) p480_t;
} __attribute__((aligned(32))) gf_480_t;

#ifdef __cplusplus
extern "C" {
#endif

static __inline__ void
p480_add_RAW (
p480_t *out,
const p480_t *a,
const p480_t *b
) __attribute__((unused,always_inline));
static __inline__ void
p480_sub_RAW (
p480_t *out,
const p480_t *a,
const p480_t *b
) __attribute__((unused,always_inline));
static __inline__ void
p480_copy (
p480_t *out,
const p480_t *a
) __attribute__((unused,always_inline));
static __inline__ void
p480_weak_reduce (
p480_t *inout
gf_480_weak_reduce (
gf_480_t *inout
) __attribute__((unused,always_inline));
void
p480_strong_reduce (
p480_t *inout
gf_480_strong_reduce (
gf_480_t *inout
);
static __inline__ void
p480_bias (
p480_t *inout,
gf_480_bias (
gf_480_t *inout,
int amount
) __attribute__((unused,always_inline));
void
p480_mul (
p480_t *__restrict__ out,
const p480_t *a,
const p480_t *b
gf_480_mul (
gf_480_t *__restrict__ out,
const gf_480_t *a,
const gf_480_t *b
);

void
p480_mulw (
p480_t *__restrict__ out,
const p480_t *a,
gf_480_mulw (
gf_480_t *__restrict__ out,
const gf_480_t *a,
uint64_t b
);

void
p480_sqr (
p480_t *__restrict__ out,
const p480_t *a
gf_480_sqr (
gf_480_t *__restrict__ out,
const gf_480_t *a
);

void
p480_serialize (
gf_480_serialize (
uint8_t *serial,
const struct p480_t *x
const struct gf_480_t *x
);

mask_t
p480_deserialize (
p480_t *x,
gf_480_deserialize (
gf_480_t *x,
const uint8_t serial[60]
);

/* -------------- Inline functions begin here -------------- */

void
p480_add_RAW (
p480_t *out,
const p480_t *a,
const p480_t *b
gf_480_add_RAW (
gf_480_t *out,
const gf_480_t *a,
const gf_480_t *b
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
@@ -106,10 +86,10 @@ p480_add_RAW (
}

void
p480_sub_RAW (
p480_t *out,
const p480_t *a,
const p480_t *b
gf_480_sub_RAW (
gf_480_t *out,
const gf_480_t *a,
const gf_480_t *b
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
@@ -124,9 +104,9 @@ p480_sub_RAW (
}

void
p480_copy (
p480_t *out,
const p480_t *a
gf_480_copy (
gf_480_t *out,
const gf_480_t *a
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(big_register_t); i++) {
@@ -135,8 +115,8 @@ p480_copy (
}

void
p480_bias (
p480_t *a,
gf_480_bias (
gf_480_t *a,
int amt
) {
uint64_t co1 = ((1ull<<60)-1)*amt, co2 = co1-amt;
@@ -162,8 +142,8 @@ p480_bias (
}

void
p480_weak_reduce (
p480_t *a
gf_480_weak_reduce (
gf_480_t *a
) {
/* PERF: use pshufb/palignr if anyone cares about speed of this */
uint64_t mask = (1ull<<60) - 1;
@@ -180,4 +160,4 @@ p480_weak_reduce (
}; /* extern "C" */
#endif

#endif /* __p480_H__ */
#endif /* __gf_480_H__ */

+ 20
- 20
src/p521/arch_ref64/f_impl.c 파일 보기

@@ -2,7 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#include "f_impl.h"
#include "f_field.h"

static __inline__ __uint128_t widemul(
const uint64_t a,
@@ -17,10 +17,10 @@ static __inline__ uint64_t is_zero(uint64_t a) {
}

void
p521_mul (
p521_t *__restrict__ cs,
const p521_t *as,
const p521_t *bs
gf_521_mul (
gf_521_t *__restrict__ cs,
const gf_521_t *as,
const gf_521_t *bs
) {
uint64_t *c = cs->limb;
const uint64_t *a = as->limb, *b = bs->limb;
@@ -158,9 +158,9 @@ p521_mul (
}

void
p521_mulw (
p521_t *__restrict__ cs,
const p521_t *as,
gf_521_mulw (
gf_521_t *__restrict__ cs,
const gf_521_t *as,
uint64_t b
) {
const uint64_t *a = as->limb;
@@ -197,9 +197,9 @@ p521_mulw (
}

void
p521_sqr (
p521_t *__restrict__ cs,
const p521_t *as
gf_521_sqr (
gf_521_t *__restrict__ cs,
const gf_521_t *as
) {
uint64_t *c = cs->limb;
const uint64_t *a = as->limb;
@@ -306,8 +306,8 @@ p521_sqr (
}

void
p521_strong_reduce (
p521_t *a
gf_521_strong_reduce (
gf_521_t *a
) {
uint64_t mask = (1ull<<58)-1, mask2 = (1ull<<57)-1;

@@ -347,14 +347,14 @@ p521_strong_reduce (
}

void
p521_serialize (
gf_521_serialize (
uint8_t *serial,
const struct p521_t *x
const struct gf_521_t *x
) {
int i,k=0;
p521_t red;
p521_copy(&red, x);
p521_strong_reduce(&red);
gf_521_t red;
gf_521_copy(&red, x);
gf_521_strong_reduce(&red);
uint64_t r=0;
int bits = 0;
@@ -371,8 +371,8 @@ p521_serialize (
}

mask_t
p521_deserialize (
p521_t *x,
gf_521_deserialize (
gf_521_t *x,
const uint8_t serial[66]
) {
int i,k=0,bits=0;


+ 39
- 59
src/p521/arch_ref64/f_impl.h 파일 보기

@@ -10,122 +10,102 @@

#include "word.h"

typedef struct p521_t {
typedef struct gf_521_t {
uint64_t limb[9];
} p521_t;
} gf_521_t;

#ifdef __cplusplus
extern "C" {
#endif

static __inline__ void
p521_add_RAW (
p521_t *out,
const p521_t *a,
const p521_t *b
) __attribute__((unused));
static __inline__ void
p521_sub_RAW (
p521_t *out,
const p521_t *a,
const p521_t *b
) __attribute__((unused));
static __inline__ void
p521_copy (
p521_t *out,
const p521_t *a
) __attribute__((unused));
static __inline__ void
p521_weak_reduce (
p521_t *inout
gf_521_weak_reduce (
gf_521_t *inout
) __attribute__((unused));
void
p521_strong_reduce (
p521_t *inout
gf_521_strong_reduce (
gf_521_t *inout
);

static __inline__ void
p521_bias (
p521_t *inout,
gf_521_bias (
gf_521_t *inout,
int amount
) __attribute__((unused));
void
p521_mul (
p521_t *__restrict__ out,
const p521_t *a,
const p521_t *b
gf_521_mul (
gf_521_t *__restrict__ out,
const gf_521_t *a,
const gf_521_t *b
);

void
p521_mulw (
p521_t *__restrict__ out,
const p521_t *a,
gf_521_mulw (
gf_521_t *__restrict__ out,
const gf_521_t *a,
uint64_t b
);

void
p521_sqr (
p521_t *__restrict__ out,
const p521_t *a
gf_521_sqr (
gf_521_t *__restrict__ out,
const gf_521_t *a
);

void
p521_serialize (
gf_521_serialize (
uint8_t *serial,
const struct p521_t *x
const struct gf_521_t *x
);

mask_t
p521_deserialize (
p521_t *x,
gf_521_deserialize (
gf_521_t *x,
const uint8_t serial[66]
);

/* -------------- Inline functions begin here -------------- */

void
p521_add_RAW (
p521_t *out,
const p521_t *a,
const p521_t *b
gf_521_add_RAW (
gf_521_t *out,
const gf_521_t *a,
const gf_521_t *b
) {
unsigned int i;
for (i=0; i<9; i++) {
out->limb[i] = a->limb[i] + b->limb[i];
}
p521_weak_reduce(out);
gf_521_weak_reduce(out);
}

void
p521_sub_RAW (
p521_t *out,
const p521_t *a,
const p521_t *b
gf_521_sub_RAW (
gf_521_t *out,
const gf_521_t *a,
const gf_521_t *b
) {
unsigned int i;
uint64_t co1 = ((1ull<<58)-1)*4, co2 = ((1ull<<57)-1)*4;
for (i=0; i<9; i++) {
out->limb[i] = a->limb[i] - b->limb[i] + ((i==8) ? co2 : co1);
}
p521_weak_reduce(out);
gf_521_weak_reduce(out);
}

void
p521_copy (
p521_t *out,
const p521_t *a
gf_521_copy (
gf_521_t *out,
const gf_521_t *a
) {
memcpy(out,a,sizeof(*a));
}

void
p521_bias (
p521_t *a,
gf_521_bias (
gf_521_t *a,
int amt
) {
(void) a;
@@ -133,8 +113,8 @@ p521_bias (
}

void
p521_weak_reduce (
p521_t *a
gf_521_weak_reduce (
gf_521_t *a
) {
uint64_t mask = (1ull<<58) - 1;
uint64_t tmp = a->limb[8] >> 57;


+ 20
- 20
src/p521/arch_x86_64_r12/f_impl.c 파일 보기

@@ -2,7 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#include "f_impl.h"
#include "f_field.h"

typedef struct {
uint64x3_t lo, hi, hier;
@@ -168,10 +168,10 @@ static inline void hexad_sqr_signed (


void
p521_mul (
p521_t *__restrict__ cs,
const p521_t *as,
const p521_t *bs
gf_521_mul (
gf_521_t *__restrict__ cs,
const gf_521_t *as,
const gf_521_t *bs
) {
int i;
@@ -254,9 +254,9 @@ p521_mul (


void
p521_sqr (
p521_t *__restrict__ cs,
const p521_t *as
gf_521_sqr (
gf_521_t *__restrict__ cs,
const gf_521_t *as
) {

@@ -313,9 +313,9 @@ p521_sqr (
}

void
p521_mulw (
p521_t *__restrict__ cs,
const p521_t *as,
gf_521_mulw (
gf_521_t *__restrict__ cs,
const gf_521_t *as,
uint64_t b
) {
@@ -375,8 +375,8 @@ p521_mulw (


void
p521_strong_reduce (
p521_t *a
gf_521_strong_reduce (
gf_521_t *a
) {
uint64_t mask = (1ull<<58)-1, mask2 = (1ull<<57)-1;

@@ -418,14 +418,14 @@ p521_strong_reduce (
}

void
p521_serialize (
gf_521_serialize (
uint8_t *serial,
const struct p521_t *x
const struct gf_521_t *x
) {
unsigned int i,k=0;
p521_t red;
p521_copy(&red, x);
p521_strong_reduce(&red);
gf_521_t red;
gf_521_copy(&red, x);
gf_521_strong_reduce(&red);
uint64_t r=0;
int bits = 0;
@@ -442,8 +442,8 @@ p521_serialize (
}

mask_t
p521_deserialize (
p521_t *x,
gf_521_deserialize (
gf_521_t *x,
const uint8_t serial[LIMBPERM(66)]
) {
int i,k=0,bits=0;


+ 14
- 90
src/p521/arch_x86_64_r12/f_impl.h 파일 보기

@@ -14,82 +14,14 @@
#define LIMBPERM(x) (((x)%3)*4 + (x)/3)
#define USE_P521_3x3_TRANSPOSE

typedef struct p521_t {
typedef struct gf_521_s {
uint64_t limb[12];
} __attribute__((aligned(32))) p521_t;
} __attribute__((aligned(32))) gf_521_t;

#ifdef __cplusplus
extern "C" {
#endif

static __inline__ void
p521_add_RAW (
p521_t *out,
const p521_t *a,
const p521_t *b
) __attribute__((unused));
static __inline__ void
p521_sub_RAW (
p521_t *out,
const p521_t *a,
const p521_t *b
) __attribute__((unused));
static __inline__ void
p521_copy (
p521_t *out,
const p521_t *a
) __attribute__((unused));
static __inline__ void
p521_weak_reduce (
p521_t *inout
) __attribute__((unused));
void
p521_strong_reduce (
p521_t *inout
);

static __inline__ void
p521_bias (
p521_t *inout,
int amount
) __attribute__((unused));
void
p521_mul (
p521_t *__restrict__ out,
const p521_t *a,
const p521_t *b
);

void
p521_mulw (
p521_t *__restrict__ out,
const p521_t *a,
uint64_t b
);

void
p521_sqr (
p521_t *__restrict__ out,
const p521_t *a
);

void
p521_serialize (
uint8_t *serial,
const struct p521_t *x
);

mask_t
p521_deserialize (
p521_t *x,
const uint8_t serial[66]
);

/* -------------- Inline functions begin here -------------- */

typedef uint64x4_t uint64x3_t; /* fit it in a vector register */
@@ -106,10 +38,10 @@ timesW (
}

void
p521_add_RAW (
p521_t *out,
const p521_t *a,
const p521_t *b
gf_521_add_RAW (
gf_521_t *out,
const gf_521_t *a,
const gf_521_t *b
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
@@ -118,10 +50,10 @@ p521_add_RAW (
}

void
p521_sub_RAW (
p521_t *out,
const p521_t *a,
const p521_t *b
gf_521_sub_RAW (
gf_521_t *out,
const gf_521_t *a,
const gf_521_t *b
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
@@ -130,16 +62,8 @@ p521_sub_RAW (
}

/* Copy the contents of *a into *out: sizeof(*a) bytes, via memcpy. */
void
p521_copy (
    p521_t *out,
    const p521_t *a
) {
    const size_t nbytes = sizeof(*a);
    memcpy(out, a, nbytes);
}

void
p521_bias (
p521_t *a,
gf_521_bias (
gf_521_t *a,
int amt
) {
uint64_t co0 = ((1ull<<58)-2)*amt, co1 = ((1ull<<58)-1)*amt;
@@ -150,8 +74,8 @@ p521_bias (
}

void
p521_weak_reduce (
p521_t *a
gf_521_weak_reduce (
gf_521_t *a
) {
#if 0
int i;


불러오는 중...
취소
저장