Selaa lähdekoodia

cleanup word.h

master
Michael Hamburg 8 vuotta sitten
vanhempi
commit
196e9a85f8
3 muutettua tiedostoa jossa 74 lisäystä ja 96 poistoa
  1. +0
    -2
      src/decaf_fast.c
  2. +2
    -7
      src/include/field.h
  3. +72
    -87
      src/include/word.h

+ 0
- 2
src/decaf_fast.c Näytä tiedosto

@@ -41,8 +41,6 @@ extern const gf SQRT_MINUS_ONE;
extern const gf SQRT_ONE_MINUS_D; /* TODO: Intern this? */
#endif

#define NOINLINE __attribute__((noinline))
#define INLINE inline __attribute__((always_inline))
#define WBITS DECAF_WORD_BITS

const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}};


+ 2
- 7
src/include/field.h Näytä tiedosto

@@ -22,17 +22,12 @@
*
* If x=0, returns 0.
*/
void
gf_isr (
gf a,
const gf x
);
void gf_isr(gf a, const gf x);
/**
* Square x, n times.
*/
static __inline__ void
__attribute__((unused,always_inline))
static INLINE UNUSED void
gf_sqrn (
gf_s *__restrict__ y,
const gf x,


+ 72
- 87
src/include/word.h Näytä tiedosto

@@ -31,7 +31,7 @@

#if (WORD_BITS == 64)
typedef uint32_t hword_t;
typedef uint64_t word_t;
typedef uint64_t word_t, mask_t;
typedef __uint128_t dword_t;
typedef int32_t hsword_t;
typedef int64_t sword_t;
@@ -49,7 +49,7 @@
#define SC_LIMB(x) (x##ull)
#elif (WORD_BITS == 32)
typedef uint16_t hword_t;
typedef uint32_t word_t;
typedef uint32_t word_t, mask_t;
typedef uint64_t dword_t;
typedef int16_t hsword_t;
typedef int32_t sword_t;
@@ -65,44 +65,41 @@
#define letohWORD letoh32
#define SC_LIMB(x) (x##ull)
#else
#error "For now, libdecaf only supports 32- and 64-bit architectures."
#error "For now, libdecaf only supports 32- and 64-bit architectures."
#endif

/* Integer division of _x by _y, rounded up (for non-negative _x and positive _y).
 * Function-like macro: _y is expanded twice, so avoid arguments with side effects. */
#define DIV_CEIL(_x,_y) (((_x) + (_y) - 1)/(_y))
/* Round _x up to the next multiple of _y. _y is expanded more than once. */
#define ROUND_UP(_x,_y) (DIV_CEIL((_x),(_y))*(_y))
/* Number of WORD_BITS-wide words needed to hold _x bits. */
#define WORDS_FOR_BITS(_x) (DIV_CEIL((_x),WORD_BITS))

/* mask_t is an alias of word_t. */
typedef word_t mask_t;
/* Mask constants: failure is all-zero; success is -(mask_t)1, i.e. every bit
 * set when mask_t is an unsigned type (word_t is uint32_t or uint64_t above). */
static const mask_t MASK_FAILURE = 0, MASK_SUCCESS = -(mask_t)1;

/* General utilities */
#define NOINLINE __attribute__((noinline))
#define UNUSED __attribute__((unused))
#define INLINE __inline__ __attribute__((always_inline))


#ifdef __ARM_NEON__
typedef uint32x4_t vecmask_t;
typedef uint32x4_t vecmask_t;
#elif __clang__
typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2)));
typedef int64_t int64x2_t __attribute__((ext_vector_type(2)));
typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4)));
typedef int64_t int64x4_t __attribute__((ext_vector_type(4)));
typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4)));
typedef int32_t int32x4_t __attribute__((ext_vector_type(4)));
typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2)));
typedef int32_t int32x2_t __attribute__((ext_vector_type(2)));
typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8)));
typedef int32_t int32x8_t __attribute__((ext_vector_type(8)));
typedef word_t vecmask_t __attribute__((ext_vector_type(4)));
#else /* GCC-cleanliness */
typedef uint64_t uint64x2_t __attribute__((vector_size(16)));
typedef int64_t int64x2_t __attribute__((vector_size(16)));
typedef uint64_t uint64x4_t __attribute__((vector_size(32)));
typedef int64_t int64x4_t __attribute__((vector_size(32)));
typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
typedef int32_t int32x4_t __attribute__((vector_size(16)));
typedef uint32_t uint32x2_t __attribute__((vector_size(8)));
typedef int32_t int32x2_t __attribute__((vector_size(8)));
typedef uint32_t uint32x8_t __attribute__((vector_size(32)));
typedef int32_t int32x8_t __attribute__((vector_size(32)));
typedef word_t vecmask_t __attribute__((vector_size(32)));
typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2)));
typedef int64_t int64x2_t __attribute__((ext_vector_type(2)));
typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4)));
typedef int64_t int64x4_t __attribute__((ext_vector_type(4)));
typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4)));
typedef int32_t int32x4_t __attribute__((ext_vector_type(4)));
typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2)));
typedef int32_t int32x2_t __attribute__((ext_vector_type(2)));
typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8)));
typedef int32_t int32x8_t __attribute__((ext_vector_type(8)));
typedef word_t vecmask_t __attribute__((ext_vector_type(4)));
#else /* GCC, hopefully? */
typedef uint64_t uint64x2_t __attribute__((vector_size(16)));
typedef int64_t int64x2_t __attribute__((vector_size(16)));
typedef uint64_t uint64x4_t __attribute__((vector_size(32)));
typedef int64_t int64x4_t __attribute__((vector_size(32)));
typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
typedef int32_t int32x4_t __attribute__((vector_size(16)));
typedef uint32_t uint32x2_t __attribute__((vector_size(8)));
typedef int32_t int32x2_t __attribute__((vector_size(8)));
typedef uint32_t uint32x8_t __attribute__((vector_size(32)));
typedef int32_t int32x8_t __attribute__((vector_size(32)));
typedef word_t vecmask_t __attribute__((vector_size(32)));
#endif

#if __AVX2__
@@ -111,7 +108,7 @@ typedef word_t vecmask_t __attribute__((vector_size(32)));
typedef uint64x4_t uint64xn_t;
typedef uint32x8_t uint32xn_t;

static __inline__ big_register_t
static INLINE big_register_t
br_set_to_mask(mask_t x) {
uint32_t y = (uint32_t)x;
big_register_t ret = {y,y,y,y,y,y,y,y};
@@ -123,7 +120,7 @@ typedef word_t vecmask_t __attribute__((vector_size(32)));
typedef uint64x2_t uint64xn_t;
typedef uint32x4_t uint32xn_t;

static __inline__ big_register_t
static INLINE big_register_t
br_set_to_mask(mask_t x) {
uint32_t y = x;
big_register_t ret = {y,y,y,y};
@@ -134,7 +131,8 @@ typedef word_t vecmask_t __attribute__((vector_size(32)));
typedef uint32x4_t big_register_t;
typedef uint64x2_t uint64xn_t;
typedef uint32x4_t uint32xn_t;
static __inline__ big_register_t
static INLINE big_register_t
br_set_to_mask(mask_t x) {
return vdupq_n_u32(x);
}
@@ -143,7 +141,7 @@ typedef word_t vecmask_t __attribute__((vector_size(32)));
typedef uint64_t big_register_t, uint64xn_t;

typedef uint32_t uint32xn_t;
static __inline__ big_register_t
static INLINE big_register_t
br_set_to_mask(mask_t x) {
return (big_register_t)x;
}
@@ -153,7 +151,7 @@ typedef word_t vecmask_t __attribute__((vector_size(32)));
typedef uint32_t uint32xn_t;
typedef uint32_t big_register_t;

static __inline__ big_register_t
static INLINE big_register_t
br_set_to_mask(mask_t x) {
return (big_register_t)x;
}
@@ -170,49 +168,40 @@ typedef struct {
/**
* Return -1 if x==0, and 0 otherwise.
*/
static __inline__ mask_t
__attribute__((always_inline,unused))
static INLINE UNUSED mask_t
word_is_zero(word_t x) {
/* Branch-free zero test: widen x to the double-width dword_t and subtract 1.
 * Only x == 0 borrows into the upper word, so after shifting right by
 * WORD_BITS the result is all-ones for x == 0 and 0 for any other x. */
return (mask_t)((((dword_t)(x)) - 1)>>WORD_BITS);
}

#if __AVX2__
/* br_is_zero: zero test on a big_register_t, one implementation per
 * instruction set selected by the preprocessor. */
/* AVX2: compare against an all-zero register built by br_set_to_mask(0),
 * using the compiler's vector == operator. */
static __inline__ big_register_t
br_is_zero(big_register_t x) {
return (big_register_t)(x == br_set_to_mask(0));
}
static INLINE big_register_t
br_is_zero(big_register_t x) {
return (big_register_t)(x == br_set_to_mask(0));
}
#elif __SSE2__
/* SSE2: compare each 32-bit lane with zero via _mm_cmpeq_epi32. */
static __inline__ big_register_t
br_is_zero(big_register_t x) {
return (big_register_t)_mm_cmpeq_epi32((__m128i)x, _mm_setzero_si128());
//return (big_register_t)(x == br_set_to_mask(0));
}
static INLINE big_register_t
br_is_zero(big_register_t x) {
return (big_register_t)_mm_cmpeq_epi32((__m128i)x, _mm_setzero_si128());
//return (big_register_t)(x == br_set_to_mask(0));
}
#elif __ARM_NEON__
/* NEON: x^x yields an all-zero vector, which vceqq_u32 compares x against. */
static __inline__ big_register_t
br_is_zero(big_register_t x) {
return vceqq_u32(x,x^x);
}
static INLINE big_register_t
br_is_zero(big_register_t x) {
return vceqq_u32(x,x^x);
}
#else
/* Scalar fallback: same widening-borrow trick as word_is_zero. Subtracting 1
 * from x widened to dword_t leaves the upper word all-ones only when x == 0.
 * Note this variant takes a word_t and returns a mask_t rather than
 * big_register_t. */
static __inline__ mask_t
br_is_zero(word_t x) {
return (((dword_t)x) - 1)>>WORD_BITS;
}
static INLINE mask_t
br_is_zero(word_t x) {
return (((dword_t)x) - 1)>>WORD_BITS;
}
#endif




#ifdef __APPLE__
/* Endian conversion helpers defined locally when building for Apple targets. */
/* Host to big-endian: reverses the byte order of x with the x86-64 bswapq
 * instruction.
 * NOTE(review): this presumes an x86-64 (little-endian) host; the inline
 * assembly would not assemble on other architectures - confirm. */
static inline uint64_t
htobe64 (uint64_t x) {
__asm__ ("bswapq %0" : "+r"(x));
return x;
}
/* Host to little-endian: returns x unchanged.
 * NOTE(review): presumably relies on the host being little-endian - confirm. */
static inline uint64_t
htole64 (uint64_t x) { return x; }

/* Little-endian to host: returns x unchanged (same assumption as htole64). */
static inline uint64_t
letoh64 (uint64_t x) { return x; }
static INLINE uint64_t htole64 (uint64_t x) { return x; }
static INLINE uint64_t letoh64 (uint64_t x) { return x; }
#endif

/**
@@ -230,20 +219,21 @@ letoh64 (uint64_t x) { return x; }
#endif

#ifdef HAS_MEMSET_S
/* really_memset(p, c, s): fill the s bytes at p with the value c.
 * When HAS_MEMSET_S is defined this forwards to memset_s; otherwise a
 * byte-by-byte loop over a volatile pointer is used. */
#ifdef NEED_MEMSET_S_EXTERN
/* Declare memset_s ourselves when the platform headers do not provide it. */
extern int memset_s(void *, size_t, int, size_t);
#endif
static __inline__ void
really_memset(void *p, char c, size_t s) {
/* s is passed both as the destination size and as the byte count;
 * the return value of memset_s is ignored. */
memset_s(p, s, c, s);
}
#ifdef NEED_MEMSET_S_EXTERN
extern int memset_s(void *, size_t, int, size_t);
#endif
static INLINE void
really_memset(void *p, char c, size_t s) {
memset_s(p, s, c, s);
}
#else
static __inline__ void __attribute__((always_inline,unused))
really_memset(void *p, char c, size_t s) {
/* Store through a volatile-qualified pointer so that each byte write is an
 * observable access the compiler must perform rather than optimize away. */
volatile char *pv = (volatile char *)p;
size_t i;
for (i=0; i<s; i++) pv[i] = c;
}
/* PERF: use words? */
static INLINE UNUSED void
really_memset(void *p, char c, size_t s) {
volatile char *pv = (volatile char *)p;
size_t i;
for (i=0; i<s; i++) pv[i] = c;
}
#endif

/**
@@ -257,12 +247,7 @@ really_memset(void *p, char c, size_t s) {
* @return A suitable pointer, which can be free'd with free(),
* or NULL if no memory can be allocated.
*/
static __inline__ void *
malloc_vector (
size_t size
) __attribute__((always_inline, unused));

void *
static INLINE UNUSED void *
malloc_vector(size_t size) {
void *out = NULL;


Ladataan…
Peruuta
Tallenna