|
|
@@ -31,7 +31,7 @@ |
|
|
|
|
|
|
|
#if (WORD_BITS == 64) |
|
|
|
typedef uint32_t hword_t; |
|
|
|
typedef uint64_t word_t; |
|
|
|
typedef uint64_t word_t, mask_t; |
|
|
|
typedef __uint128_t dword_t; |
|
|
|
typedef int32_t hsword_t; |
|
|
|
typedef int64_t sword_t; |
|
|
@@ -49,7 +49,7 @@ |
|
|
|
#define SC_LIMB(x) (x##ull) |
|
|
|
#elif (WORD_BITS == 32) |
|
|
|
typedef uint16_t hword_t; |
|
|
|
typedef uint32_t word_t; |
|
|
|
typedef uint32_t word_t, mask_t; |
|
|
|
typedef uint64_t dword_t; |
|
|
|
typedef int16_t hsword_t; |
|
|
|
typedef int32_t sword_t; |
|
|
@@ -65,44 +65,41 @@ |
|
|
|
#define letohWORD letoh32 |
|
|
|
#define SC_LIMB(x) (x##ull) |
|
|
|
#else |
|
|
|
#error "For now, libdecaf only supports 32- and 64-bit architectures." |
|
|
|
#error "For now, libdecaf only supports 32- and 64-bit architectures." |
|
|
|
#endif |
|
|
|
|
|
|
|
/*
 * Integer rounding helpers.
 *
 * These are plain macros, so an argument with side effects is evaluated more
 * than once (_y twice in DIV_CEIL, three times in ROUND_UP).  The rounding is
 * only meaningful for non-negative _x and positive _y.
 */

/* Quotient _x/_y rounded up to the next integer. */
#define DIV_CEIL(_x,_y) (((_x) + (_y) - 1)/(_y))

/* _x rounded up to the nearest multiple of _y. */
#define ROUND_UP(_x,_y) (DIV_CEIL((_x),(_y))*(_y))

/* Number of WORD_BITS-wide words needed to hold _x bits. */
#define WORDS_FOR_BITS(_x) (DIV_CEIL((_x),WORD_BITS))
|
|
|
|
|
|
|
/*
 * A mask is a full machine word that is either all zero bits (failure)
 * or all one bits (success).
 */
typedef word_t mask_t;

static const mask_t MASK_FAILURE = 0;
static const mask_t MASK_SUCCESS = -(mask_t)1; /* unsigned negation: every bit set */
|
|
|
|
|
|
|
/* General utilities */

/*
 * Shorthands for function attributes.  They expand to the GCC/Clang
 * __attribute__ extension, so they are only usable with compilers that
 * accept that syntax.
 */
#define NOINLINE __attribute__((noinline))                   /* never inline the function */
#define UNUSED   __attribute__((unused))                     /* no warning if it goes unused */
#define INLINE   __inline__ __attribute__((always_inline))   /* force inlining */
|
|
|
|
|
|
|
|
|
|
|
#ifdef __ARM_NEON__ |
|
|
|
typedef uint32x4_t vecmask_t; |
|
|
|
typedef uint32x4_t vecmask_t; |
|
|
|
#elif __clang__ |
|
|
|
typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2))); |
|
|
|
typedef int64_t int64x2_t __attribute__((ext_vector_type(2))); |
|
|
|
typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4))); |
|
|
|
typedef int64_t int64x4_t __attribute__((ext_vector_type(4))); |
|
|
|
typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4))); |
|
|
|
typedef int32_t int32x4_t __attribute__((ext_vector_type(4))); |
|
|
|
typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2))); |
|
|
|
typedef int32_t int32x2_t __attribute__((ext_vector_type(2))); |
|
|
|
typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8))); |
|
|
|
typedef int32_t int32x8_t __attribute__((ext_vector_type(8))); |
|
|
|
typedef word_t vecmask_t __attribute__((ext_vector_type(4))); |
|
|
|
#else /* GCC-cleanliness */ |
|
|
|
typedef uint64_t uint64x2_t __attribute__((vector_size(16))); |
|
|
|
typedef int64_t int64x2_t __attribute__((vector_size(16))); |
|
|
|
typedef uint64_t uint64x4_t __attribute__((vector_size(32))); |
|
|
|
typedef int64_t int64x4_t __attribute__((vector_size(32))); |
|
|
|
typedef uint32_t uint32x4_t __attribute__((vector_size(16))); |
|
|
|
typedef int32_t int32x4_t __attribute__((vector_size(16))); |
|
|
|
typedef uint32_t uint32x2_t __attribute__((vector_size(8))); |
|
|
|
typedef int32_t int32x2_t __attribute__((vector_size(8))); |
|
|
|
typedef uint32_t uint32x8_t __attribute__((vector_size(32))); |
|
|
|
typedef int32_t int32x8_t __attribute__((vector_size(32))); |
|
|
|
typedef word_t vecmask_t __attribute__((vector_size(32))); |
|
|
|
typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2))); |
|
|
|
typedef int64_t int64x2_t __attribute__((ext_vector_type(2))); |
|
|
|
typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4))); |
|
|
|
typedef int64_t int64x4_t __attribute__((ext_vector_type(4))); |
|
|
|
typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4))); |
|
|
|
typedef int32_t int32x4_t __attribute__((ext_vector_type(4))); |
|
|
|
typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2))); |
|
|
|
typedef int32_t int32x2_t __attribute__((ext_vector_type(2))); |
|
|
|
typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8))); |
|
|
|
typedef int32_t int32x8_t __attribute__((ext_vector_type(8))); |
|
|
|
typedef word_t vecmask_t __attribute__((ext_vector_type(4))); |
|
|
|
#else /* GCC, hopefully? */ |
|
|
|
typedef uint64_t uint64x2_t __attribute__((vector_size(16))); |
|
|
|
typedef int64_t int64x2_t __attribute__((vector_size(16))); |
|
|
|
typedef uint64_t uint64x4_t __attribute__((vector_size(32))); |
|
|
|
typedef int64_t int64x4_t __attribute__((vector_size(32))); |
|
|
|
typedef uint32_t uint32x4_t __attribute__((vector_size(16))); |
|
|
|
typedef int32_t int32x4_t __attribute__((vector_size(16))); |
|
|
|
typedef uint32_t uint32x2_t __attribute__((vector_size(8))); |
|
|
|
typedef int32_t int32x2_t __attribute__((vector_size(8))); |
|
|
|
typedef uint32_t uint32x8_t __attribute__((vector_size(32))); |
|
|
|
typedef int32_t int32x8_t __attribute__((vector_size(32))); |
|
|
|
typedef word_t vecmask_t __attribute__((vector_size(32))); |
|
|
|
#endif |
|
|
|
|
|
|
|
#if __AVX2__ |
|
|
@@ -111,7 +108,7 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); |
|
|
|
typedef uint64x4_t uint64xn_t; |
|
|
|
typedef uint32x8_t uint32xn_t; |
|
|
|
|
|
|
|
static __inline__ big_register_t |
|
|
|
static INLINE big_register_t |
|
|
|
br_set_to_mask(mask_t x) { |
|
|
|
uint32_t y = (uint32_t)x; |
|
|
|
big_register_t ret = {y,y,y,y,y,y,y,y}; |
|
|
@@ -123,7 +120,7 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); |
|
|
|
typedef uint64x2_t uint64xn_t; |
|
|
|
typedef uint32x4_t uint32xn_t; |
|
|
|
|
|
|
|
static __inline__ big_register_t |
|
|
|
static INLINE big_register_t |
|
|
|
br_set_to_mask(mask_t x) { |
|
|
|
uint32_t y = x; |
|
|
|
big_register_t ret = {y,y,y,y}; |
|
|
@@ -134,7 +131,8 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); |
|
|
|
typedef uint32x4_t big_register_t; |
|
|
|
typedef uint64x2_t uint64xn_t; |
|
|
|
typedef uint32x4_t uint32xn_t; |
|
|
|
static __inline__ big_register_t |
|
|
|
|
|
|
|
static INLINE big_register_t |
|
|
|
br_set_to_mask(mask_t x) { |
|
|
|
return vdupq_n_u32(x); |
|
|
|
} |
|
|
@@ -143,7 +141,7 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); |
|
|
|
typedef uint64_t big_register_t, uint64xn_t; |
|
|
|
|
|
|
|
typedef uint32_t uint32xn_t; |
|
|
|
static __inline__ big_register_t |
|
|
|
static INLINE big_register_t |
|
|
|
br_set_to_mask(mask_t x) { |
|
|
|
return (big_register_t)x; |
|
|
|
} |
|
|
@@ -153,7 +151,7 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); |
|
|
|
typedef uint32_t uint32xn_t; |
|
|
|
typedef uint32_t big_register_t; |
|
|
|
|
|
|
|
static __inline__ big_register_t |
|
|
|
static INLINE big_register_t |
|
|
|
br_set_to_mask(mask_t x) { |
|
|
|
return (big_register_t)x; |
|
|
|
} |
|
|
@@ -170,49 +168,40 @@ typedef struct { |
|
|
|
/** |
|
|
|
* Return -1 if x==0, and 0 otherwise. |
|
|
|
*/ |
|
|
|
static __inline__ mask_t |
|
|
|
__attribute__((always_inline,unused)) |
|
|
|
static INLINE UNUSED mask_t |
|
|
|
word_is_zero(word_t x) { |
|
|
|
return (mask_t)((((dword_t)(x)) - 1)>>WORD_BITS); |
|
|
|
} |
|
|
|
|
|
|
|
#if __AVX2__ |
|
|
|
static __inline__ big_register_t |
|
|
|
br_is_zero(big_register_t x) { |
|
|
|
return (big_register_t)(x == br_set_to_mask(0)); |
|
|
|
} |
|
|
|
static INLINE big_register_t |
|
|
|
br_is_zero(big_register_t x) { |
|
|
|
return (big_register_t)(x == br_set_to_mask(0)); |
|
|
|
} |
|
|
|
#elif __SSE2__ |
|
|
|
static __inline__ big_register_t |
|
|
|
br_is_zero(big_register_t x) { |
|
|
|
return (big_register_t)_mm_cmpeq_epi32((__m128i)x, _mm_setzero_si128()); |
|
|
|
//return (big_register_t)(x == br_set_to_mask(0)); |
|
|
|
} |
|
|
|
static INLINE big_register_t |
|
|
|
br_is_zero(big_register_t x) { |
|
|
|
return (big_register_t)_mm_cmpeq_epi32((__m128i)x, _mm_setzero_si128()); |
|
|
|
//return (big_register_t)(x == br_set_to_mask(0)); |
|
|
|
} |
|
|
|
#elif __ARM_NEON__ |
|
|
|
static __inline__ big_register_t |
|
|
|
br_is_zero(big_register_t x) { |
|
|
|
return vceqq_u32(x,x^x); |
|
|
|
} |
|
|
|
static INLINE big_register_t |
|
|
|
br_is_zero(big_register_t x) { |
|
|
|
return vceqq_u32(x,x^x); |
|
|
|
} |
|
|
|
#else |
|
|
|
static __inline__ mask_t |
|
|
|
br_is_zero(word_t x) { |
|
|
|
return (((dword_t)x) - 1)>>WORD_BITS; |
|
|
|
} |
|
|
|
static INLINE mask_t |
|
|
|
br_is_zero(word_t x) { |
|
|
|
return (((dword_t)x) - 1)>>WORD_BITS; |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef __APPLE__
/*
 * 64-bit byte-order helpers, defined here only when __APPLE__ is set.
 * The little-endian conversions return their argument unchanged, i.e. the
 * host is treated as little-endian.
 */

/* Host to big-endian: reverse the byte order.  Uses the compiler builtin
 * rather than x86-only "bswapq" inline assembly so it also builds on
 * non-x86 targets. */
static INLINE uint64_t
htobe64 (uint64_t x) {
    return __builtin_bswap64(x);
}

/* Host to little-endian: identity on a little-endian host. */
static INLINE uint64_t htole64 (uint64_t x) { return x; }

/* Little-endian to host: identity on a little-endian host. */
static INLINE uint64_t letoh64 (uint64_t x) { return x; }
#endif
|
|
|
|
|
|
|
/** |
|
|
@@ -230,20 +219,21 @@ letoh64 (uint64_t x) { return x; } |
|
|
|
#endif |
|
|
|
|
|
|
|
#ifdef HAS_MEMSET_S |
|
|
|
#ifdef NEED_MEMSET_S_EXTERN |
|
|
|
extern int memset_s(void *, size_t, int, size_t); |
|
|
|
#endif |
|
|
|
static __inline__ void |
|
|
|
really_memset(void *p, char c, size_t s) { |
|
|
|
memset_s(p, s, c, s); |
|
|
|
} |
|
|
|
#ifdef NEED_MEMSET_S_EXTERN |
|
|
|
extern int memset_s(void *, size_t, int, size_t); |
|
|
|
#endif |
|
|
|
static INLINE void |
|
|
|
really_memset(void *p, char c, size_t s) { |
|
|
|
memset_s(p, s, c, s); |
|
|
|
} |
|
|
|
#else |
|
|
|
static __inline__ void __attribute__((always_inline,unused)) |
|
|
|
really_memset(void *p, char c, size_t s) { |
|
|
|
volatile char *pv = (volatile char *)p; |
|
|
|
size_t i; |
|
|
|
for (i=0; i<s; i++) pv[i] = c; |
|
|
|
} |
|
|
|
/* PERF: use words? */ |
|
|
|
static INLINE UNUSED void |
|
|
|
really_memset(void *p, char c, size_t s) { |
|
|
|
volatile char *pv = (volatile char *)p; |
|
|
|
size_t i; |
|
|
|
for (i=0; i<s; i++) pv[i] = c; |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
/** |
|
|
@@ -257,12 +247,7 @@ really_memset(void *p, char c, size_t s) { |
|
|
|
* @return A suitable pointer, which can be free'd with free(), |
|
|
|
* or NULL if no memory can be allocated. |
|
|
|
*/ |
|
|
|
static __inline__ void * |
|
|
|
malloc_vector ( |
|
|
|
size_t size |
|
|
|
) __attribute__((always_inline, unused)); |
|
|
|
|
|
|
|
void * |
|
|
|
static INLINE UNUSED void * |
|
|
|
malloc_vector(size_t size) { |
|
|
|
void *out = NULL; |
|
|
|
|
|
|
|