Browse Source

Fixed the mixed-arch bug, though its root cause is a long-standing TODO

master
Michael Hamburg 9 years ago
parent
commit
86e44e69c6
3 changed files with 18 additions and 16 deletions
  1. +2
    -2
      src/include/field.h
  2. +0
    -5
      src/p25519/arch_x86_64/f_impl.c
  3. +16
    -9
      src/public_include/decaf/common.h

+ 2
- 2
src/include/field.h View File

@@ -43,7 +43,7 @@ gf_sqrn (
static inline void gf_sub_nr ( gf c, const gf a, const gf b ) {
gf_sub_RAW(c,a,b);
gf_bias(c, 2);
if (DECAF_WORD_BITS==32) gf_weak_reduce(c); // HACK PERF MAGIC
if (sizeof(word_t)==4) gf_weak_reduce(c); // HACK PERF MAGIC
// Depending on headroom, this is needed in some of the Ed routines, but
// not in the Montgomery ladder. Need to find a better way to prevent
// overflow. In particular, the headroom depends on the field+arch combo,
@@ -55,7 +55,7 @@ static inline void gf_sub_nr ( gf c, const gf a, const gf b ) {
static inline void gf_subx_nr ( gf c, const gf a, const gf b, int amt ) {
gf_sub_RAW(c,a,b);
gf_bias(c, amt);
if (DECAF_WORD_BITS==32) gf_weak_reduce(c); // HACK PERF MAGIC
if (sizeof(word_t)==4) gf_weak_reduce(c); // HACK PERF MAGIC
}




+ 0
- 5
src/p25519/arch_x86_64/f_impl.c View File

@@ -72,11 +72,6 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
accum1 += shrld(accum0, 51);
c[4] = accum1 & mask;
/* 2^102 * 16 * 5 * 19 * (1+ep) >> 64
* = 2^(-13 + <13)
* PERF: good enough to fit into uint64_t.
*/
uint64_t a1 = shrld(accum1,51);
/* Here a1 < (5*(9*2^51)^2 + small) >> 51 = 405 * 2^51 + small
* a1 * 19 + c0 < (405*19+1)*2^51 + small < 2^13 * 2^51.


+ 16
- 9
src/public_include/decaf/common.h View File

@@ -45,21 +45,28 @@ extern "C" {
* platform to support dynamic linking, since even if your header was built
* with e.g. arch_neon, you might end up linking a library built with arch_arm32.
*/
#if (defined(__ILP64__) || defined(__amd64__) || defined(__x86_64__) || (((__UINT_FAST32_MAX__)>>30)>>30)) \
&& !defined(DECAF_FORCE_32_BIT)
#define DECAF_WORD_BITS 64 /**< The number of bits in a word */
typedef uint64_t decaf_word_t; /**< Word size for internal computations */
#ifndef DECAF_WORD_BITS
#if (defined(__ILP64__) || defined(__amd64__) || defined(__x86_64__) || (((__UINT_FAST32_MAX__)>>30)>>30))
#define DECAF_WORD_BITS 64 /**< The number of bits in a word */
#else
#define DECAF_WORD_BITS 32 /**< The number of bits in a word */
#endif
#endif
#if DECAF_WORD_BITS == 64
typedef uint64_t decaf_word_t; /**< Word size for internal computations */
typedef int64_t decaf_sword_t; /**< Signed word size for internal computations */
typedef uint64_t decaf_bool_t; /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
typedef __uint128_t decaf_dword_t; /**< Double-word size for internal computations */
typedef __int128_t decaf_dsword_t; /**< Signed double-word size for internal computations */
#else
#define DECAF_WORD_BITS 32 /**< The number of bits in a word */
typedef uint64_t decaf_bool_t; /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
typedef __uint128_t decaf_dword_t; /**< Double-word size for internal computations */
typedef __int128_t decaf_dsword_t; /**< Signed double-word size for internal computations */
#elif DECAF_WORD_BITS == 32 /**< The number of bits in a word */
typedef uint32_t decaf_word_t; /**< Word size for internal computations */
typedef int32_t decaf_sword_t; /**< Signed word size for internal computations */
typedef uint32_t decaf_bool_t; /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
typedef uint64_t decaf_dword_t; /**< Double-word size for internal computations */
typedef int64_t decaf_dsword_t; /**< Signed double-word size for internal computations */
#else
#error "Only supporting DECAF_WORD_BITS = 32 or 64 for now"
#endif
/** DECAF_TRUE = -1 so that DECAF_TRUE & x = x */


Loading…
Cancel
Save