Browse Source

add explicit GF_HEADROOM per field+arch for unreduced arith; fortunately unreduced arith is very rare

master
Michael Hamburg 9 years ago
parent
commit
f8dc967ae1
10 changed files with 61 additions and 55 deletions
  1. +2
    -7
      src/include/field.h
  2. +1
    -0
      src/p25519/arch_32/f_impl.h
  3. +1
    -0
      src/p25519/arch_ref64/f_impl.h
  4. +1
    -0
      src/p25519/arch_x86_64/f_impl.h
  5. +1
    -0
      src/p448/arch_32/f_impl.h
  6. +1
    -0
      src/p448/arch_arm_32/f_impl.h
  7. +1
    -0
      src/p448/arch_neon/f_impl.h
  8. +1
    -0
      src/p448/arch_ref64/f_impl.h
  9. +1
    -0
      src/p448/arch_x86_64/f_impl.h
  10. +51
    -48
      src/per_curve/decaf.tmpl.c

+ 2
- 7
src/include/field.h View File

@@ -42,19 +42,14 @@ static INLINE UNUSED void gf_sqrn (
static inline void gf_sub_nr ( gf c, const gf a, const gf b ) {
gf_sub_RAW(c,a,b);
gf_bias(c, 2);
if (sizeof(word_t)==4) gf_weak_reduce(c); // HACK PERF MAGIC
// Depending on headroom, this is needed in some of the Ed routines, but
// not in the Montgomery ladder. Need to find a better way to prevent
// overflow. In particular, the headroom depends on the field+arch combo,
// not just one or the other, and whether the reduction is needed depends
// also on the algorithm.
// Same rule as gf_subx_nr with amt == 2: weak-reduce when GF_HEADROOM < amt+1.
if (GF_HEADROOM < 3) gf_weak_reduce(c);
}

/**
 * Subtract mod p: c = a - b, biased by amt.
 * The result is left unreduced unless one of the checks below decides the
 * headroom is too small, in which case it is weakly reduced.
 */
static inline void gf_subx_nr ( gf c, const gf a, const gf b, int amt ) {
gf_sub_RAW(c,a,b);
gf_bias(c, amt);
if (sizeof(word_t)==4) gf_weak_reduce(c); // HACK PERF MAGIC
// Weak-reduce when the per-field+arch GF_HEADROOM is below amt+1.
if (GF_HEADROOM < amt+1) gf_weak_reduce(c);
}

/** Mul by signed int. Not constant-time WRT the sign of that int. */


+ 1
- 0
src/p25519/arch_32/f_impl.h View File

@@ -2,6 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#define GF_HEADROOM 5
#define LIMB(x) (x##ull)&((1ull<<26)-1), (x##ull)>>26
#define FIELD_LITERAL(a,b,c,d,e) {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e)}}



+ 1
- 0
src/p25519/arch_ref64/f_impl.h View File

@@ -2,6 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#define GF_HEADROOM 933
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}

#define LIMB_PLACE_VALUE(i) 51


+ 1
- 0
src/p25519/arch_x86_64/f_impl.h View File

@@ -2,6 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#define GF_HEADROOM 933
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}

#define LIMB_PLACE_VALUE(i) 51


+ 1
- 0
src/p448/arch_32/f_impl.h View File

@@ -2,6 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#define GF_HEADROOM 2
#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}


+ 1
- 0
src/p448/arch_arm_32/f_impl.h View File

@@ -2,6 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#define GF_HEADROOM 2
#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}


+ 1
- 0
src/p448/arch_neon/f_impl.h View File

@@ -2,6 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#define GF_HEADROOM 2
#define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15)
#define USE_NEON_PERM 1
#define LIMBHI(x) ((x##ull)>>28)


+ 1
- 0
src/p448/arch_ref64/f_impl.h View File

@@ -2,6 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#define GF_HEADROOM 9999 /* Everything is reduced anyway */
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
#define LIMB_PLACE_VALUE(i) 56


+ 1
- 0
src/p448/arch_x86_64/f_impl.h View File

@@ -2,6 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#define GF_HEADROOM 60
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
#define LIMB_PLACE_VALUE(i) 56



+ 51
- 48
src/per_curve/decaf.tmpl.c View File

@@ -295,24 +295,25 @@ void API_NS(point_sub) (
const point_t r
) {
gf a, b, c, d;
gf_sub_nr ( b, q->y, q->x );
gf_sub_nr ( d, r->y, r->x );
gf_add_nr ( c, r->y, r->x );
gf_sub_nr ( b, q->y, q->x ); /* 3+e */
gf_sub_nr ( d, r->y, r->x ); /* 3+e */
gf_add_nr ( c, r->y, r->x ); /* 2+e */
gf_mul ( a, c, b );
gf_add_nr ( b, q->y, q->x );
gf_add_nr ( b, q->y, q->x ); /* 2+e */
gf_mul ( p->y, d, b );
gf_mul ( b, r->t, q->t );
gf_mulw ( p->x, b, 2*EFF_D );
gf_add_nr ( b, a, p->y );
gf_sub_nr ( c, p->y, a );
gf_add_nr ( b, a, p->y ); /* 2+e */
gf_sub_nr ( c, p->y, a ); /* 3+e */
gf_mul ( a, q->z, r->z );
gf_add_nr ( a, a, a );
gf_add_nr ( a, a, a ); /* 2+e */
if (GF_HEADROOM <= 3) gf_weak_reduce(a); /* or 1+e */
#if NEG_D
gf_sub_nr ( p->y, a, p->x );
gf_add_nr ( a, a, p->x );
gf_sub_nr ( p->y, a, p->x ); /* 4+e or 3+e */
gf_add_nr ( a, a, p->x ); /* 3+e or 2+e */
#else
gf_add_nr ( p->y, a, p->x );
gf_sub_nr ( a, a, p->x );
gf_add_nr ( p->y, a, p->x ); /* 3+e or 2+e */
gf_sub_nr ( a, a, p->x ); /* 4+e or 3+e */
#endif
gf_mul ( p->z, a, p->y );
gf_mul ( p->x, p->y, c );
@@ -326,24 +327,25 @@ void API_NS(point_add) (
const point_t r
) {
gf a, b, c, d;
gf_sub_nr ( b, q->y, q->x );
gf_sub_nr ( c, r->y, r->x );
gf_add_nr ( d, r->y, r->x );
gf_sub_nr ( b, q->y, q->x ); /* 3+e */
gf_sub_nr ( c, r->y, r->x ); /* 3+e */
gf_add_nr ( d, r->y, r->x ); /* 2+e */
gf_mul ( a, c, b );
gf_add_nr ( b, q->y, q->x );
gf_add_nr ( b, q->y, q->x ); /* 2+e */
gf_mul ( p->y, d, b );
gf_mul ( b, r->t, q->t );
gf_mulw ( p->x, b, 2*EFF_D );
gf_add_nr ( b, a, p->y );
gf_sub_nr ( c, p->y, a );
gf_add_nr ( b, a, p->y ); /* 2+e */
gf_sub_nr ( c, p->y, a ); /* 3+e */
gf_mul ( a, q->z, r->z );
gf_add_nr ( a, a, a );
gf_add_nr ( a, a, a ); /* 2+e */
if (GF_HEADROOM <= 3) gf_weak_reduce(a); /* or 1+e */
#if NEG_D
gf_add_nr ( p->y, a, p->x );
gf_sub_nr ( a, a, p->x );
gf_add_nr ( p->y, a, p->x ); /* 3+e or 2+e */
gf_sub_nr ( a, a, p->x ); /* 4+e or 3+e */
#else
gf_sub_nr ( p->y, a, p->x );
gf_add_nr ( a, a, p->x );
gf_sub_nr ( p->y, a, p->x ); /* 4+e or 3+e */
gf_add_nr ( a, a, p->x ); /* 3+e or 2+e */
#endif
gf_mul ( p->z, a, p->y );
gf_mul ( p->x, p->y, c );
@@ -360,14 +362,15 @@ point_double_internal (
gf a, b, c, d;
gf_sqr ( c, q->x );
gf_sqr ( a, q->y );
gf_add_nr ( d, c, a );
gf_add_nr ( p->t, q->y, q->x );
gf_add_nr ( d, c, a ); /* 2+e */
gf_add_nr ( p->t, q->y, q->x ); /* 2+e */
gf_sqr ( b, p->t );
gf_subx_nr ( b, b, d, 3 );
gf_sub_nr ( p->t, a, c );
gf_subx_nr ( b, b, d, 3 ); /* 4+e */
gf_sub_nr ( p->t, a, c ); /* 3+e */
gf_sqr ( p->x, q->z );
gf_add_nr ( p->z, p->x, p->x );
gf_subx_nr ( a, p->z, p->t, 4 );
gf_add_nr ( p->z, p->x, p->x ); /* 2+e */
gf_subx_nr ( a, p->z, p->t, 4 ); /* 6+e */
if (GF_HEADROOM == 5) gf_weak_reduce(a); /* or 1+e */
gf_mul ( p->x, a, b );
gf_mul ( p->z, p->t, a );
gf_mul ( p->y, p->t, d );
@@ -439,15 +442,15 @@ add_niels_to_pt (
int before_double
) {
gf a, b, c;
gf_sub_nr ( b, d->y, d->x );
gf_sub_nr ( b, d->y, d->x ); /* 3+e */
gf_mul ( a, e->a, b );
gf_add_nr ( b, d->x, d->y );
gf_add_nr ( b, d->x, d->y ); /* 2+e */
gf_mul ( d->y, e->b, b );
gf_mul ( d->x, e->c, d->t );
gf_add_nr ( c, a, d->y );
gf_sub_nr ( b, d->y, a );
gf_sub_nr ( d->y, d->z, d->x );
gf_add_nr ( a, d->x, d->z );
gf_add_nr ( c, a, d->y ); /* 2+e */
gf_sub_nr ( b, d->y, a ); /* 3+e */
gf_sub_nr ( d->y, d->z, d->x ); /* 3+e */
gf_add_nr ( a, d->x, d->z ); /* 2+e */
gf_mul ( d->z, a, d->y );
gf_mul ( d->x, d->y, b );
gf_mul ( d->y, a, c );
@@ -461,15 +464,15 @@ sub_niels_from_pt (
int before_double
) {
gf a, b, c;
gf_sub_nr ( b, d->y, d->x );
gf_sub_nr ( b, d->y, d->x ); /* 3+e */
gf_mul ( a, e->b, b );
gf_add_nr ( b, d->x, d->y );
gf_add_nr ( b, d->x, d->y ); /* 2+e */
gf_mul ( d->y, e->a, b );
gf_mul ( d->x, e->c, d->t );
gf_add_nr ( c, a, d->y );
gf_sub_nr ( b, d->y, a );
gf_add_nr ( d->y, d->z, d->x );
gf_sub_nr ( a, d->z, d->x );
gf_add_nr ( c, a, d->y ); /* 2+e */
gf_sub_nr ( b, d->y, a ); /* 3+e */
gf_add_nr ( d->y, d->z, d->x ); /* 2+e */
gf_sub_nr ( a, d->z, d->x ); /* 3+e */
gf_mul ( d->z, a, d->y );
gf_mul ( d->x, d->y, b );
gf_mul ( d->y, a, c );
@@ -1073,25 +1076,25 @@ decaf_error_t API_NS(x_direct_scalarmul) (
gf_cond_swap(z2,z3,swap);
swap = k_t;
gf_add_nr(t1,x2,z2); /* A = x2 + z2 */
gf_sub_nr(t2,x2,z2); /* B = x2 - z2 */
gf_sub_nr(z2,x3,z3); /* D = x3 - z3 */
gf_add_nr(t1,x2,z2); /* A = x2 + z2 */ /* 2+e */
gf_sub_nr(t2,x2,z2); /* B = x2 - z2 */ /* 3+e */
gf_sub_nr(z2,x3,z3); /* D = x3 - z3 */ /* 3+e */
gf_mul(x2,t1,z2); /* DA */
gf_add_nr(z2,z3,x3); /* C = x3 + z3 */
gf_add_nr(z2,z3,x3); /* C = x3 + z3 */ /* 2+e */
gf_mul(x3,t2,z2); /* CB */
gf_sub_nr(z3,x2,x3); /* DA-CB */
gf_sub_nr(z3,x2,x3); /* DA-CB */ /* 3+e */
gf_sqr(z2,z3); /* (DA-CB)^2 */
gf_mul(z3,x1,z2); /* z3 = x1(DA-CB)^2 */
gf_add_nr(z2,x2,x3); /* (DA+CB) */
gf_add_nr(z2,x2,x3); /* (DA+CB) */ /* 2+e */
gf_sqr(x3,z2); /* x3 = (DA+CB)^2 */
gf_sqr(z2,t1); /* AA = A^2 */
gf_sqr(t1,t2); /* BB = B^2 */
gf_mul(x2,z2,t1); /* x2 = AA*BB */
gf_sub_nr(t2,z2,t1); /* E = AA-BB */
gf_sub_nr(t2,z2,t1); /* E = AA-BB */ /* 3+e */
gf_mulw(t1,t2,-EDWARDS_D); /* E*-d = a24*E */
gf_add_nr(t1,t1,z2); /* AA + a24*E */
gf_add_nr(t1,t1,z2); /* AA + a24*E */ /* 2+e */
gf_mul(z2,t2,t1); /* z2 = E(AA+a24*E) */
}


Loading…
Cancel
Save