Browse Source

precomputed scalarmul almost ported, but doesn't work yet

master
Mike Hamburg 10 years ago
parent
commit
f5b9826959
1 changed files with 191 additions and 22 deletions
  1. +191
    -22
      src/decaf_fast.c

+ 191
- 22
src/decaf_fast.c View File

@@ -89,12 +89,13 @@ const decaf_448_point_t decaf_448_point_base = {{
LIMB(0x723a55709a3983),LIMB(0xe1c0107a823dd4) }
}};

struct decaf_448_precomputed_s {
decaf_448_point_t p[1];
};
/* Projective Niels coordinates */
/* niels_s: three field elements a, b, c.  niels_to_pt rebuilds a point from
 * such an entry as x = b - a, y = b + a. */
typedef struct { gf a, b, c; } niels_s, niels_t[1];
/* pniels_s: a niels entry together with a separate field element z. */
typedef struct { niels_t n; gf z; } pniels_s, pniels_t[1];
/* Precomputed table of niels entries.  5<<4 (= 80) must match the n<<(t-1)
 * entries that decaf_448_precompute writes with n = 5, t = 5. */
struct decaf_448_precomputed_s { niels_t table [5<<4]; /* MAGIC */ };

const struct decaf_448_precomputed_s *decaf_448_precomputed_base =
(const struct decaf_448_precomputed_s *)decaf_448_point_base;
/* Table object for the base point, and the public pointer to it.
 * NOTE(review): no initializer is visible for decaf_448_precomputed_base_s; a
 * file-scope object without one is zero-initialized, which would make this an
 * all-zero table.  Confirm where the base table contents are meant to come from. */
const struct decaf_448_precomputed_s decaf_448_precomputed_base_s,
*decaf_448_precomputed_base = &decaf_448_precomputed_base_s;

/* Size of the precomputed-table struct and the alignment value (32) published
 * for it, exported as data objects.
 * NOTE(review): presumably so callers can allocate a table without seeing the
 * struct definition -- confirm against the public header. */
const size_t sizeof_decaf_448_precomputed_s = sizeof(struct decaf_448_precomputed_s);
const size_t alignof_decaf_448_precomputed_s = 32;
@@ -704,16 +705,13 @@ void decaf_448_point_scalarmul_xxx (
decaf_448_point_sub(a,w,tmp);
}

/* Projective Niels coordinates */
typedef struct { gf a, b, c; } niels_s, niels_t[1];
typedef struct { niels_t n; gf z; } pniels_s, pniels_t[1];

static void cond_neg_pniels (
pniels_t b,
/* Operations on [p]niels */

/**
 * Conditionally negate a Niels-form entry in place.
 *
 * Applies cond_swap to the (a, b) pair and cond_neg to c, both controlled by
 * the same selector, so either all three fields change or none do.
 *
 * @param n   Entry to modify.
 * @param neg Selector forwarded unchanged to cond_swap and cond_neg.
 *            NOTE(review): presumably a mask (all-zeros = leave, all-ones =
 *            negate) -- confirm against cond_swap/cond_neg.
 */
static void cond_neg_niels (
    niels_t n,
    decaf_bool_t neg
) {
    cond_swap(n->a, n->b, neg);
    cond_neg(n->c, neg);
}

static void pt_to_pniels (
@@ -739,6 +737,16 @@ static void pniels_to_pt (
gf_sqr ( e->z, d->z );
}

/**
 * Expand a Niels-form entry into a full point.
 *
 * The result has z = ONE, x = b - a, y = b + a and t = y * x.
 *
 * @param e Destination point; every coordinate is overwritten.
 * @param n Source entry; only its a and b fields are read.
 */
static void niels_to_pt (
    decaf_448_point_t e,
    const niels_t n
) {
    /* z does not depend on the entry, so set it up front. */
    gf_cpy ( e->z, ONE );

    /* x and y are the difference and the sum of the two stored fields. */
    gf_sub ( e->x, n->b, n->a );
    gf_add ( e->y, n->b, n->a );

    /* t is derived from the freshly written y and x. */
    gf_mul ( e->t, e->y, e->x );
}

static void add_niels_to_pt (
decaf_448_point_t d,
const niels_t e,
@@ -814,7 +822,7 @@ void decaf_448_point_scalarmul (
bits ^= inv;
constant_time_lookup(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK);
cond_neg_pniels(pn, inv);
cond_neg_niels(pn->n, inv);
pniels_to_pt(tmp, pn);

for (i-=WINDOW; i>=0; i-=WINDOW) {
@@ -837,7 +845,7 @@ void decaf_448_point_scalarmul (
/* Add in from table. Compute t only on last iteration. */
constant_time_lookup(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK);
cond_neg_pniels(pn, inv);
cond_neg_niels(pn->n, inv);
add_pniels_to_pt(tmp, pn, i ? -1 : 0);
}
@@ -961,19 +969,180 @@ decaf_bool_t decaf_448_point_valid (
return out;
}

void decaf_448_precompute (
decaf_448_precomputed_s *a,
const decaf_448_point_t b
// void decaf_448_precompute (
// decaf_448_precomputed_s *a,
// const decaf_448_point_t b
// ) {
// decaf_448_point_copy(a->p[0],b);
// }

// void decaf_448_precomputed_scalarmul (
// decaf_448_point_t a,
// const decaf_448_precomputed_s *b,
// const decaf_448_scalar_t scalar
// ) {
// decaf_448_point_scalarmul(a,b->p[0],scalar);
// }

/**
 * Invert n field elements using one gf_isqrt call plus multiplications.
 *
 * On return out[i] holds the inverse of in[i] for every i < n.  The routine
 * builds running products of the inputs in `out`, inverts the total product
 * once, and then peels the individual inverses off from the back.
 *
 * @param out Receives the n inverses; must not overlap `in` (it is declared
 *            __restrict__ and is used as scratch while `in` is still read).
 * @param in  The n elements to invert.
 *            NOTE(review): every output is derived from the product of all
 *            inputs, so a zero input presumably spoils every result --
 *            confirm callers never pass one.
 * @param n   Number of elements; n == 0 is a no-op.
 */
void gf_batch_invert (
    gf *__restrict__ out,
    const gf *in,
    unsigned int n
) {
    gf t1, t2;
    unsigned int i;

    if (n == 0) {
        return;
    }

    /* Forward pass: out[i+1] = in[0] * ... * in[i] for i < n-1, and the
     * product of all n inputs is left in out[0]. */
    if (n == 1) {
        gf_cpy(out[0], in[0]);
    } else {
        gf_cpy(out[1], in[0]);
        for (i=1; i+1<n; i++) {
            gf_mul(out[i+1], out[i], in[i]);
        }
        gf_mul(out[0], out[n-1], in[n-1]);
    }

    /* Invert the total product x = out[0] as isqrt(x)^4 * x.
     * NOTE(review): assumes gf_isqrt yields t1 with t1^2 = +-1/x; squaring
     * twice removes the sign ambiguity -- confirm against gf_isqrt. */
    gf_isqrt(t1, out[0]);
    gf_sqr(t2, t1);
    gf_sqr(t1, t2);
    gf_mul(t2, t1, out[0]);
    gf_cpy(out[0], t2);

    /* Backward pass.  At the top of each iteration out[0] is the inverse of
     * in[0]*...*in[i] and out[i] is in[0]*...*in[i-1], so their product is
     * 1/in[i]; multiplying out[0] by in[i] then drops in[i] from the running
     * inverse.  After the loop out[0] is 1/in[0]. */
    for (i=n-1; i>0; i--) {
        gf_mul(t1, out[i], out[0]);
        gf_cpy(out[i], t1);
        gf_mul(t1, out[0], in[i]);
        gf_cpy(out[0], t1);
    }
}

/*
 * Fill `table` with the precomputed entries for `base` that
 * decaf_448_precomputed_scalarmul reads.
 *
 * The table consists of n sub-tables of 1<<(t-1) entries each; sub-table i
 * occupies indices i<<(t-1) .. ((i+1)<<(t-1))-1.  Entries are first stored
 * together with a per-entry z (kept in zs[]), then all z values are inverted
 * in one batch and multiplied into a, b and c.
 *
 * table: output, must hold n<<(t-1) niels entries.
 * base:  point to build the table for.
 */
void
decaf_448_precompute (
struct decaf_448_precomputed_s *table,
const decaf_448_point_t base
) {
const int n = 5, t = 5, s = 18; // TODO MAGIC
/* The n*t*s bit positions used by the table must cover every scalar bit. */
assert(n*t*s >= DECAF_448_SCALAR_BITS);
decaf_448_point_t working, start, doubles[t-1];
decaf_448_point_copy(working, base);
pniels_t pn_tmp;
/* zs[idx] holds the z of table entry idx; zis[] receives the inverses. */
gf zs[n<<(t-1)], zis[n<<(t-1)];
unsigned int i,j,k;
/* Compute n tables */
for (i=0; i<n; i++) {

/* Doubling phase */
/* `start` accumulates the t values `working` has at the top of each j
 * iteration.  Between iterations `working` is doubled s times in total
 * (one decaf_448_point_double plus s-1 internal doublings); doubles[j]
 * records `working` right after the first of those doublings. */
for (j=0; j<t; j++) {
if (j) decaf_448_point_add(start, start, working);
else decaf_448_point_copy(start, working);

/* Nothing uses `working` after the very last accumulation. */
if (j==t-1 && i==n-1) break;

decaf_448_point_double(working, working);
if (j<t-1) decaf_448_point_copy(doubles[j], working);

/* NOTE(review): the third argument is nonzero for every internal doubling
 * except the last; its meaning is defined by
 * decaf_448_point_double_internal -- confirm. */
for (k=0; k<s-1; k++)
decaf_448_point_double_internal(working, working, k<s-2);
}

/* Gray-code phase */
/* Walk the 1<<(t-1) entries of sub-table i in Gray-code order so that each
 * step changes `start` by exactly one doubles[k]. */
for (j=0;; j++) {
int gray = j ^ (j>>1);
/* Last index of sub-table i, with the bits of `gray` flipped. */
int idx = (((i+1)<<(t-1))-1) ^ gray;

pt_to_pniels(pn_tmp, start);
memcpy(table->table[idx], pn_tmp->n, sizeof(pn_tmp->n));
gf_cpy(zs[idx], pn_tmp->z);
if (j >= (1u<<(t-1)) - 1) break;
/* delta is the single bit in which this Gray code differs from the next. */
int delta = (j+1) ^ ((j+1)>>1) ^ gray;

/* k = position of that bit. */
for (k=0; delta>1; k++)
delta >>=1;
/* Bit k currently set in `gray` -> add doubles[k]; otherwise subtract it. */
if (gray & (1<<k)) {
decaf_448_point_add(start, start, doubles[k]);
} else {
decaf_448_point_sub(start, start, doubles[k]);
}
}
}
/* Invert every saved z in one call. */
gf_batch_invert(zis, zs, n<<(t-1));

/* Multiply a, b, c of each entry by the inverse of its saved z.
 * NOTE(review): gf_canon presumably reduces to canonical form -- confirm. */
gf product;
for (i=0; i<n<<(t-1); i++) {
gf_mul(product, table->table[i]->a, zis[i]);
gf_canon(product);
gf_cpy(table->table[i]->a, product);
gf_mul(product, table->table[i]->b, zis[i]);
gf_canon(product);
gf_cpy(table->table[i]->b, product);
gf_mul(product, table->table[i]->c, zis[i]);
gf_canon(product);
gf_cpy(table->table[i]->c, product);
}
}

/**
 * Scalar multiplication using a table built by decaf_448_precompute.
 *
 * The scalar is first recoded as scalar2 = (scalar - 1)/2 + arrr.  The main
 * loop then runs s rounds; each round doubles the accumulator (except the
 * first) and adds one entry from each of the n sub-tables, selected by t
 * bits of scalar2 spaced s positions apart.
 *
 * @param out    Receives the result; fully overwritten.
 * @param table  Precomputed table with n<<(t-1) entries.
 * @param scalar Scalar to multiply by.
 */
void decaf_448_precomputed_scalarmul (
    decaf_448_point_t out,
    const struct decaf_448_precomputed_s *table,
    const decaf_448_scalar_t scalar
) {
    unsigned int i,j,k;
    const int n = 5, t = 5, s = 18; // TODO MAGIC
    decaf_448_scalar_t scalar2, onehalf = {{{0}}}, two = {{{2}}}, arrr;

    /* Number of bits actually stored in scalar2.  The n*t*s bit positions
     * visited below can exceed this, so it bounds every limb access. */
    const unsigned int scalar2_bits =
        (unsigned int)(sizeof(scalar2->limb)/sizeof(scalar2->limb[0])) * WBITS;

    /* Top bit of the top limb set; used as the montmul operand for halving. */
    onehalf->limb[SCALAR_WORDS-1] = 1ull<<(WBITS-1);

    /* FIXME PERF MAGIC precompute 2^449-1/2 mod q. Could instead use 2^446-1/2 mod q though. */
    decaf_448_montmul(arrr,two,decaf_448_scalar_r2,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);

    /* PERF dedicated halve */
    decaf_448_scalar_sub(scalar2, scalar, decaf_448_scalar_one);
    decaf_448_montmul(scalar2,scalar2,onehalf,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);
    decaf_448_scalar_add(scalar2, scalar2, arrr);

    niels_t ni;
    for (i=0; i<s; i++) {
        if (i) decaf_448_point_double(out,out);

        for (j=0; j<n; j++) {
            /* Gather the t bits for sub-table j in this round. */
            int tab = 0;
            for (k=0; k<t; k++) {
                unsigned int bit = (s-1-i) + k*s + j*(s*t);
                /* Positions past the end of scalar2 count as zero bits. */
                if (bit < scalar2_bits) {
                    tab |= (scalar2->limb[bit/WBITS] >> (bit%WBITS) & 1) << k;
                }
            }

            /* The top gathered bit selects the sign: when it is clear,
             * invert is all-ones, the remaining bits are complemented and
             * the looked-up entry is negated. */
            mask_t invert = (tab>>(t-1))-1;
            tab ^= invert;
            tab &= (1<<(t-1)) - 1;

            constant_time_lookup(ni, &table->table[j<<(t-1)], sizeof(ni), 1<<(t-1), tab);
            cond_neg_niels(ni, invert);

            if (i||j) {
                /* The flag is nonzero on the last sub-table of every round
                 * but the final one, i.e. right before the next doubling.
                 * NOTE(review): its exact meaning is defined by
                 * add_niels_to_pt -- confirm. */
                add_niels_to_pt(out, ni, (j==n-1 && i<s-1));
            } else {
                /* Very first entry initialises the accumulator. */
                niels_to_pt(out, ni);
            }
        }
    }
}

decaf_bool_t decaf_448_direct_scalarmul (


Loading…
Cancel
Save