| @@ -257,11 +257,7 @@ void API_NS(scalar_mul) ( | |||
| } | |||
| /* Squares a into out by calling sc_montmul(out,a,a). PERF: could implement a dedicated squaring routine. NOTE(review): two signatures for this function appear below (multi-line and single-line); presumably diff residue, confirm which one is current. */ | |||
| static INLINE void | |||
| sc_montsqr ( | |||
| scalar_t out, | |||
| const scalar_t a | |||
| ) { | |||
| static INLINE void sc_montsqr (scalar_t out, const scalar_t a) { | |||
| sc_montmul(out,a,a); | |||
| } | |||
| @@ -269,77 +265,60 @@ decaf_bool_t API_NS(scalar_invert) ( | |||
| scalar_t out, | |||
| const scalar_t a | |||
| ) { | |||
| #if 0 | |||
| /* FIELD MAGIC: inversion through a hard-coded addition chain (tables below). TODO PERF: not updated for 25519 */ | |||
| scalar_t chain[7], tmp; | |||
| sc_montmul(chain[0],a,API_NS(sc_r2)); /* chain[0] = a times sc_r2; presumably converts a to Montgomery form, confirm against sc_montmul */ | |||
| unsigned int i,j; | |||
| /* Addition chain generated by a not-too-clever SAGE script. First part: compute a^(2^222-1). Each entry means: square chain[sidx] sct times, multiply by chain[midx], store the result in chain[widx]. */ | |||
| const struct { uint8_t widx, sidx, sct, midx; } muls [] = { | |||
| {2,0,1,0}, {3,2,1,0}, {4,3,1,0}, {5,4,1,0}, /* 0x3,7,f,1f */ | |||
| {1,5,1,0}, {1,1,3,3}, {6,1,9,1}, {1,6,1,0}, {6,1,18,6}, /* a^(2^37-1) */ | |||
| {1,6,37,6}, {1,1,37,6}, {1,1,111,1} /* a^(2^222-1) */ | |||
| }; | |||
| /* Second part: sliding window. Each entry means: square chain[1] sct times, then multiply it by chain[midx]. */ | |||
| const struct { uint8_t sct, midx; } muls1 [] = { | |||
| {6, 5}, {4, 2}, {3, 0}, {2, 0}, {4, 0}, {8, 5}, | |||
| {2, 0}, {5, 3}, {4, 0}, {4, 0}, {5, 3}, {3, 2}, | |||
| {3, 2}, {3, 2}, {2, 0}, {3, 0}, {4, 2}, {2, 0}, | |||
| {4, 3}, {3, 2}, {2, 0}, {3, 2}, {5, 2}, {3, 2}, | |||
| {2, 0}, {3, 0}, {7, 0}, {5, 0}, {3, 2}, {3, 2}, | |||
| {4, 2}, {5, 0}, {5, 3}, {3, 0}, {2, 0}, {5, 2}, | |||
| {4, 3}, {4, 0}, {3, 2}, {7, 4}, {2, 0}, {2, 0}, | |||
| {2, 0}, {2, 0}, {3, 0}, {5, 2}, {5, 4}, {5, 2}, | |||
| {5, 0}, {2, 0}, {3, 0}, {3, 0}, {2, 0}, {2, 0}, | |||
| {2, 0}, {3, 2}, {2, 0}, {3, 2}, {5, 0}, {4, 0}, | |||
| {6, 4}, {4, 0} | |||
| }; | |||
| for (i=0; i<sizeof(muls)/sizeof(muls[0]); i++) { /* run the first table */ | |||
| sc_montsqr(tmp, chain[muls[i].sidx]); | |||
| for (j=1; j<muls[i].sct; j++) { | |||
| sc_montsqr(tmp, tmp); | |||
| } | |||
| sc_montmul(chain[muls[i].widx], tmp, chain[muls[i].midx]); | |||
| } | |||
| for (i=0; i<sizeof(muls1)/sizeof(muls1[0]); i++) { /* run the second table, accumulating in chain[1] */ | |||
| sc_montsqr(tmp, chain[1]); | |||
| for (j=1; j<muls1[i].sct; j++) { | |||
| sc_montsqr(tmp, tmp); | |||
| } | |||
| sc_montmul(chain[1], tmp, chain[muls1[i].midx]); | |||
| /* Fermat's little theorem, sliding window. | |||
| * Sliding window is fine here because the modulus isn't secret. | |||
| * The exponent bits are read from sc_p below, with 2 subtracted from limb[0]. | |||
| */ | |||
| const int SCALAR_WINDOW_BITS = 3; | |||
| scalar_t precmp[1<<SCALAR_WINDOW_BITS]; | |||
| const int LAST = (1<<SCALAR_WINDOW_BITS)-1; | |||
| /* Precompute precmp = [a^1,a^3,...]. precmp[LAST] first holds the square of precmp[0] as the step between odd powers; the final loop iteration overwrites it with the last odd power. */ | |||
| sc_montmul(precmp[0],a,API_NS(sc_r2)); | |||
| if (LAST > 0) sc_montmul(precmp[LAST],precmp[0],precmp[0]); | |||
| int i; | |||
| for (i=1; i<=LAST; i++) { | |||
| sc_montmul(precmp[i],precmp[i-1],precmp[LAST]); | |||
| } | |||
| sc_montmul(out,chain[1],API_NS(scalar_one)); /* multiply by scalar_one; presumably leaves Montgomery form, confirm */ | |||
| for (i=0; i<sizeof(chain)/sizeof(chain[0]); i++) { | |||
| API_NS(scalar_destroy)(chain[i]); | |||
| } | |||
| return ~API_NS(scalar_eq)(out,API_NS(scalar_zero)); | |||
| #else | |||
| scalar_t b, ma; | |||
| int i; | |||
| sc_montmul(b,API_NS(scalar_one),API_NS(sc_r2)); | |||
| sc_montmul(ma,a,API_NS(sc_r2)); | |||
| for (i=SCALAR_BITS-1; i>=0; i--) { | |||
| sc_montsqr(b,b); | |||
| decaf_word_t w = sc_p->limb[i/WBITS]; | |||
| if (i<WBITS) { | |||
| /* Sliding window: scan the exponent bits from the top down, then SCALAR_WINDOW_BITS extra zero bits (w is 0 for i<0), presumably to flush the last window. */ | |||
| unsigned residue = 0, trailing = 0, started = 0; | |||
| for (i=SCALAR_BITS-1; i>=-SCALAR_WINDOW_BITS; i--) { | |||
| if (started) sc_montsqr(out,out); /* no squaring until the first window has been copied into out */ | |||
| decaf_word_t w = (i>=0) ? sc_p->limb[i/WBITS] : 0; | |||
| if (i >= 0 && i<WBITS) { | |||
| assert(w >= 2); | |||
| w-=2; /* bits of limb[0] are read from limb[0]-2, so the exponent used is sc_p minus 2 */ | |||
| } | |||
| if (1 & w>>(i%WBITS)) { | |||
| sc_montmul(b,b,ma); | |||
| residue = (residue<<1) | ((w>>(i%WBITS))&1); /* NOTE(review): for i<0, i%WBITS is negative if WBITS is a signed int, and a negative shift count is undefined in C; confirm or guard */ | |||
| if (residue>>SCALAR_WINDOW_BITS != 0) { /* residue has grown past SCALAR_WINDOW_BITS bits: hand it to trailing */ | |||
| assert(trailing == 0); | |||
| trailing = residue; | |||
| residue = 0; | |||
| } | |||
| if (trailing > 0 && (trailing & (1<<SCALAR_WINDOW_BITS)-1) == 0) { /* low SCALAR_WINDOW_BITS bits of trailing are zero: fold the pending window into out */ | |||
| if (started) { | |||
| sc_montmul(out,out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]); | |||
| } else { | |||
| API_NS(scalar_copy)(out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]); | |||
| started = 1; | |||
| } | |||
| trailing = 0; | |||
| } | |||
| trailing <<= 1; | |||
| } | |||
| sc_montmul(out,b,API_NS(scalar_one)); | |||
| API_NS(scalar_destroy)(b); | |||
| API_NS(scalar_destroy)(ma); | |||
| assert(residue==0); | |||
| assert(trailing==0); | |||
| /* Demontgomerize: one more sc_montmul, by scalar_one */ | |||
| sc_montmul(out,out,API_NS(scalar_one)); | |||
| decaf_bzero(precmp, sizeof(precmp)); /* clear the table of precomputed powers */ | |||
| return ~API_NS(scalar_eq)(out,API_NS(scalar_zero)); /* bitwise complement of the out == scalar_zero comparison result */ | |||
| #endif | |||
| } | |||
| void API_NS(scalar_sub) ( | |||