| @@ -257,11 +257,7 @@ void API_NS(scalar_mul) ( | |||||
| } | } | ||||
| /* PERF: could implement this */ | /* PERF: could implement this */ | ||||
| static INLINE void | |||||
| sc_montsqr ( | |||||
| scalar_t out, | |||||
| const scalar_t a | |||||
| ) { | |||||
| static INLINE void sc_montsqr (scalar_t out, const scalar_t a) { | |||||
| sc_montmul(out,a,a); | sc_montmul(out,a,a); | ||||
| } | } | ||||
| @@ -269,77 +265,60 @@ decaf_bool_t API_NS(scalar_invert) ( | |||||
| scalar_t out, | scalar_t out, | ||||
| const scalar_t a | const scalar_t a | ||||
| ) { | ) { | ||||
| #if 0 | |||||
| /* FIELD MAGIC. TODO PERF: not updated for 25519 */ | |||||
| scalar_t chain[7], tmp; | |||||
| sc_montmul(chain[0],a,API_NS(sc_r2)); | |||||
| unsigned int i,j; | |||||
| /* Addition chain generated by a not-too-clever SAGE script. First part: compute a^(2^222-1) */ | |||||
| const struct { uint8_t widx, sidx, sct, midx; } muls [] = { | |||||
| {2,0,1,0}, {3,2,1,0}, {4,3,1,0}, {5,4,1,0}, /* 0x3,7,f,1f */ | |||||
| {1,5,1,0}, {1,1,3,3}, {6,1,9,1}, {1,6,1,0}, {6,1,18,6}, /* a^(2^37-1) */ | |||||
| {1,6,37,6}, {1,1,37,6}, {1,1,111,1} /* a^(2^222-1) */ | |||||
| }; | |||||
| /* Second part: sliding window */ | |||||
| const struct { uint8_t sct, midx; } muls1 [] = { | |||||
| {6, 5}, {4, 2}, {3, 0}, {2, 0}, {4, 0}, {8, 5}, | |||||
| {2, 0}, {5, 3}, {4, 0}, {4, 0}, {5, 3}, {3, 2}, | |||||
| {3, 2}, {3, 2}, {2, 0}, {3, 0}, {4, 2}, {2, 0}, | |||||
| {4, 3}, {3, 2}, {2, 0}, {3, 2}, {5, 2}, {3, 2}, | |||||
| {2, 0}, {3, 0}, {7, 0}, {5, 0}, {3, 2}, {3, 2}, | |||||
| {4, 2}, {5, 0}, {5, 3}, {3, 0}, {2, 0}, {5, 2}, | |||||
| {4, 3}, {4, 0}, {3, 2}, {7, 4}, {2, 0}, {2, 0}, | |||||
| {2, 0}, {2, 0}, {3, 0}, {5, 2}, {5, 4}, {5, 2}, | |||||
| {5, 0}, {2, 0}, {3, 0}, {3, 0}, {2, 0}, {2, 0}, | |||||
| {2, 0}, {3, 2}, {2, 0}, {3, 2}, {5, 0}, {4, 0}, | |||||
| {6, 4}, {4, 0} | |||||
| }; | |||||
| for (i=0; i<sizeof(muls)/sizeof(muls[0]); i++) { | |||||
| sc_montsqr(tmp, chain[muls[i].sidx]); | |||||
| for (j=1; j<muls[i].sct; j++) { | |||||
| sc_montsqr(tmp, tmp); | |||||
| } | |||||
| sc_montmul(chain[muls[i].widx], tmp, chain[muls[i].midx]); | |||||
| } | |||||
| for (i=0; i<sizeof(muls1)/sizeof(muls1[0]); i++) { | |||||
| sc_montsqr(tmp, chain[1]); | |||||
| for (j=1; j<muls1[i].sct; j++) { | |||||
| sc_montsqr(tmp, tmp); | |||||
| } | |||||
| sc_montmul(chain[1], tmp, chain[muls1[i].midx]); | |||||
| /* Fermat's little theorem, sliding window. | |||||
| * Sliding window is fine here because the modulus isn't secret. | |||||
| */ | |||||
| const int SCALAR_WINDOW_BITS = 3; | |||||
| scalar_t precmp[1<<SCALAR_WINDOW_BITS]; | |||||
| const int LAST = (1<<SCALAR_WINDOW_BITS)-1; | |||||
| /* Precompute precmp = [a^1,a^3,...] */ | |||||
| sc_montmul(precmp[0],a,API_NS(sc_r2)); | |||||
| if (LAST > 0) sc_montmul(precmp[LAST],precmp[0],precmp[0]); | |||||
| int i; | |||||
| for (i=1; i<=LAST; i++) { | |||||
| sc_montmul(precmp[i],precmp[i-1],precmp[LAST]); | |||||
| } | } | ||||
| sc_montmul(out,chain[1],API_NS(scalar_one)); | |||||
| for (i=0; i<sizeof(chain)/sizeof(chain[0]); i++) { | |||||
| API_NS(scalar_destroy)(chain[i]); | |||||
| } | |||||
| return ~API_NS(scalar_eq)(out,API_NS(scalar_zero)); | |||||
| #else | |||||
| scalar_t b, ma; | |||||
| int i; | |||||
| sc_montmul(b,API_NS(scalar_one),API_NS(sc_r2)); | |||||
| sc_montmul(ma,a,API_NS(sc_r2)); | |||||
| for (i=SCALAR_BITS-1; i>=0; i--) { | |||||
| sc_montsqr(b,b); | |||||
| decaf_word_t w = sc_p->limb[i/WBITS]; | |||||
| if (i<WBITS) { | |||||
| /* Sliding window */ | |||||
| unsigned residue = 0, trailing = 0, started = 0; | |||||
| for (i=SCALAR_BITS-1; i>=-SCALAR_WINDOW_BITS; i--) { | |||||
| if (started) sc_montsqr(out,out); | |||||
| decaf_word_t w = (i>=0) ? sc_p->limb[i/WBITS] : 0; | |||||
| if (i >= 0 && i<WBITS) { | |||||
| assert(w >= 2); | assert(w >= 2); | ||||
| w-=2; | w-=2; | ||||
| } | } | ||||
| if (1 & w>>(i%WBITS)) { | |||||
| sc_montmul(b,b,ma); | |||||
| residue = (residue<<1) | ((w>>(i%WBITS))&1); | |||||
| if (residue>>SCALAR_WINDOW_BITS != 0) { | |||||
| assert(trailing == 0); | |||||
| trailing = residue; | |||||
| residue = 0; | |||||
| } | } | ||||
| if (trailing > 0 && (trailing & (1<<SCALAR_WINDOW_BITS)-1) == 0) { | |||||
| if (started) { | |||||
| sc_montmul(out,out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]); | |||||
| } else { | |||||
| API_NS(scalar_copy)(out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]); | |||||
| started = 1; | |||||
| } | |||||
| trailing = 0; | |||||
| } | |||||
| trailing <<= 1; | |||||
| } | } | ||||
| sc_montmul(out,b,API_NS(scalar_one)); | |||||
| API_NS(scalar_destroy)(b); | |||||
| API_NS(scalar_destroy)(ma); | |||||
| assert(residue==0); | |||||
| assert(trailing==0); | |||||
| /* Demontgomerize */ | |||||
| sc_montmul(out,out,API_NS(scalar_one)); | |||||
| decaf_bzero(precmp, sizeof(precmp)); | |||||
| return ~API_NS(scalar_eq)(out,API_NS(scalar_zero)); | return ~API_NS(scalar_eq)(out,API_NS(scalar_zero)); | ||||
| #endif | |||||
| } | } | ||||
| void API_NS(scalar_sub) ( | void API_NS(scalar_sub) ( | ||||