sliding window

9 years ago · 820c83a80c
--- a/src/decaf_fast.c
+++ b/src/decaf_fast.c
@@ -257,11 +257,7 @@ void API_NS(scalar_mul) (
 }

 /* PERF: could implement a dedicated Montgomery squaring; for now this just multiplies a by itself. */
 static INLINE void
 sc_montsqr (
    scalar_t out,
    const scalar_t a
 ) {
 static INLINE void sc_montsqr (scalar_t out, const scalar_t a) {
    sc_montmul(out,a,a);
 }

@@ -269,77 +265,60 @@ decaf_bool_t API_NS(scalar_invert) (
    scalar_t out,
    const scalar_t a
 ) {
 #if 0
    /* FIELD MAGIC.  TODO PERF: not updated for 25519 */
    scalar_t chain[7], tmp;
    sc_montmul(chain[0],a,API_NS(sc_r2));
    
    unsigned int i,j;
    /* Addition chain generated by a not-too-clever SAGE script.  First part: compute a^(2^222-1) */
    const struct { uint8_t widx, sidx, sct, midx; } muls [] = {
        {2,0,1,0}, {3,2,1,0}, {4,3,1,0}, {5,4,1,0}, /* 0x3,7,f,1f */
        {1,5,1,0}, {1,1,3,3}, {6,1,9,1}, {1,6,1,0}, {6,1,18,6}, /* a^(2^37-1) */
        {1,6,37,6}, {1,1,37,6}, {1,1,111,1} /* a^(2^222-1) */
    };
    /* Second part: sliding window */
    const struct { uint8_t sct, midx; } muls1 [] = {
        {6, 5}, {4, 2}, {3, 0}, {2, 0}, {4, 0}, {8, 5},
        {2, 0}, {5, 3}, {4, 0}, {4, 0}, {5, 3}, {3, 2},
        {3, 2}, {3, 2}, {2, 0}, {3, 0}, {4, 2}, {2, 0},
        {4, 3}, {3, 2}, {2, 0}, {3, 2}, {5, 2}, {3, 2},
        {2, 0}, {3, 0}, {7, 0}, {5, 0}, {3, 2}, {3, 2},
        {4, 2}, {5, 0}, {5, 3}, {3, 0}, {2, 0}, {5, 2},
        {4, 3}, {4, 0}, {3, 2}, {7, 4}, {2, 0}, {2, 0},
        {2, 0}, {2, 0}, {3, 0}, {5, 2}, {5, 4}, {5, 2},
        {5, 0}, {2, 0}, {3, 0}, {3, 0}, {2, 0}, {2, 0},
        {2, 0}, {3, 2}, {2, 0}, {3, 2}, {5, 0}, {4, 0},
        {6, 4}, {4, 0}
    };
    
    for (i=0; i<sizeof(muls)/sizeof(muls[0]); i++) {
        sc_montsqr(tmp, chain[muls[i].sidx]);
        for (j=1; j<muls[i].sct; j++) {
            sc_montsqr(tmp, tmp);
        }
        sc_montmul(chain[muls[i].widx], tmp, chain[muls[i].midx]);
    }
    
    for (i=0; i<sizeof(muls1)/sizeof(muls1[0]); i++) {
        sc_montsqr(tmp, chain[1]);
        for (j=1; j<muls1[i].sct; j++) {
            sc_montsqr(tmp, tmp);
        }
        sc_montmul(chain[1], tmp, chain[muls1[i].midx]);
    /* Inversion via Fermat's little theorem, using sliding-window exponentiation.
     * A (variable-time) sliding window is fine here because the exponent is
     * derived from the modulus, which isn't secret.
     */
    const int SCALAR_WINDOW_BITS = 3;
    scalar_t precmp[1<<SCALAR_WINDOW_BITS];
    const int LAST = (1<<SCALAR_WINDOW_BITS)-1;

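    /* Precompute the odd powers precmp = [a^1,a^3,...].
     * precmp[LAST] temporarily holds a^2, the step between consecutive
     * entries, and is overwritten by the final loop iteration.
     */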
    sc_montmul(precmp[0],a,API_NS(sc_r2));
    if (LAST > 0) sc_montmul(precmp[LAST],precmp[0],precmp[0]);

    int i;
    for (i=1; i<=LAST; i++) {
        sc_montmul(precmp[i],precmp[i-1],precmp[LAST]);
    }
    
    sc_montmul(out,chain[1],API_NS(scalar_one));
    for (i=0; i<sizeof(chain)/sizeof(chain[0]); i++) {
        API_NS(scalar_destroy)(chain[i]);
    }
    return ~API_NS(scalar_eq)(out,API_NS(scalar_zero));
 #else
    scalar_t b, ma;
    int i;
    sc_montmul(b,API_NS(scalar_one),API_NS(sc_r2));
    sc_montmul(ma,a,API_NS(sc_r2));
    for (i=SCALAR_BITS-1; i>=0; i--) {
        sc_montsqr(b,b);
            
        decaf_word_t w = sc_p->limb[i/WBITS];
        if (i<WBITS) {
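    /* Sliding window over the bits of the modulus minus 2, from the top bit
     * down; the loop runs SCALAR_WINDOW_BITS extra steps with zero bits so
     * that any pending window is emitted.
     * NOTE(review): for i<0 the shift count i%WBITS below is negative if
     * WBITS is a signed constant, which is undefined behavior in C even
     * though w is 0 there -- confirm and guard the shift.
     */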
    unsigned residue = 0, trailing = 0, started = 0;
    for (i=SCALAR_BITS-1; i>=-SCALAR_WINDOW_BITS; i--) {
        
        if (started) sc_montsqr(out,out);
        
        decaf_word_t w = (i>=0) ? sc_p->limb[i/WBITS] : 0;
        if (i >= 0 && i<WBITS) {
            assert(w >= 2);
            w-=2;
        }
        if (1 & w>>(i%WBITS)) {
            sc_montmul(b,b,ma);
        
        residue = (residue<<1) | ((w>>(i%WBITS))&1);
        if (residue>>SCALAR_WINDOW_BITS != 0) {
            assert(trailing == 0);
            trailing = residue;
            residue = 0;
        }
        
        if (trailing > 0 && (trailing & (1<<SCALAR_WINDOW_BITS)-1) == 0) {
            if (started) {
                sc_montmul(out,out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]);
            } else {
                API_NS(scalar_copy)(out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]);
                started = 1;
            }
            trailing = 0;
        }
        trailing <<= 1;
        
    }

    sc_montmul(out,b,API_NS(scalar_one));
    API_NS(scalar_destroy)(b);
    API_NS(scalar_destroy)(ma);
    assert(residue==0);
    assert(trailing==0);
    
    /* Demontgomerize */
    sc_montmul(out,out,API_NS(scalar_one));
    decaf_bzero(precmp, sizeof(precmp));
    return ~API_NS(scalar_eq)(out,API_NS(scalar_zero));
 #endif
 }

 void API_NS(scalar_sub) (