Add scalar inversion, with unoptimized and optimized implementations, plus tests. It might be faster with a dedicated montsqr and/or a blinded EGCD, but this is good enough for now.

10 years ago · 2dacf20e3e
--- a/include/decaf.h
+++ b/include/decaf.h
@@ -16,7 +16,8 @@
 * decaf_448_decode can fail because not every sequence of bytes is a valid group
 * element.
 *
 * The formulas contain no data-dependent branches, timing or memory accesses.
 * The formulas contain no data-dependent branches, timing or memory accesses,
 * except for decaf_448_base_double_scalarmul_non_secret.
 *
 * This library may support multiple curves eventually.  The Ed448-Goldilocks
 * specific identifiers are prefixed with DECAF_448 or decaf_448.
@@ -116,8 +117,6 @@ extern const struct decaf_448_precomputed_s *decaf_448_precomputed_base API_VIS;
 #ifdef __cplusplus
 extern "C" {
 #endif
    
 /* TODO: scalar invert? */

 /**
 * @brief Read a scalar from wire format or from bytes.
@@ -207,6 +206,17 @@ void decaf_448_scalar_mul (
    const decaf_448_scalar_t b
 ) API_VIS NONNULL3 NOINLINE;

 /**
 * @brief Invert a scalar.  The input and output may alias.
 *
 * When passed zero, the output is set to zero and the return value is 0
 * (there is no inverse to report).
 *
 * @param [in] a A scalar.
 * @param [out] out 1/a, or zero if a is zero.
 * @retval DECAF_TRUE The input is nonzero.
 * @retval 0 The input is zero.
 */  
 decaf_bool_t decaf_448_scalar_invert (
    decaf_448_scalar_t out,
    const decaf_448_scalar_t a
 ) API_VIS NONNULL2 NOINLINE;

 /**
 * @brief Copy a scalar.  The scalars may use the same memory, in which
 * case this function does nothing.
--- a/src/decaf.c
+++ b/src/decaf.c
@@ -11,6 +11,7 @@
 #define __STDC_WANT_LIB_EXT1__ 1 /* for memset_s */
 #include "decaf.h"
 #include <string.h>
 #include <assert.h>

 #define WBITS DECAF_WORD_BITS

@@ -363,6 +364,33 @@ void decaf_448_scalar_mul (
    decaf_448_montmul(out,out,decaf_448_scalar_r2,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);
 }

 decaf_bool_t decaf_448_scalar_invert (
    decaf_448_scalar_t out,
    const decaf_448_scalar_t a
 ) {
    /* Inversion by exponentiation: raise a to the power (decaf_448_scalar_p - 2)
     * using left-to-right square-and-multiply.  The exponent bits are taken
     * from the modulus constant, so the branch below depends only on that
     * constant and the loop index, never on a.
     */
    decaf_448_scalar_t acc, a_mont;
    int bit = DECAF_448_SCALAR_BITS;

    /* Multiply 1 and a by r2 to put them in the representation that
     * decaf_448_montmul operates on. */
    decaf_448_montmul(acc,decaf_448_scalar_one,decaf_448_scalar_r2,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);
    decaf_448_montmul(a_mont,a,decaf_448_scalar_r2,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);

    /* Walk the exponent from its top bit down to bit 0. */
    while (bit-- > 0) {
        decaf_word_t exp_limb;

        /* Square for every exponent bit... */
        decaf_448_montmul(acc,acc,acc,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);

        exp_limb = decaf_448_scalar_p->limb[bit/WBITS];
        if (bit < WBITS) {
            /* Lowest limb: subtract 2 to turn p into p-2.  This only works
             * without a borrow if the limb is at least 2, hence the assert. */
            assert(exp_limb >= 2);
            exp_limb -= 2;
        }

        /* ...and multiply by a when the bit is set. */
        if ((exp_limb >> (bit%WBITS)) & 1) {
            decaf_448_montmul(acc,acc,a_mont,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);
        }
    }

    /* Multiplying by plain one converts the result back for the caller. */
    decaf_448_montmul(out,acc,decaf_448_scalar_one,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);

    /* Wipe the temporaries, which hold values derived from a. */
    decaf_448_scalar_destroy(acc);
    decaf_448_scalar_destroy(a_mont);

    /* Complement of the "out equals zero" comparison result. */
    return ~decaf_448_scalar_eq(out,decaf_448_scalar_zero);
 }

 void decaf_448_scalar_sub (
    decaf_448_scalar_t out,
    const decaf_448_scalar_t a,
--- a/src/decaf_fast.c
+++ b/src/decaf_fast.c
@@ -340,6 +340,66 @@ void decaf_448_scalar_mul (
    decaf_448_montmul(out,out,decaf_448_scalar_r2,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);
 }

 /**
 * Square a scalar with the Montgomery multiplier: out = montmul(a, a)
 * with modulus decaf_448_scalar_p.
 *
 * PERF: this simply forwards to the general multiply; a dedicated squaring
 * routine could be implemented here.
 */
 siv decaf_448_montsqr (
    decaf_448_scalar_t out,
    const decaf_448_scalar_t a
 ) {
    decaf_448_montmul(out,a,a,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);
 }

 /**
 * Invert a scalar by exponentiation along a fixed addition chain.
 *
 * The schedule of squarings and multiplications is a compile-time table that
 * does not depend on the input.  out and a may alias: a is read only once,
 * into chain[0], before out is written.
 *
 * @param [out] out The inverse of a; zero when a is zero.
 * @param [in] a The scalar to invert.
 * @return The complement of decaf_448_scalar_eq(out, zero): set when the
 * result is nonzero, 0 when it is zero.
 */
 decaf_bool_t decaf_448_scalar_invert (
    decaf_448_scalar_t out,
    const decaf_448_scalar_t a
 ) {
    decaf_448_scalar_t chain[7], tmp;
    unsigned int i,j;

    /* Addition chain generated by a not-too-clever SAGE script.
     * First part: compute a^(2^222-1).  Each step is
     *   chain[widx] = chain[sidx]^(2^sct) * chain[midx].
     * The tables are public constants, so keep them out of the stack frame.
     */
    static const struct { uint8_t widx, sidx, sct, midx; } muls [] = {
        {2,0,1,0}, {3,2,1,0}, {4,3,1,0}, {5,4,1,0}, /* 0x3,7,f,1f */
        {1,5,1,0}, {1,1,3,3}, {6,1,9,1}, {1,6,1,0}, {6,1,18,6}, /* a^(2^37-1) */
        {1,6,37,6}, {1,1,37,6}, {1,1,111,1} /* a^(2^222-1) */
    };
    /* Second part: sliding window.  Each step is
     *   chain[1] = chain[1]^(2^sct) * chain[midx].
     */
    static const struct { uint8_t sct, midx; } muls1 [] = {
        {6, 5}, {4, 2}, {3, 0}, {2, 0}, {4, 0}, {8, 5},
        {2, 0}, {5, 3}, {4, 0}, {4, 0}, {5, 3}, {3, 2},
        {3, 2}, {3, 2}, {2, 0}, {3, 0}, {4, 2}, {2, 0},
        {4, 3}, {3, 2}, {2, 0}, {3, 2}, {5, 2}, {3, 2},
        {2, 0}, {3, 0}, {7, 0}, {5, 0}, {3, 2}, {3, 2},
        {4, 2}, {5, 0}, {5, 3}, {3, 0}, {2, 0}, {5, 2},
        {4, 3}, {4, 0}, {3, 2}, {7, 4}, {2, 0}, {2, 0},
        {2, 0}, {2, 0}, {3, 0}, {5, 2}, {5, 4}, {5, 2},
        {5, 0}, {2, 0}, {3, 0}, {3, 0}, {2, 0}, {2, 0},
        {2, 0}, {3, 2}, {2, 0}, {3, 2}, {5, 0}, {4, 0},
        {6, 4}, {4, 0}
    };

    /* chain[0] = a, multiplied by r2 into the form decaf_448_montmul works on. */
    decaf_448_montmul(chain[0],a,decaf_448_scalar_r2,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);

    for (i=0; i<sizeof(muls)/sizeof(muls[0]); i++) {
        /* The first squaring reads the source entry; the remaining sct-1
         * squarings are done in place in tmp. */
        decaf_448_montsqr(tmp, chain[muls[i].sidx]);
        for (j=1; j<muls[i].sct; j++) {
            decaf_448_montsqr(tmp, tmp);
        }
        decaf_448_montmul(chain[muls[i].widx], tmp, chain[muls[i].midx], decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);
    }

    for (i=0; i<sizeof(muls1)/sizeof(muls1[0]); i++) {
        decaf_448_montsqr(tmp, chain[1]);
        for (j=1; j<muls1[i].sct; j++) {
            decaf_448_montsqr(tmp, tmp);
        }
        decaf_448_montmul(chain[1], tmp, chain[muls1[i].midx], decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);
    }

    /* Multiplying by plain one converts the result back for the caller. */
    decaf_448_montmul(out,chain[1],decaf_448_scalar_one,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);

    /* Wipe every temporary derived from a.  tmp still holds the last
     * intermediate power, so it must be cleared along with the chain. */
    for (i=0; i<sizeof(chain)/sizeof(chain[0]); i++) {
        decaf_448_scalar_destroy(chain[i]);
    }
    decaf_448_scalar_destroy(tmp);

    return ~decaf_448_scalar_eq(out,decaf_448_scalar_zero);
 }

 void decaf_448_scalar_sub (
    decaf_448_scalar_t out,
    const decaf_448_scalar_t a,
--- a/test/bench.c
+++ b/test/bench.c
@@ -276,6 +276,13 @@ int main(int argc, char **argv) {
    when = now() - when;
    printf("decaf mulsc: %5.1fns\n", when * 1e9 / i);
    
    when = now();
    for (i=0; i<nbase/10; i++) {
        decaf_448_scalar_invert(csc,bsc);
    }
    when = now() - when;
    printf("decaf invsc: %5.1fµs\n", when * 1e6 / i);
    
    memset(&ext,0,sizeof(ext));
    memset(&niels,0,sizeof(niels)); /* avoid assertions in p521 even though this isn't a valid ext or niels */

--- a/test/test_arithmetic.c
+++ b/test/test_arithmetic.c
@@ -215,7 +215,8 @@ static mask_t test_add_sub_RAW (

 static mask_t test_scalar (
    const mpz_t x,
    const mpz_t y
    const mpz_t y,
    int inv
 ) {
    decaf_448_scalar_t xx,yy,tt;
    mpz_t t;
@@ -236,6 +237,18 @@ static mask_t test_scalar (
    mpz_mul(t,x,y);
    succ &= scalar_assert_eq_gmp("scalar mul",xx,yy,tt,x,y,t);
    
    if (inv) {
        decaf_bool_t ret = decaf_448_scalar_invert(tt,xx);
        if (!mpz_cmp_ui(x,0)) {
            mpz_set_ui(t,0);
            succ &= (ret == 0) ? MASK_SUCCESS : MASK_FAILURE;
        } else {
            mpz_invert(t,x,mp_scalar_field);
            succ &= (ret == MASK_SUCCESS) ? MASK_SUCCESS : MASK_FAILURE;
        }
        succ &= scalar_assert_eq_gmp("scalar inv",xx,yy,tt,x,y,t);
    }
    
    mpz_clear(t);
    
    return succ;
@@ -361,7 +374,7 @@ int test_arithmetic (void) {
        
        succ &= test_add_sub_RAW(x,y,word);
        succ &= test_mul_sqr(x,y,word);
        succ &= test_scalar(x,y);
        succ &= test_scalar(x,y,(j%20==0));
        
        if (j < 1000)
            succ &= test_isr(x);