/* Copyright (c) 2014 Cryptography Research, Inc.
 * Released under the MIT License.  See LICENSE.txt for license information.
 */

#include "word.h"

#include <assert.h>
#include <limits.h>  /* INT_MAX, used by montgomery_ladder */
#include <stdlib.h>
#include <string.h>

#include "intrinsics.h"
#include "scalarmul.h"
#include "barrett_field.h"
#include "constant_time.h"

/**
 * Montgomery-ladder scalar multiplication on a serialized field element.
 *
 * The scalar is scanned from bit (nbits-1) down to bit 0.  For every bit the
 * (xa,za) and (xd,zd) pairs of the ladder state are conditionally swapped
 * through constant_time_cond_swap and one montgomery_step is applied.  The
 * swap is lazy: the mask of the previous bit is XORed with the current one so
 * that a single swap both undoes the previous swap and applies the new one.
 *
 * @param out             Receives the serialized result.
 * @param in              Serialized input element.
 * @param scalar          Scalar words, least-significant word first.
 * @param nbits           Number of scalar bits to process.
 * @param n_extra_doubles Number of additional montgomery_step calls applied
 *                        after the ladder; must be below INT_MAX.
 * @return The mask returned by serialize_montgomery.
 */
mask_t
montgomery_ladder (
    field_a_t out,
    const field_a_t in,
    const word_t *scalar,
    unsigned int nbits,
    unsigned int n_extra_doubles
) {
    montgomery_a_t mont;
    deserialize_montgomery(mont, in);

    /* n is the index of the top bit inside the most-significant word. */
    int i,j,n=(nbits-1)%WORD_BITS;
    mask_t pflip = 0;
    for (j=(nbits+WORD_BITS-1)/WORD_BITS-1; j>=0; j--) {
        word_t w = scalar[j];
        for (i=n; i>=0; i--) {
            mask_t flip = -((w>>i)&1);
            constant_time_cond_swap(mont->xa,mont->xd,sizeof(mont->xd),flip^pflip);
            constant_time_cond_swap(mont->za,mont->zd,sizeof(mont->xd),flip^pflip);
            montgomery_step(mont);
            pflip = flip;
        }
        /* Every word after the first is scanned in full. */
        n = WORD_BITS-1;
    }

    /* Undo the swap left pending by the last bit. */
    constant_time_cond_swap(mont->xa,mont->xd,sizeof(mont->xd),pflip);
    constant_time_cond_swap(mont->za,mont->zd,sizeof(mont->xd),pflip);

    assert(n_extra_doubles < INT_MAX);
    for (j=0; j<(int)n_extra_doubles; j++) {
        montgomery_step(mont);
    }

    return serialize_montgomery(out, mont, in);
}

/** Typed wrapper: select entry idx of a tw_pniels table via constant_time_lookup. */
static __inline__ void
__attribute__((unused,always_inline))
constant_time_lookup_tw_pniels (
    tw_pniels_a_t out,
    const tw_pniels_a_t *in,
    int nin,
    int idx
) {
    constant_time_lookup(out,in,sizeof(*out),nin,idx);
}

/** Typed wrapper: select entry idx of a tw_niels table via constant_time_lookup. */
static __inline__ void
__attribute__((unused,always_inline))
constant_time_lookup_tw_niels (
    tw_niels_a_t out,
    const tw_niels_a_t *in,
    int nin,
    int idx
) {
    constant_time_lookup(out,in,sizeof(*out),nin,idx);
}

/**
 * Convert a scalar to the signed form consumed by the window/comb routines.
 *
 * One of two precomputed adjustments (stored back to back in prepared_data,
 * nwords_pd words each) is added, chosen by the low bit of the scalar so that
 * the sum is even; the sum (including its carry) is then shifted right by one.
 *
 * NOTE(review): this function was restored after text loss in this copy of
 * the file; confirm the body and the add_nr_ext_packed argument order against
 * barrett_field.h.
 *
 * @param out            Receives nwords_scalar words.
 * @param scalar         Input scalar, nwords_scalar words.
 * @param nwords_scalar  Word count of scalar and out.
 * @param prepared_data  Two adjustments of nwords_pd words each.
 * @param nwords_pd      Word count of one adjustment; at most nwords_scalar.
 */
static void
convert_to_signed_window_form (
    word_t *out,
    const word_t *scalar,
    int nwords_scalar,
    const word_t *prepared_data,
    int nwords_pd
) {
    assert(nwords_pd <= nwords_scalar);
    mask_t mask = -(scalar[0]&1);

    word_t carry = add_nr_ext_packed(out, scalar, nwords_scalar, prepared_data, nwords_pd, ~mask);
    carry += add_nr_ext_packed(out, out, nwords_scalar, prepared_data+nwords_pd, nwords_pd, mask);

    assert(!(out[0]&1));

    int i;
    for (i=0; i<nwords_scalar; i++) {
        out[i] >>= 1;
        if (i<nwords_scalar-1) {
            out[i] |= out[i+1]<<(WORD_BITS-1);
        } else {
            out[i] |= carry<<(WORD_BITS-1);
        }
    }
}

/**
 * Fixed-window scalar multiplication, in place on `working`.
 *
 * A table of the odd multiples 1,3,5,... of the input point is built, the
 * scalar is converted with convert_to_signed_window_form, and each window
 * selects a table entry through constant_time_lookup_tw_pniels followed by a
 * conditional negation.
 *
 * NOTE(review): the function head and the table-building loop were restored
 * after text loss in this copy of the file.
 *
 * @param working On entry the point to multiply; on exit the product.
 * @param scalar  Scalar, SCALAR_WORDS words.
 */
void
scalarmul (
    tw_extensible_a_t working,
    const word_t scalar[SCALAR_WORDS]
) {
    const int WINDOW = SCALARMUL_FIXED_WINDOW_SIZE,
        WINDOW_MASK = (1<<WINDOW)-1, WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1),
        nbits = ROUND_UP(SCALAR_BITS,WINDOW);

    word_t scalar2[SCALAR_WORDS];
    convert_to_signed_window_form (
        scalar2, scalar, SCALAR_WORDS,
        SCALARMUL_FIXED_WINDOW_ADJUSTMENT, SCALAR_WORDS
    );

    /* tabulator = 2*P, used as the step between consecutive odd multiples. */
    tw_extensible_a_t tabulator;
    copy_tw_extensible(tabulator, working);
    double_tw_extensible(tabulator);

    tw_pniels_a_t
        pn VECTOR_ALIGNED,
        multiples[NTABLE] VECTOR_ALIGNED;
    convert_tw_extensible_to_tw_pniels(pn, tabulator);
    convert_tw_extensible_to_tw_pniels(multiples[0], working);

    /* multiples[i] = (2i+1)*P; `working` is scratch here and is reset below. */
    int i,j;
    for (i=1; i<NTABLE; i++) {
        add_tw_pniels_to_tw_extensible(working, pn);
        convert_tw_extensible_to_tw_pniels(multiples[i], working);
    }

    /* Top window initializes the accumulator. */
    i = nbits - WINDOW;
    int bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS) & WINDOW_MASK,
        inv = (bits>>(WINDOW-1))-1;
    bits ^= inv;

    constant_time_lookup_tw_pniels(pn, multiples, NTABLE, bits & WINDOW_T_MASK);
    cond_negate_tw_pniels(pn, inv);
    convert_tw_pniels_to_tw_extensible(working, pn);

    for (i-=WINDOW; i>=0; i-=WINDOW) {
        for (j=0; j<WINDOW; j++) {
            double_tw_extensible(working);
        }

        bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS);

        /* Window straddles a word boundary: pull in bits of the next word. */
        if (i/WORD_BITS < SCALAR_WORDS-1 && i%WORD_BITS >= WORD_BITS-WINDOW) {
            bits ^= scalar2[i/WORD_BITS+1] << (WORD_BITS - (i%WORD_BITS));
        }

        bits &= WINDOW_MASK;
        inv = (bits>>(WINDOW-1))-1;
        bits ^= inv;

        constant_time_lookup_tw_pniels(pn, multiples, NTABLE, bits & WINDOW_T_MASK);
        cond_negate_tw_pniels(pn, inv);
        add_tw_pniels_to_tw_extensible(working, pn);
    }
}

/**
 * Fixed-window scalar multiplication with direct table indexing.
 *
 * Identical to scalarmul() except that table entries are read with
 * copy_tw_pniels(multiples[index]) instead of the constant-time lookup, so
 * the memory access pattern depends on the scalar.
 *
 * @param working On entry the point to multiply; on exit the product.
 * @param scalar  Scalar, SCALAR_WORDS words.
 */
void
scalarmul_vlook (
    tw_extensible_a_t working,
    const word_t scalar[SCALAR_WORDS]
) {
    const int WINDOW = SCALARMUL_FIXED_WINDOW_SIZE,
        WINDOW_MASK = (1<<WINDOW)-1, WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1),
        nbits = ROUND_UP(SCALAR_BITS,WINDOW);

    word_t scalar2[SCALAR_WORDS];
    convert_to_signed_window_form(
        scalar2, scalar, SCALAR_WORDS,
        SCALARMUL_FIXED_WINDOW_ADJUSTMENT, SCALAR_WORDS
    );

    tw_extensible_a_t tabulator;
    copy_tw_extensible(tabulator, working);
    double_tw_extensible(tabulator);

    tw_pniels_a_t
        pn VECTOR_ALIGNED,
        multiples[NTABLE] VECTOR_ALIGNED;
    convert_tw_extensible_to_tw_pniels(pn, tabulator);
    convert_tw_extensible_to_tw_pniels(multiples[0], working);

    /* multiples[i] = (2i+1)*P; `working` is scratch here and is reset below. */
    int i,j;
    for (i=1; i<NTABLE; i++) {
        add_tw_pniels_to_tw_extensible(working, pn);
        convert_tw_extensible_to_tw_pniels(multiples[i], working);
    }

    i = nbits - WINDOW;
    int bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS) & WINDOW_MASK,
        inv = (bits>>(WINDOW-1))-1;
    bits ^= inv;

    copy_tw_pniels(pn, multiples[bits & WINDOW_T_MASK]);
    cond_negate_tw_pniels(pn, inv);
    convert_tw_pniels_to_tw_extensible(working, pn);

    for (i-=WINDOW; i>=0; i-=WINDOW) {
        for (j=0; j<WINDOW; j++) {
            double_tw_extensible(working);
        }

        bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS);

        /* Window straddles a word boundary: pull in bits of the next word. */
        if (i/WORD_BITS < SCALAR_WORDS-1 && i%WORD_BITS >= WORD_BITS-WINDOW) {
            bits ^= scalar2[i/WORD_BITS+1] << (WORD_BITS - (i%WORD_BITS));
        }

        bits &= WINDOW_MASK;
        inv = (bits>>(WINDOW-1))-1;
        bits ^= inv;

        copy_tw_pniels(pn, multiples[bits & WINDOW_T_MASK]);
        cond_negate_tw_pniels(pn, inv);
        add_tw_pniels_to_tw_extensible(working, pn);
    }
}

/**
 * Prepare a scalar for the comb routines.
 *
 * The scalar is copied into a scratch buffer with any bits at or above nbits
 * cleared and zero-padded to at least SCALAR_WORDS words, then converted with
 * convert_to_signed_window_form using the table's scalar_adjustments.
 *
 * NOTE(review): the copy/mask/pad loops were restored after text loss in this
 * copy of the file.
 *
 * @param scalar2 Receives max(ceil(nbits/WORD_BITS), SCALAR_WORDS) words.
 * @param scalar  Input scalar, ceil(nbits/WORD_BITS) words.
 * @param nbits   Number of significant scalar bits.
 * @param table   Comb table supplying n, t, s and the scalar adjustments.
 * @return MASK_SUCCESS, or MASK_FAILURE if any of n, t, s is zero or the
 *         comb (n*t*s bits) is too small for nbits.
 */
static mask_t
schedule_scalar_for_combs (
    word_t *scalar2,
    const word_t *scalar,
    unsigned int nbits,
    const struct fixed_base_table_t* table
) {
    unsigned int i;
    unsigned int n = table->n, t = table->t, s = table->s;

    if (n*t*s < nbits || n < 1 || t < 1 || s < 1) {
        return MASK_FAILURE;
    }

    unsigned int scalar_words = (nbits + WORD_BITS - 1)/WORD_BITS,
        scalar2_words = scalar_words;
    if (scalar2_words < SCALAR_WORDS)
        scalar2_words = SCALAR_WORDS;
    word_t scalar3[scalar2_words];

    /* Copy scalar to scalar3, but clear its high bits (if there are any) */
    for (i=0; i<scalar_words; i++) {
        scalar3[i] = scalar[i];
    }
    if (i > 0 && (nbits % WORD_BITS)) {
        scalar3[i-1] &= (((word_t)1) << (nbits%WORD_BITS)) - 1;
    }
    for (; i<scalar2_words; i++) {
        scalar3[i] = 0;
    }

    convert_to_signed_window_form (
        scalar2,
        scalar3, scalar2_words,
        table->scalar_adjustments , SCALAR_WORDS
    );

    return MASK_SUCCESS;
}

/**
 * Fixed-base scalar multiplication using a precomputed comb table.
 *
 * For each of the s comb positions the accumulator is doubled once and, for
 * each of the n combs, a t-bit index is gathered from the scheduled scalar,
 * folded to a (t-1)-bit index plus sign, looked up through
 * constant_time_lookup_tw_niels and added.
 *
 * NOTE(review): the loop heads and the `bit` computation were restored after
 * text loss in this copy of the file (mirroring linear_combo_combs_vt).
 *
 * @param out    Receives the product.
 * @param scalar Scalar words.
 * @param nbits  Number of significant scalar bits.
 * @param table  Table built by precompute_fixed_base.
 * @return MASK_SUCCESS, or MASK_FAILURE if the table cannot cover nbits.
 */
mask_t
scalarmul_fixed_base (
    tw_extensible_a_t out,
    const word_t scalar[SCALAR_WORDS],
    unsigned int nbits,
    const struct fixed_base_table_t* table
) {
    unsigned int i,j,k;
    unsigned int n = table->n, t = table->t, s = table->s;

    unsigned int scalar2_words = (nbits + WORD_BITS - 1)/WORD_BITS;
    if (scalar2_words < SCALAR_WORDS) scalar2_words = SCALAR_WORDS;

    word_t scalar2[scalar2_words];

    mask_t succ = schedule_scalar_for_combs(scalar2, scalar, nbits, table);
    if (!succ) return MASK_FAILURE;

#ifdef __clang_analyzer__
    assert(t >= 1);
#endif

    tw_niels_a_t ni;

    for (i=0; i<s; i++) {
        if (i) double_tw_extensible(out);

        for (j=0; j<n; j++) {
            int tab = 0;

            /* Gather one bit per tooth of comb j at position s-1-i. */
            for (k=0; k<t; k++) {
                unsigned int bit = (s-1-i) + k*s + j*(s*t);
                if (bit < scalar2_words * WORD_BITS) {
                    tab |= (scalar2[bit/WORD_BITS] >> (bit%WORD_BITS) & 1) << k;
                }
            }

            /* Top bit clear => use the complemented index and negate. */
            mask_t invert = (tab>>(t-1))-1;
            tab ^= invert;
            tab &= (1<<(t-1)) - 1;

            constant_time_lookup_tw_niels(ni, table->table + (j<<(t-1)), 1<<(t-1), tab);

            cond_negate_tw_niels(ni, invert);
            if (i||j) {
                add_tw_niels_to_tw_extensible(out, ni);
            } else {
                convert_tw_niels_to_tw_extensible(out, ni);
            }
        }
    }

    return MASK_SUCCESS;
}

/**
 * Compute scalar1*B1 + scalar2*B2 from two comb tables, with direct
 * (scalar-dependent) table indexing.
 *
 * Both scalars are scheduled independently.  The doubling loop runs for
 * max(s1, s2) iterations; a table with the smaller s joins in only for the
 * last s iterations.
 *
 * @param out     Receives the combination.
 * @param scalar1 First scalar.
 * @param nbits1  Significant bits of scalar1.
 * @param table1  Comb table for the first base.
 * @param scalar2 Second scalar.
 * @param nbits2  Significant bits of scalar2.
 * @param table2  Comb table for the second base.
 * @return MASK_SUCCESS, or MASK_FAILURE if either scalar cannot be scheduled.
 */
mask_t
linear_combo_combs_vt (
    tw_extensible_a_t out,
    const word_t scalar1[SCALAR_WORDS],
    unsigned int nbits1,
    const struct fixed_base_table_t* table1,
    const word_t scalar2[SCALAR_WORDS],
    unsigned int nbits2,
    const struct fixed_base_table_t* table2
) {
    unsigned int i,j,k,sc;
    unsigned int s1 = table1->s, s2 = table2->s, smax = (s1 > s2) ? s1 : s2;

    unsigned int scalar1b_words = (nbits1 + WORD_BITS - 1)/WORD_BITS;
    if (scalar1b_words < SCALAR_WORDS) scalar1b_words = SCALAR_WORDS;

    unsigned int scalar2b_words = (nbits2 + WORD_BITS - 1)/WORD_BITS;
    if (scalar2b_words < SCALAR_WORDS) scalar2b_words = SCALAR_WORDS;

    word_t scalar1b[scalar1b_words], scalar2b[scalar2b_words];

    /* Schedule the scalars */
    mask_t succ;
    succ = schedule_scalar_for_combs(scalar1b, scalar1, nbits1, table1);
    if (!succ) return MASK_FAILURE;

    succ = schedule_scalar_for_combs(scalar2b, scalar2, nbits2, table2);
    if (!succ) return MASK_FAILURE;

#ifdef __clang_analyzer__
    assert(table1->t >= 1);
    assert(table2->t >= 1);
#endif

    tw_niels_a_t ni;

    unsigned int swords[2] = {scalar1b_words, scalar2b_words}, started = 0;
    word_t *scalars[2] = {scalar1b,scalar2b};

    for (i=0; i<smax; i++) {
        if (i) double_tw_extensible(out);

        for (sc=0; sc<2; sc++) {
            const struct fixed_base_table_t* table = sc ? table2 : table1;

            /* Position within this table's own s iterations; negative
             * while the table with the larger s is still running alone. */
            int ii = i-smax+table->s;
            if (ii < 0) continue;
            assert(ii < (int)table->s);

            for (j=0; j<table->n; j++) {
                int tab = 0;

                for (k=0; k<table->t; k++) {
                    unsigned int bit = (table->s-1-ii) + k*table->s + j*(table->s*table->t);
                    if (bit < swords[sc] * WORD_BITS) {
                        tab |= (scalars[sc][bit/WORD_BITS] >> (bit%WORD_BITS) & 1) << k;
                    }
                }

                mask_t invert = (tab>>(table->t-1))-1;
                tab ^= invert;
                tab &= (1<<(table->t-1)) - 1;

                copy_tw_niels(ni, table->table[tab + (j<<(table->t-1))]);
                cond_negate_tw_niels(ni,invert);

                if (started) {
                    add_tw_niels_to_tw_extensible(out, ni);
                } else {
                    convert_tw_niels_to_tw_extensible(out, ni);
                    started = 1;
                }
            }
        }

        assert(started);
    }

    return MASK_SUCCESS;
}

/**
 * Build a comb table (n combs, t teeth, spacing s) for a fixed base point.
 *
 * Steps: allocate scratch and (unless prealloc is given) the table; compute
 * the two scalar adjustments stored in out->scalar_adjustments; fill the
 * table comb by comb, walking the 2^(t-1) sign patterns in Gray-code order so
 * that each entry differs from the previous by one add/sub of a doubled
 * tooth; finally normalize all entries with one simultaneous inversion.
 *
 * On any failure `out` is zeroed, the table contents are wiped, an internally
 * allocated table is freed, and 0 is returned.
 *
 * NOTE(review): the adjustment computation and the doubling phase were
 * restored after text loss in this copy of the file; confirm against
 * barrett_field.h (barrett_reduce, sub_nr_ext_packed, curve_prime_order).
 *
 * @param out      Table descriptor to fill.
 * @param base     Base point.
 * @param n,t,s    Comb parameters; each >= 1 and n*t*s >= SCALAR_BITS.
 * @param prealloc Optional caller-owned storage for n<<(t-1) entries; if
 *                 NULL the table is allocated here and owned by `out`.
 * @return Nonzero mask on success, 0 on failure.
 */
mask_t
precompute_fixed_base (
    struct fixed_base_table_t* out,
    const tw_extensible_a_t base,
    unsigned int n,
    unsigned int t,
    unsigned int s,
    tw_niels_a_t *prealloc
) {
    if (s < 1 || t < 1 || n < 1 || n*t*s < SCALAR_BITS) {
        really_memset(out, 0, sizeof(*out));
        return 0;
    }

    out->n = n;
    out->t = t;
    out->s = s;

    tw_extensible_a_t working, start;
    copy_tw_extensible(working, base);
    tw_pniels_a_t pn_tmp;

    /* With t == 1 no doubles are stored; still request one element so that a
     * zero-size allocation returning NULL is not mistaken for out-of-memory. */
    tw_pniels_a_t *doubles = (tw_pniels_a_t *) malloc_vector(sizeof(*doubles) * (t > 1 ? t-1 : 1));
    field_a_t *zs  = (field_a_t *) malloc_vector(sizeof(*zs) * (n<<(t-1)));
    field_a_t *zis = (field_a_t *) malloc_vector(sizeof(*zis) * (n<<(t-1)));

    tw_niels_a_t *table = prealloc;
    if (prealloc) {
        out->own_table = 0;
    } else {
        table = (tw_niels_a_t *) malloc_vector(sizeof(*table) * (n<<(t-1)));
        out->own_table = 1;
    }
    out->table = table;

    if (!doubles || !zs || !zis || !table) {
        free(doubles);
        free(zs);
        free(zis);
        really_memset(out, 0, sizeof(*out));
        /* table is NULL here when its own allocation is what failed. */
        if (table) {
            really_memset(table, 0, sizeof(*table) * (n<<(t-1)));
        }
        if (!prealloc) free(table);
        return 0;
    }

    unsigned int i,j,k;

    /* Compute the scalar adjustments, equal to 2^nbits-1 mod q */
    unsigned int adjustment_size = (n*t*s)/WORD_BITS + 1;
    assert(adjustment_size >= SCALAR_WORDS);
    word_t adjustment[adjustment_size];
    for (i=0; i<adjustment_size; i++) {
        adjustment[i] = -1;
    }

    adjustment[(n*t*s) / WORD_BITS] += ((word_t)1) << ((n*t*s) % WORD_BITS);

    /* The low adjustment is 2^nbits - 1 mod q.  The two adjustments are
     * stored so that slot 0 holds the even one and slot 1 the odd one. */
    barrett_reduce(adjustment, adjustment_size, 0, &curve_prime_order);
    word_t *low_adjustment = &out->scalar_adjustments[(SCALAR_WORDS)*(adjustment[0] & 1)],
        *high_adjustment = &out->scalar_adjustments[(SCALAR_WORDS)*((~adjustment[0]) & 1)];
    for (i=0; i<SCALAR_WORDS; i++) {
        low_adjustment[i] = adjustment[i];
    }

    /* The high adjustment is low + q = low - q_lo + 2^big */
    (void)
    sub_nr_ext_packed(
        high_adjustment,
        adjustment, SCALAR_WORDS,
        curve_prime_order.p_lo, curve_prime_order.nwords_lo,
        -1
    );
    if (curve_prime_order.p_shift) {
        high_adjustment[curve_prime_order.nwords_p - 1] += ((word_t)1)<<curve_prime_order.p_shift;
    }

    /* OK, now compute the tables */
    for (i=0; i<n; i++) {

        /* Doubling phase: start = sum of all t teeth of comb i, and
         * doubles[j] = 2 * (tooth j) for the Gray-code walk below. */
        for (j=0; j<t; j++) {
            if (j) {
                convert_tw_extensible_to_tw_pniels(pn_tmp, working);
                add_tw_pniels_to_tw_extensible(start, pn_tmp);
            } else {
                copy_tw_extensible(start, working);
            }

            /* Nothing after the very last tooth needs further doublings. */
            if (j==t-1 && i==n-1) {
                break;
            }

            double_tw_extensible(working);
            if (j<t-1) {
                convert_tw_extensible_to_tw_pniels(doubles[j], working);
            }

            for (k=0; k<s-1; k++) {
                double_tw_extensible(working);
            }
        }

        /* Gray-code phase */
        for (j=0;; j++) {
            int gray = j ^ (j>>1);
            int idx = (((i+1)<<(t-1))-1) ^ gray;

            convert_tw_extensible_to_tw_pniels(pn_tmp, start);
            copy_tw_niels(table[idx], pn_tmp->n);
            field_copy(zs[idx], pn_tmp->z);

            if (j >= (1u<<(t-1)) - 1) break;
            int delta = (j+1) ^ ((j+1)>>1) ^ gray;

            /* k = index of the single bit that changes in the Gray code. */
            for (k=0; delta>1; k++)
                delta >>=1;

            if (gray & (1<<k)) {
                /* start += doubles[k] */
                add_tw_pniels_to_tw_extensible(start, doubles[k]);
            } else {
                /* start -= doubles[k] */
                sub_tw_pniels_from_tw_extensible(start, doubles[k]);
            }
        }
    }

    field_simultaneous_invert(zis, zs, n<<(t-1));

    /* Scale every entry by the inverse of its z coordinate. */
    field_a_t product;
    for (i=0; i<n<<(t-1); i++) {
        field_mul(product, table[i]->a, zis[i]);
        field_strong_reduce(product);
        field_copy(table[i]->a, product);

        field_mul(product, table[i]->b, zis[i]);
        field_strong_reduce(product);
        field_copy(table[i]->b, product);

        field_mul(product, table[i]->c, zis[i]);
        field_strong_reduce(product);
        field_copy(table[i]->c, product);
    }

    mask_t ret = ~field_is_zero(zis[0]);

    free(doubles);
    free(zs);
    free(zis);

    if (unlikely(!ret)) {
        really_memset(table, 0, sizeof(*table) * (n<<(t-1)));
        if (!prealloc) free(table);
        really_memset(out, 0, sizeof(*out));
        return 0;
    }

    return ret;
}

/**
 * Wipe a comb table, release it if the descriptor owns it, and zero the
 * descriptor itself.
 *
 * @param table Descriptor previously filled by precompute_fixed_base.
 */
void
destroy_fixed_base (
    struct fixed_base_table_t* table
) {
    if (table->table) {
        really_memset(table->table,0,sizeof(*table->table)*(table->n<<(table->t-1)));
    }
    if (table->own_table) {
        free(table->table);
    }
    really_memset(table,0,sizeof(*table));
}

/**
 * Build a table of the odd multiples 1,3,...,(2^(tbits+1)-1) of a base point
 * for the fixed-base wNAF routines, normalized with one simultaneous
 * inversion.
 *
 * NOTE(review): the allocation/early-return prologue was restored after text
 * loss in this copy of the file.
 *
 * @param out        Receives 1<<tbits entries.
 * @param const_base Base point.
 * @param tbits      log2 of the table size.
 * @return -1 on success, 0 if scratch allocation fails.
 */
mask_t
precompute_fixed_base_wnaf (
    tw_niels_a_t *out,
    const tw_extensible_a_t const_base,
    unsigned int tbits
) {
    int i;
    field_a_t *zs  = (field_a_t *) malloc_vector(sizeof(*zs)<<tbits);
    field_a_t *zis = (field_a_t *) malloc_vector(sizeof(*zis)<<tbits);

    if (!zs || !zis) {
        free(zs);
        free(zis);
        return 0;
    }

    tw_extensible_a_t base;
    copy_tw_extensible(base,const_base);

    tw_pniels_a_t twop, tmp;

    convert_tw_extensible_to_tw_pniels(tmp, base);
    field_copy(zs[0], tmp->z);
    copy_tw_niels(out[0], tmp->n);

    if (tbits > 0) {
        /* twop = 2*P is the step between consecutive odd multiples. */
        double_tw_extensible(base);
        convert_tw_extensible_to_tw_pniels(twop, base);
        add_tw_pniels_to_tw_extensible(base, tmp);

        convert_tw_extensible_to_tw_pniels(tmp, base);
        field_copy(zs[1], tmp->z);
        copy_tw_niels(out[1], tmp->n);

        for (i=2; i < 1<<tbits; i++) {
            add_tw_pniels_to_tw_extensible(base, twop);
            convert_tw_extensible_to_tw_pniels(tmp, base);
            field_copy(zs[i], tmp->z);
            copy_tw_niels(out[i], tmp->n);
        }
    }

    field_simultaneous_invert(zis, zs, 1<<tbits);

    /* Scale every entry by the inverse of its z coordinate. */
    field_a_t product;
    for (i=0; i<1<<tbits; i++) {
        field_mul(product, out[i]->a, zis[i]);
        field_strong_reduce(product);
        field_copy(out[i]->a, product);

        field_mul(product, out[i]->b, zis[i]);
        field_strong_reduce(product);
        field_copy(out[i]->b, product);

        field_mul(product, out[i]->c, zis[i]);
        field_strong_reduce(product);
        field_copy(out[i]->c, product);
    }

    free(zs);
    free(zis);

    return -1;
}

/**
 * @cond internal
 * Control for variable-time scalar multiply algorithms.
 */
struct smvt_control {
    int power, addend;
};

/**
 * Recode a scalar into a list of (power, addend) control words for the
 * variable-time wNAF routines.
 *
 * Entries are emitted from the highest power downwards and the list is
 * terminated by a stopper entry with power == -1 and addend == 0.
 *
 * @param control   Output array of at least nbits/(tableBits+1) + 3 entries.
 * @param scalar    Scalar words.
 * @param nbits     Number of scalar bits to recode.
 * @param tableBits log2 of the precomputed table size.
 * @return Number of control words written, not counting the stopper.
 */
static int
recode_wnaf(
    struct smvt_control *control, /* [nbits/(tableBits+1) + 3] */
    const word_t *scalar,
    unsigned int nbits,
    unsigned int tableBits)
{
    int current = 0, i, j;
    unsigned int position = 0;

    /* PERF: negate scalar if it's large
     * PERF: this is a pretty simplistic algorithm.  I'm sure there's a faster one...
     */
    for (i=nbits-1; i >= 0; i--) {
        int bit = (scalar[i/WORD_BITS] >> (i%WORD_BITS)) & 1;
        current = 2*current + bit;

        /*
         * Sizing: |current| >= 2^(tableBits+1) -> |current| = 2^0
         * So current loses (tableBits+1) bits every time.  It otherwise gains
         * 1 bit per iteration.  The number of iterations is
         * (nbits + 2 + tableBits), and an additional control word is added at
         * the end.  So the total number of control words is at most
         * ceil((nbits+1) / (tableBits+1)) + 2 = floor((nbits)/(tableBits+1)) + 2.
         * There's also the stopper with power -1, for a total of +3.
         */
        if (current >= (2<<tableBits) || current <= -1 - (2<<tableBits)) {
            int delta = (current + 1) >> 1; /* |delta| < 2^tablebits */
            current = -(current & 1);

            /* Strip trailing zero bits so the addend is odd. */
            for (j=i; (delta & 1) == 0; j++) {
                delta >>= 1;
            }
            control[position].power = j+1;
            control[position].addend = delta;
            position++;
            assert(position <= nbits/(tableBits+1) + 2);
        }
    }

    if (current) {
        for (j=0; (current & 1) == 0; j++) {
            current >>= 1;
        }
        control[position].power = j;
        control[position].addend = current;
        position++;
        assert(position <= nbits/(tableBits+1) + 2);
    }

    control[position].power = -1;
    control[position].addend = 0;
    return position;
}

/**
 * Fill `output` with the odd multiples 1,3,...,(2^(tbits+1)-1) of `working`.
 *
 * `working` is used as scratch and does not hold the original point on
 * return when tbits > 0.
 *
 * NOTE(review): the loop body was restored after text loss in this copy of
 * the file.
 *
 * @param output  Receives 1<<tbits entries.
 * @param working Base point on entry; clobbered.
 * @param tbits   log2 of the table size.
 */
static void
prepare_wnaf_table(
    tw_pniels_a_t *output,
    tw_extensible_a_t working,
    unsigned int tbits
) {
    int i;
    convert_tw_extensible_to_tw_pniels(output[0], working);

    if (tbits == 0) return;

    double_tw_extensible(working);
    tw_pniels_a_t twop;
    convert_tw_extensible_to_tw_pniels(twop, working);

    add_tw_pniels_to_tw_extensible(working, output[0]);
    convert_tw_extensible_to_tw_pniels(output[1], working);

    for (i=2; i < 1<<tbits; i++) {
        add_tw_pniels_to_tw_extensible(working, twop);
        convert_tw_extensible_to_tw_pniels(output[i], working);
    }
}

/**
 * Variable-time wNAF scalar multiplication, in place on `working`.
 *
 * NOTE(review): the function head (signature, table_bits constant, recoding
 * and table preparation) was restored after text loss in this copy of the
 * file; confirm the SCALARMUL_WNAF_TABLE_BITS name against scalarmul.h.
 *
 * @param working On entry the point to multiply; on exit the product
 *                (the identity if the recoded scalar is empty).
 * @param scalar  Scalar words.
 * @param nbits   Number of scalar bits.
 */
void
scalarmul_vt (
    tw_extensible_a_t working,
    const word_t scalar[SCALAR_WORDS],
    unsigned int nbits
) {
    const int table_bits = SCALARMUL_WNAF_TABLE_BITS;
    struct smvt_control control[nbits/(table_bits+1)+3];

    int control_bits = recode_wnaf(control, scalar, nbits, table_bits);

    tw_pniels_a_t precmp[1<<table_bits];
    prepare_wnaf_table(precmp, working, table_bits);

    if (control_bits > 0) {
        assert(control[0].addend > 0);
        assert(control[0].power >= 0);
        convert_tw_pniels_to_tw_extensible(working, precmp[control[0].addend >> 1]);
    } else {
        set_identity_tw_extensible(working);
        return;
    }

    int conti = 1, i;
    for (i = control[0].power - 1; i >= 0; i--) {
        double_tw_extensible(working);

        if (i == control[conti].power) {
            assert(control[conti].addend);

            if (control[conti].addend > 0) {
                add_tw_pniels_to_tw_extensible(working, precmp[control[conti].addend >> 1]);
            } else {
                sub_tw_pniels_from_tw_extensible(working, precmp[(-control[conti].addend) >> 1]);
            }
            conti++;
            assert(conti <= control_bits);
        }
    }
}

/**
 * Variable-time wNAF scalar multiplication of a fixed base using a table
 * built by precompute_fixed_base_wnaf.
 *
 * @param working    Receives the product (the identity if the recoded scalar
 *                   is empty).
 * @param scalar     Scalar words.
 * @param nbits      Number of scalar bits.
 * @param precmp     Table of 1<<table_bits odd multiples of the base.
 * @param table_bits log2 of the table size.
 */
void
scalarmul_fixed_base_wnaf_vt (
    tw_extensible_a_t working,
    const word_t scalar[SCALAR_WORDS],
    unsigned int nbits,
    const tw_niels_a_t *precmp,
    unsigned int table_bits
) {
    struct smvt_control control[nbits/(table_bits+1)+3];

    int control_bits = recode_wnaf(control, scalar, nbits, table_bits);

    if (control_bits > 0) {
        assert(control[0].addend > 0);
        assert(control[0].power >= 0);
        convert_tw_niels_to_tw_extensible(working, precmp[control[0].addend >> 1]);
    } else {
        set_identity_tw_extensible(working);
        return;
    }

    int conti = 1, i;
    for (; control[conti].power >= 0; conti++) {
        assert(conti <= control_bits);
        /* Double up to the power of the next control word. */
        for (i = control[conti-1].power - control[conti].power; i; i--) {
            double_tw_extensible(working);
        }

        assert(control[conti].addend);
        if (control[conti].addend > 0) {
            add_tw_niels_to_tw_extensible(working, precmp[control[conti].addend >> 1]);
        } else {
            sub_tw_niels_from_tw_extensible(working, precmp[(-control[conti].addend) >> 1]);
        }
    }

    /* Remaining doublings down to power 0. */
    for (i = control[conti-1].power; i; i--) {
        double_tw_extensible(working);
    }
}

/**
 * Variable-time computation of scalar_var*P + scalar_pre*B, where P is the
 * point in `working` and B is a fixed base represented by `precmp`.
 *
 * Both scalars are recoded with recode_wnaf and processed in one shared
 * doubling chain, starting from whichever recoding has the higher leading
 * power.
 *
 * @param working        On entry P; on exit the combination (the identity if
 *                       both recodings are empty).
 * @param scalar_var     Scalar for P.
 * @param nbits_var      Bits of scalar_var.
 * @param scalar_pre     Scalar for B.
 * @param nbits_pre      Bits of scalar_pre.
 * @param precmp         Table for B from precompute_fixed_base_wnaf.
 * @param table_bits_pre log2 of the size of precmp.
 */
void
linear_combo_var_fixed_vt(
    tw_extensible_a_t working,
    const word_t scalar_var[SCALAR_WORDS],
    unsigned int nbits_var,
    const word_t scalar_pre[SCALAR_WORDS],
    unsigned int nbits_pre,
    const tw_niels_a_t *precmp,
    unsigned int table_bits_pre
) {
    const int table_bits_var = SCALARMUL_WNAF_COMBO_TABLE_BITS;
    struct smvt_control control_var[nbits_var/(table_bits_var+1)+3];
    struct smvt_control control_pre[nbits_pre/(table_bits_pre+1)+3];

    int ncb_var = recode_wnaf(control_var, scalar_var, nbits_var, table_bits_var);
    int ncb_pre = recode_wnaf(control_pre, scalar_pre, nbits_pre, table_bits_pre);
    /* Only read by the asserts at the end; silence NDEBUG warnings. */
    (void)ncb_var;
    (void)ncb_pre;

    tw_pniels_a_t precmp_var[1<<table_bits_var];
    prepare_wnaf_table(precmp_var, working, table_bits_var);

    int contp=0, contv=0, i;

    /* Seed the accumulator from whichever recoding leads. */
    i = control_var[0].power;
    if (i > control_pre[0].power) {
        convert_tw_pniels_to_tw_extensible(working, precmp_var[control_var[0].addend >> 1]);
        contv++;
    } else if (i == control_pre[0].power && i >=0 ) {
        convert_tw_pniels_to_tw_extensible(working, precmp_var[control_var[0].addend >> 1]);
        add_tw_niels_to_tw_extensible(working, precmp[control_pre[0].addend >> 1]);
        contv++; contp++;
    } else {
        i = control_pre[0].power;
        convert_tw_niels_to_tw_extensible(working, precmp[control_pre[0].addend >> 1]);
        contp++;
    }

    /* Both recodings empty: the result is the identity. */
    if (i < 0) {
        set_identity_tw_extensible(working);
        return;
    }

    for (i--; i >= 0; i--) {
        double_tw_extensible(working);

        if (i == control_var[contv].power) {
            assert(control_var[contv].addend);

            if (control_var[contv].addend > 0) {
                add_tw_pniels_to_tw_extensible(working, precmp_var[control_var[contv].addend >> 1]);
            } else {
                sub_tw_pniels_from_tw_extensible(working, precmp_var[(-control_var[contv].addend) >> 1]);
            }
            contv++;
        }

        if (i == control_pre[contp].power) {
            assert(control_pre[contp].addend);

            if (control_pre[contp].addend > 0) {
                add_tw_niels_to_tw_extensible(working, precmp[control_pre[contp].addend >> 1]);
            } else {
                sub_tw_niels_from_tw_extensible(working, precmp[(-control_pre[contp].addend) >> 1]);
            }
            contp++;
        }
    }

    assert(contv == ncb_var);
    assert(contp == ncb_pre);
}