|
- /* Copyright (c) 2014 Cryptography Research, Inc.
- * Released under the MIT License. See LICENSE.txt for license information.
- */
-
- #include "f_field.h"
-
- void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
- uint64_t *c = cs->limb;
- const uint64_t *a = as->limb, *b = bs->limb;
- __uint128_t accum0, accum1;
-
- accum0 = widemul(2*a[8], b[8]);
- accum1 = widemul(a[0], b[7]);
- accum0 += widemul(a[1], b[6]);
- accum1 += widemul(a[2], b[5]);
- accum0 += widemul(a[3], b[4]);
- accum1 += widemul(a[4], b[3]);
- accum0 += widemul(a[5], b[2]);
- accum1 += widemul(a[6], b[1]);
- accum0 += widemul(a[7], b[0]);
- accum1 += accum0;
- c[7] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum1 += widemul(a[0], b[8-0]);
- accum0 += widemul(a[1], b[8-1]);
- accum1 += widemul(a[2], b[8-2]);
- accum0 += widemul(a[3], b[8-3]);
- accum1 += widemul(a[4], b[8-4]);
- accum0 += widemul(a[5], b[8-5]);
- accum1 += widemul(a[6], b[8-6]);
- accum0 += widemul(a[7], b[8-7]);
- accum1 += widemul(a[8], b[8-8]);
- accum1 += accum0;
- c[8] = accum1 & ((1ull<<57)-1);
- accum1 >>= 57;
-
- accum0 = 0;
- accum0 += widemul(a[1], b[0+9-1]);
- accum0 += widemul(a[2], b[0+9-2]);
- accum0 += widemul(a[3], b[0+9-3]);
- accum0 += widemul(a[4], b[0+9-4]);
- accum1 += widemul(a[0], b[0-0]);
- accum0 += widemul(a[5], b[0+9-5]);
- accum0 += widemul(a[6], b[0+9-6]);
- accum0 += widemul(a[7], b[0+9-7]);
- accum0 += widemul(a[8], b[0+9-8]);
- accum1 += accum0 << 1;
- c[0] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum0 += widemul(a[2], b[1+9-2]);
- accum0 += widemul(a[3], b[1+9-3]);
- accum1 += widemul(a[0], b[1-0]);
- accum0 += widemul(a[4], b[1+9-4]);
- accum0 += widemul(a[5], b[1+9-5]);
- accum1 += widemul(a[1], b[1-1]);
- accum0 += widemul(a[6], b[1+9-6]);
- accum0 += widemul(a[7], b[1+9-7]);
- accum0 += widemul(a[8], b[1+9-8]);
- accum1 += accum0 << 1;
- c[1] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum0 += widemul(a[3], b[2+9-3]);
- accum1 += widemul(a[0], b[2-0]);
- accum0 += widemul(a[4], b[2+9-4]);
- accum0 += widemul(a[5], b[2+9-5]);
- accum1 += widemul(a[1], b[2-1]);
- accum0 += widemul(a[6], b[2+9-6]);
- accum0 += widemul(a[7], b[2+9-7]);
- accum1 += widemul(a[2], b[2-2]);
- accum0 += widemul(a[8], b[2+9-8]);
- accum1 += accum0 << 1;
- c[2] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum0 += widemul(a[4], b[3+9-4]);
- accum1 += widemul(a[0], b[3-0]);
- accum0 += widemul(a[5], b[3+9-5]);
- accum1 += widemul(a[1], b[3-1]);
- accum0 += widemul(a[6], b[3+9-6]);
- accum1 += widemul(a[2], b[3-2]);
- accum0 += widemul(a[7], b[3+9-7]);
- accum1 += widemul(a[3], b[3-3]);
- accum0 += widemul(a[8], b[3+9-8]);
- accum1 += accum0 << 1;
- c[3] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum1 += widemul(a[0], b[4-0]);
- accum0 += widemul(a[5], b[4+9-5]);
- accum1 += widemul(a[1], b[4-1]);
- accum0 += widemul(a[6], b[4+9-6]);
- accum1 += widemul(a[2], b[4-2]);
- accum0 += widemul(a[7], b[4+9-7]);
- accum1 += widemul(a[3], b[4-3]);
- accum0 += widemul(a[8], b[4+9-8]);
- accum1 += widemul(a[4], b[4-4]);
- accum1 += accum0 << 1;
- c[4] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum1 += widemul(a[0], b[5-0]);
- accum0 += widemul(a[6], b[5+9-6]);
- accum1 += widemul(a[1], b[5-1]);
- accum1 += widemul(a[2], b[5-2]);
- accum0 += widemul(a[7], b[5+9-7]);
- accum1 += widemul(a[3], b[5-3]);
- accum1 += widemul(a[4], b[5-4]);
- accum0 += widemul(a[8], b[5+9-8]);
- accum1 += widemul(a[5], b[5-5]);
- accum1 += accum0 << 1;
- c[5] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum1 += widemul(a[0], b[6-0]);
- accum1 += widemul(a[1], b[6-1]);
- accum0 += widemul(a[7], b[6+9-7]);
- accum1 += widemul(a[2], b[6-2]);
- accum1 += widemul(a[3], b[6-3]);
- accum1 += widemul(a[4], b[6-4]);
- accum0 += widemul(a[8], b[6+9-8]);
- accum1 += widemul(a[5], b[6-5]);
- accum1 += widemul(a[6], b[6-6]);
- accum1 += accum0 << 1;
- c[6] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum1 += c[7];
- c[7] = accum1 & ((1ull<<58)-1);
-
- c[8] += accum1 >> 58;
- }
-
- void gf_mulw (
- gf_s *__restrict__ cs,
- const gf as,
- uint64_t b
- ) {
- const uint64_t *a = as->limb;
- uint64_t *c = cs->limb;
-
- __uint128_t accum0 = 0, accum3 = 0, accum6 = 0;
- uint64_t mask = (1ull<<58) - 1;
-
- int i;
- for (i=0; i<3; i++) {
- accum0 += widemul(b, a[i]);
- accum3 += widemul(b, a[i+3]);
- accum6 += widemul(b, a[i+6]);
- c[i] = accum0 & mask; accum0 >>= 58;
- c[i+3] = accum3 & mask; accum3 >>= 58;
- if (i==2) {
- c[i+6] = accum6 & (mask>>1); accum6 >>= 57;
- } else {
- c[i+6] = accum6 & mask; accum6 >>= 58;
- }
- }
-
- accum0 += c[3];
- c[3] = accum0 & mask;
- c[4] += accum0 >> 58;
-
- accum3 += c[6];
- c[6] = accum3 & mask;
- c[7] += accum3 >> 58;
-
- accum6 += c[0];
- c[0] = accum6 & mask;
- c[1] += accum6 >> 58;
- }
-
- void gf_sqr (gf_s *__restrict__ cs, const gf as) {
- uint64_t *c = cs->limb;
- const uint64_t *a = as->limb;
- __uint128_t accum0, accum1;
-
- accum0 = widemul(a[8], a[8]);
- accum1 = widemul(a[0], a[7]);
- accum0 += widemul(a[1], a[6]);
- accum1 += widemul(a[2], a[5]);
- accum0 += widemul(a[3], a[4]);
- accum1 += accum0;
- c[7] = 2 * (accum1 & ((1ull<<57)-1));
- accum1 >>= 57;
-
- accum0 = 0;
- accum0 = 0;
- accum1 += widemul(a[4], a[4]);
- accum0 += widemul(a[1], a[7]);
- accum1 += widemul(2*a[2], a[6]);
- accum0 += widemul(a[3], a[5]);
- accum1 += widemul(2*a[0], a[8]);
- accum1 += 2*accum0;
- c[8] = accum1 & ((1ull<<57)-1);
- accum1 >>= 57;
-
- accum0 = 0;
- accum1 += widemul(a[0], a[0]);
- accum0 += widemul(a[1], a[8]);
- accum0 += widemul(a[2], a[7]);
- accum0 += widemul(a[3], a[6]);
- accum0 += widemul(a[4], a[5]);
- accum1 += accum0 << 2;
- c[0] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum0 += widemul(a[2], a[8]);
- accum0 += widemul(a[3], a[7]);
- accum0 += widemul(a[4], a[6]);
- accum0 <<= 1;
- accum0 += widemul(a[5], a[5]);
- accum0 += widemul(a[0], a[1]);
- accum1 += accum0 << 1;
- c[1] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum1 += widemul(a[1], a[1]);
-
- accum0 += widemul(a[3], a[8]);
- accum0 += widemul(a[4], a[7]);
- accum0 += widemul(a[5], a[6]);
- accum0 <<= 1;
- accum0 += widemul(a[0], a[2]);
- accum1 += accum0 << 1;
- c[2] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum0 += widemul(a[6], a[6]);
- accum0 += widemul(2*a[5], a[7]);
- accum0 += widemul(2*a[4], a[8]);
- accum0 += widemul(a[0], a[3]);
- accum0 += widemul(a[1], a[2]);
- accum1 += accum0 << 1;
- c[3] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum0 += widemul(a[6], a[7]);
- accum0 += widemul(a[5], a[8]);
- accum0 <<= 1;
- accum1 += widemul(a[2], a[2]);
- accum0 += widemul(a[0], a[4]);
- accum0 += widemul(a[1], a[3]);
- accum1 += accum0 << 1;
- c[4] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum0 += widemul(2*a[6], a[8]);
- accum0 += widemul(a[7], a[7]);
- accum0 += widemul(a[0], a[5]);
- accum0 += widemul(a[1], a[4]);
- accum0 += widemul(a[2], a[3]);
- accum1 += accum0 << 1;
- c[5] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum0 = 0;
- accum1 += widemul(a[3], a[3]);
- accum0 += widemul(a[0], a[6]);
- accum0 += widemul(a[1], a[5]);
- accum0 += widemul(2*a[7], a[8]);
- accum0 += widemul(a[2], a[4]);
- accum1 += accum0 << 1;
- c[6] = accum1 & ((1ull<<58)-1);
- accum1 >>= 58;
-
- accum1 += c[7];
- c[7] = accum1 & ((1ull<<58)-1);
-
- c[8] += accum1 >> 58;
- }
|