You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

285 lines
7.8 KiB

  1. /* Copyright (c) 2014 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. #include "f_field.h"
  5. void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
  6. uint64_t *c = cs->limb;
  7. const uint64_t *a = as->limb, *b = bs->limb;
  8. __uint128_t accum0, accum1;
  9. accum0 = widemul(2*a[8], b[8]);
  10. accum1 = widemul(a[0], b[7]);
  11. accum0 += widemul(a[1], b[6]);
  12. accum1 += widemul(a[2], b[5]);
  13. accum0 += widemul(a[3], b[4]);
  14. accum1 += widemul(a[4], b[3]);
  15. accum0 += widemul(a[5], b[2]);
  16. accum1 += widemul(a[6], b[1]);
  17. accum0 += widemul(a[7], b[0]);
  18. accum1 += accum0;
  19. c[7] = accum1 & ((1ull<<58)-1);
  20. accum1 >>= 58;
  21. accum0 = 0;
  22. accum1 += widemul(a[0], b[8-0]);
  23. accum0 += widemul(a[1], b[8-1]);
  24. accum1 += widemul(a[2], b[8-2]);
  25. accum0 += widemul(a[3], b[8-3]);
  26. accum1 += widemul(a[4], b[8-4]);
  27. accum0 += widemul(a[5], b[8-5]);
  28. accum1 += widemul(a[6], b[8-6]);
  29. accum0 += widemul(a[7], b[8-7]);
  30. accum1 += widemul(a[8], b[8-8]);
  31. accum1 += accum0;
  32. c[8] = accum1 & ((1ull<<57)-1);
  33. accum1 >>= 57;
  34. accum0 = 0;
  35. accum0 += widemul(a[1], b[0+9-1]);
  36. accum0 += widemul(a[2], b[0+9-2]);
  37. accum0 += widemul(a[3], b[0+9-3]);
  38. accum0 += widemul(a[4], b[0+9-4]);
  39. accum1 += widemul(a[0], b[0-0]);
  40. accum0 += widemul(a[5], b[0+9-5]);
  41. accum0 += widemul(a[6], b[0+9-6]);
  42. accum0 += widemul(a[7], b[0+9-7]);
  43. accum0 += widemul(a[8], b[0+9-8]);
  44. accum1 += accum0 << 1;
  45. c[0] = accum1 & ((1ull<<58)-1);
  46. accum1 >>= 58;
  47. accum0 = 0;
  48. accum0 += widemul(a[2], b[1+9-2]);
  49. accum0 += widemul(a[3], b[1+9-3]);
  50. accum1 += widemul(a[0], b[1-0]);
  51. accum0 += widemul(a[4], b[1+9-4]);
  52. accum0 += widemul(a[5], b[1+9-5]);
  53. accum1 += widemul(a[1], b[1-1]);
  54. accum0 += widemul(a[6], b[1+9-6]);
  55. accum0 += widemul(a[7], b[1+9-7]);
  56. accum0 += widemul(a[8], b[1+9-8]);
  57. accum1 += accum0 << 1;
  58. c[1] = accum1 & ((1ull<<58)-1);
  59. accum1 >>= 58;
  60. accum0 = 0;
  61. accum0 += widemul(a[3], b[2+9-3]);
  62. accum1 += widemul(a[0], b[2-0]);
  63. accum0 += widemul(a[4], b[2+9-4]);
  64. accum0 += widemul(a[5], b[2+9-5]);
  65. accum1 += widemul(a[1], b[2-1]);
  66. accum0 += widemul(a[6], b[2+9-6]);
  67. accum0 += widemul(a[7], b[2+9-7]);
  68. accum1 += widemul(a[2], b[2-2]);
  69. accum0 += widemul(a[8], b[2+9-8]);
  70. accum1 += accum0 << 1;
  71. c[2] = accum1 & ((1ull<<58)-1);
  72. accum1 >>= 58;
  73. accum0 = 0;
  74. accum0 += widemul(a[4], b[3+9-4]);
  75. accum1 += widemul(a[0], b[3-0]);
  76. accum0 += widemul(a[5], b[3+9-5]);
  77. accum1 += widemul(a[1], b[3-1]);
  78. accum0 += widemul(a[6], b[3+9-6]);
  79. accum1 += widemul(a[2], b[3-2]);
  80. accum0 += widemul(a[7], b[3+9-7]);
  81. accum1 += widemul(a[3], b[3-3]);
  82. accum0 += widemul(a[8], b[3+9-8]);
  83. accum1 += accum0 << 1;
  84. c[3] = accum1 & ((1ull<<58)-1);
  85. accum1 >>= 58;
  86. accum0 = 0;
  87. accum1 += widemul(a[0], b[4-0]);
  88. accum0 += widemul(a[5], b[4+9-5]);
  89. accum1 += widemul(a[1], b[4-1]);
  90. accum0 += widemul(a[6], b[4+9-6]);
  91. accum1 += widemul(a[2], b[4-2]);
  92. accum0 += widemul(a[7], b[4+9-7]);
  93. accum1 += widemul(a[3], b[4-3]);
  94. accum0 += widemul(a[8], b[4+9-8]);
  95. accum1 += widemul(a[4], b[4-4]);
  96. accum1 += accum0 << 1;
  97. c[4] = accum1 & ((1ull<<58)-1);
  98. accum1 >>= 58;
  99. accum0 = 0;
  100. accum1 += widemul(a[0], b[5-0]);
  101. accum0 += widemul(a[6], b[5+9-6]);
  102. accum1 += widemul(a[1], b[5-1]);
  103. accum1 += widemul(a[2], b[5-2]);
  104. accum0 += widemul(a[7], b[5+9-7]);
  105. accum1 += widemul(a[3], b[5-3]);
  106. accum1 += widemul(a[4], b[5-4]);
  107. accum0 += widemul(a[8], b[5+9-8]);
  108. accum1 += widemul(a[5], b[5-5]);
  109. accum1 += accum0 << 1;
  110. c[5] = accum1 & ((1ull<<58)-1);
  111. accum1 >>= 58;
  112. accum0 = 0;
  113. accum1 += widemul(a[0], b[6-0]);
  114. accum1 += widemul(a[1], b[6-1]);
  115. accum0 += widemul(a[7], b[6+9-7]);
  116. accum1 += widemul(a[2], b[6-2]);
  117. accum1 += widemul(a[3], b[6-3]);
  118. accum1 += widemul(a[4], b[6-4]);
  119. accum0 += widemul(a[8], b[6+9-8]);
  120. accum1 += widemul(a[5], b[6-5]);
  121. accum1 += widemul(a[6], b[6-6]);
  122. accum1 += accum0 << 1;
  123. c[6] = accum1 & ((1ull<<58)-1);
  124. accum1 >>= 58;
  125. accum1 += c[7];
  126. c[7] = accum1 & ((1ull<<58)-1);
  127. c[8] += accum1 >> 58;
  128. }
  129. void gf_mulw (
  130. gf_s *__restrict__ cs,
  131. const gf as,
  132. uint64_t b
  133. ) {
  134. const uint64_t *a = as->limb;
  135. uint64_t *c = cs->limb;
  136. __uint128_t accum0 = 0, accum3 = 0, accum6 = 0;
  137. uint64_t mask = (1ull<<58) - 1;
  138. int i;
  139. for (i=0; i<3; i++) {
  140. accum0 += widemul(b, a[i]);
  141. accum3 += widemul(b, a[i+3]);
  142. accum6 += widemul(b, a[i+6]);
  143. c[i] = accum0 & mask; accum0 >>= 58;
  144. c[i+3] = accum3 & mask; accum3 >>= 58;
  145. if (i==2) {
  146. c[i+6] = accum6 & (mask>>1); accum6 >>= 57;
  147. } else {
  148. c[i+6] = accum6 & mask; accum6 >>= 58;
  149. }
  150. }
  151. accum0 += c[3];
  152. c[3] = accum0 & mask;
  153. c[4] += accum0 >> 58;
  154. accum3 += c[6];
  155. c[6] = accum3 & mask;
  156. c[7] += accum3 >> 58;
  157. accum6 += c[0];
  158. c[0] = accum6 & mask;
  159. c[1] += accum6 >> 58;
  160. }
  161. void gf_sqr (gf_s *__restrict__ cs, const gf as) {
  162. uint64_t *c = cs->limb;
  163. const uint64_t *a = as->limb;
  164. __uint128_t accum0, accum1;
  165. accum0 = widemul(a[8], a[8]);
  166. accum1 = widemul(a[0], a[7]);
  167. accum0 += widemul(a[1], a[6]);
  168. accum1 += widemul(a[2], a[5]);
  169. accum0 += widemul(a[3], a[4]);
  170. accum1 += accum0;
  171. c[7] = 2 * (accum1 & ((1ull<<57)-1));
  172. accum1 >>= 57;
  173. accum0 = 0;
  174. accum0 = 0;
  175. accum1 += widemul(a[4], a[4]);
  176. accum0 += widemul(a[1], a[7]);
  177. accum1 += widemul(2*a[2], a[6]);
  178. accum0 += widemul(a[3], a[5]);
  179. accum1 += widemul(2*a[0], a[8]);
  180. accum1 += 2*accum0;
  181. c[8] = accum1 & ((1ull<<57)-1);
  182. accum1 >>= 57;
  183. accum0 = 0;
  184. accum1 += widemul(a[0], a[0]);
  185. accum0 += widemul(a[1], a[8]);
  186. accum0 += widemul(a[2], a[7]);
  187. accum0 += widemul(a[3], a[6]);
  188. accum0 += widemul(a[4], a[5]);
  189. accum1 += accum0 << 2;
  190. c[0] = accum1 & ((1ull<<58)-1);
  191. accum1 >>= 58;
  192. accum0 = 0;
  193. accum0 += widemul(a[2], a[8]);
  194. accum0 += widemul(a[3], a[7]);
  195. accum0 += widemul(a[4], a[6]);
  196. accum0 <<= 1;
  197. accum0 += widemul(a[5], a[5]);
  198. accum0 += widemul(a[0], a[1]);
  199. accum1 += accum0 << 1;
  200. c[1] = accum1 & ((1ull<<58)-1);
  201. accum1 >>= 58;
  202. accum0 = 0;
  203. accum1 += widemul(a[1], a[1]);
  204. accum0 += widemul(a[3], a[8]);
  205. accum0 += widemul(a[4], a[7]);
  206. accum0 += widemul(a[5], a[6]);
  207. accum0 <<= 1;
  208. accum0 += widemul(a[0], a[2]);
  209. accum1 += accum0 << 1;
  210. c[2] = accum1 & ((1ull<<58)-1);
  211. accum1 >>= 58;
  212. accum0 = 0;
  213. accum0 += widemul(a[6], a[6]);
  214. accum0 += widemul(2*a[5], a[7]);
  215. accum0 += widemul(2*a[4], a[8]);
  216. accum0 += widemul(a[0], a[3]);
  217. accum0 += widemul(a[1], a[2]);
  218. accum1 += accum0 << 1;
  219. c[3] = accum1 & ((1ull<<58)-1);
  220. accum1 >>= 58;
  221. accum0 = 0;
  222. accum0 += widemul(a[6], a[7]);
  223. accum0 += widemul(a[5], a[8]);
  224. accum0 <<= 1;
  225. accum1 += widemul(a[2], a[2]);
  226. accum0 += widemul(a[0], a[4]);
  227. accum0 += widemul(a[1], a[3]);
  228. accum1 += accum0 << 1;
  229. c[4] = accum1 & ((1ull<<58)-1);
  230. accum1 >>= 58;
  231. accum0 = 0;
  232. accum0 += widemul(2*a[6], a[8]);
  233. accum0 += widemul(a[7], a[7]);
  234. accum0 += widemul(a[0], a[5]);
  235. accum0 += widemul(a[1], a[4]);
  236. accum0 += widemul(a[2], a[3]);
  237. accum1 += accum0 << 1;
  238. c[5] = accum1 & ((1ull<<58)-1);
  239. accum1 >>= 58;
  240. accum0 = 0;
  241. accum1 += widemul(a[3], a[3]);
  242. accum0 += widemul(a[0], a[6]);
  243. accum0 += widemul(a[1], a[5]);
  244. accum0 += widemul(2*a[7], a[8]);
  245. accum0 += widemul(a[2], a[4]);
  246. accum1 += accum0 << 1;
  247. c[6] = accum1 & ((1ull<<58)-1);
  248. accum1 >>= 58;
  249. accum1 += c[7];
  250. c[7] = accum1 & ((1ull<<58)-1);
  251. c[8] += accum1 >> 58;
  252. }