You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

270 lines
6.4 KiB

  1. /* Copyright (c) 2014 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. #include "barrett_field.h"
  5. #include <assert.h>
  6. word_t
  7. add_nr_ext_packed(
  8. word_t *out,
  9. const word_t *a,
  10. int nwords_a,
  11. const word_t *c,
  12. int nwords_c,
  13. word_t mask
  14. ) {
  15. int i;
  16. dword_t carry = 0;
  17. for (i=0; i<nwords_c; i++) {
  18. out[i] = carry = carry + a[i] + (c[i]&mask);
  19. carry >>= WORD_BITS;
  20. }
  21. for (; i<nwords_a; i++) {
  22. out[i] = carry = carry + a[i];
  23. carry >>= WORD_BITS;
  24. }
  25. return carry;
  26. }
  27. static __inline__ word_t
  28. add_nr_packed(
  29. word_t *a,
  30. const word_t *c,
  31. int nwords
  32. ) {
  33. int i;
  34. dword_t carry = 0;
  35. for (i=0; i<nwords; i++) {
  36. a[i] = carry = carry + a[i] + c[i];
  37. carry >>= WORD_BITS;
  38. }
  39. return carry;
  40. }
  41. static __inline__ word_t
  42. sub_nr_packed(
  43. word_t *a,
  44. const word_t *c,
  45. int nwords
  46. ) {
  47. int i;
  48. dsword_t carry = 0;
  49. for (i=0; i<nwords; i++) {
  50. a[i] = carry = carry + a[i] - c[i];
  51. carry >>= WORD_BITS;
  52. }
  53. return carry;
  54. }
  55. word_t
  56. sub_nr_ext_packed(
  57. word_t *out,
  58. const word_t *a,
  59. int nwords_a,
  60. const word_t *c,
  61. int nwords_c,
  62. word_t mask
  63. ) {
  64. int i;
  65. dsword_t carry = 0;
  66. for (i=0; i<nwords_c; i++) {
  67. out[i] = carry = carry + a[i] - (c[i]&mask);
  68. carry >>= WORD_BITS;
  69. }
  70. for (; i<nwords_a; i++) {
  71. out[i] = carry = carry + a[i];
  72. carry >>= WORD_BITS;
  73. }
  74. return carry;
  75. }
  76. static word_t
  77. widemac(
  78. word_t *accum,
  79. int nwords_accum,
  80. const word_t *mier,
  81. int nwords_mier,
  82. word_t mand,
  83. word_t carry
  84. ) {
  85. int i;
  86. assert(nwords_accum >= nwords_mier);
  87. for (i=0; i<nwords_mier; i++) {
  88. /* UMAAL chain for the wordy part of p */
  89. dword_t product = ((dword_t)mand) * mier[i];
  90. product += accum[i];
  91. product += carry;
  92. accum[i] = product;
  93. carry = product >> WORD_BITS;
  94. }
  95. for (; i<nwords_accum; i++) {
  96. dword_t sum = ((dword_t)carry) + accum[i];
  97. accum[i] = sum;
  98. carry = sum >> WORD_BITS;
  99. }
  100. return carry;
  101. }
  102. void
  103. barrett_negate (
  104. word_t *a,
  105. int nwords_a,
  106. const word_t *p_lo,
  107. int nwords_p,
  108. int nwords_lo,
  109. int p_shift
  110. ) {
  111. int i;
  112. dsword_t carry = 0;
  113. barrett_reduce(a,nwords_a,0,p_lo,nwords_p,nwords_lo,p_shift);
  114. /* Have p = 2^big - p_lo. Want p - a = 2^big - p_lo - a */
  115. for (i=0; i<nwords_lo; i++) {
  116. a[i] = carry = carry - p_lo[i] - a[i];
  117. carry >>= WORD_BITS;
  118. }
  119. for (; i<nwords_p; i++) {
  120. a[i] = carry = carry - a[i];
  121. if (i<nwords_p-1) {
  122. carry >>= WORD_BITS;
  123. }
  124. }
  125. a[nwords_p-1] = carry = carry + (((word_t)1) << p_shift);
  126. for (; i<nwords_a; i++) {
  127. assert(!a[i]);
  128. }
  129. assert(!(carry>>64));
  130. }
  131. void
  132. barrett_reduce(
  133. word_t *a,
  134. int nwords_a,
  135. word_t a_carry,
  136. const word_t *p_lo,
  137. int nwords_p,
  138. int nwords_lo,
  139. int p_shift
  140. ) {
  141. /* TODO: non 2^k-c primes. */
  142. int repeat, nwords_left_in_a=nwords_a;
  143. /* TODO: is there a point to this a_carry business? */
  144. assert(a_carry < ((word_t)1)<<p_shift && nwords_a >= nwords_p);
  145. for (; nwords_left_in_a >= nwords_p; nwords_left_in_a--) {
  146. for (repeat=0; repeat<2; repeat++) {
  147. /* PERF: surely a more careful implementation could
  148. * avoid this double round
  149. */
  150. word_t mand = a[nwords_left_in_a-1] >> p_shift;
  151. a[nwords_left_in_a-1] &= (((word_t)1)<<p_shift)-1;
  152. if (p_shift && !repeat) {
  153. /* collect high bits when there are any */
  154. if (nwords_left_in_a < nwords_a) {
  155. mand |= a[nwords_left_in_a] << (WORD_BITS-p_shift);
  156. a[nwords_left_in_a] = 0;
  157. } else {
  158. mand |= a_carry << (WORD_BITS-p_shift);
  159. }
  160. }
  161. word_t carry = widemac(a+nwords_left_in_a-nwords_p, nwords_p, p_lo, nwords_lo, mand, 0);
  162. assert(!carry);
  163. (void)carry;
  164. }
  165. }
  166. assert(nwords_left_in_a == nwords_p-1);
  167. /* OK, but it still isn't reduced. Add and subtract p_lo. */
  168. word_t cout = add_nr_ext_packed(a,a,nwords_p,p_lo,nwords_lo,-1);
  169. if (p_shift) {
  170. cout = (cout<<(WORD_BITS-p_shift)) + (a[nwords_p-1]>>p_shift);
  171. a[nwords_p-1] &= (((word_t)1)<<p_shift)-1;
  172. }
  173. /* mask = carry-1: if no carry then do sub, otherwise don't */
  174. sub_nr_ext_packed(a,a,nwords_p,p_lo,nwords_lo,cout-1);
  175. }
  176. /* PERF: This function is horribly slow. Enough to break 1%. */
  177. void
  178. barrett_mul_or_mac(
  179. word_t *accum,
  180. int nwords_accum,
  181. const word_t *a,
  182. int nwords_a,
  183. const word_t *b,
  184. int nwords_b,
  185. const word_t *p_lo,
  186. int nwords_p,
  187. int nwords_lo,
  188. int p_shift,
  189. mask_t doMac
  190. ) {
  191. assert(nwords_accum >= nwords_p);
  192. /* nwords_tmp = max(nwords_a + 1, nwords_p + 1, nwords_accum if doMac); */
  193. int nwords_tmp = (nwords_a > nwords_p) ? nwords_a : nwords_p;
  194. nwords_tmp++;
  195. if (nwords_tmp < nwords_accum && doMac)
  196. nwords_tmp = nwords_accum;
  197. word_t tmp[nwords_tmp];
  198. int bpos, i;
  199. for (i=0; i<nwords_tmp; i++) {
  200. tmp[i] = 0;
  201. }
  202. for (bpos=nwords_b-1; bpos >= 0; bpos--) {
  203. /* Invariant at the beginning of the loop: the high word is unused. */
  204. assert(tmp[nwords_tmp-1] == 0);
  205. /* shift up */
  206. for (i=nwords_tmp-2; i>=0; i--) {
  207. tmp[i+1] = tmp[i];
  208. }
  209. tmp[0] = 0;
  210. /* mac and reduce */
  211. word_t carry = widemac(tmp, nwords_tmp, a, nwords_a, b[bpos], 0);
  212. /* the mac can't carry, because nwords_tmp >= nwords_a+1 and its high word is clear */
  213. assert(!carry);
  214. barrett_reduce(tmp, nwords_tmp, carry, p_lo, nwords_p, nwords_lo, p_shift);
  215. /* at this point, the number of words used is nwords_p <= nwords_tmp-1,
  216. * so the high word is again clear */
  217. }
  218. if (doMac) {
  219. word_t cout = add_nr_packed(tmp, accum, nwords_accum);
  220. barrett_reduce(tmp, nwords_tmp, cout, p_lo, nwords_p, nwords_lo, p_shift);
  221. }
  222. for (i=0; i<nwords_tmp && i<nwords_accum; i++) {
  223. accum[i] = tmp[i];
  224. }
  225. for (; i<nwords_tmp; i++) {
  226. assert(tmp[i] == 0);
  227. }
  228. for (; i<nwords_accum; i++) {
  229. accum[i] = 0;
  230. }
  231. }