You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

236 lines
5.7 KiB

  1. /* Copyright (c) 2014 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. #include "barrett_field.h"
  5. #include <assert.h>
  6. word_t
  7. add_nr_ext_packed(
  8. word_t *out,
  9. const word_t *a,
  10. int nwords_a,
  11. const word_t *c,
  12. int nwords_c,
  13. word_t mask
  14. ) {
  15. int i;
  16. dword_t carry = 0;
  17. for (i=0; i<nwords_c; i++) {
  18. out[i] = carry = carry + a[i] + (c[i]&mask);
  19. carry >>= WORD_BITS;
  20. }
  21. for (; i<nwords_a; i++) {
  22. out[i] = carry = carry + a[i];
  23. carry >>= WORD_BITS;
  24. }
  25. return carry;
  26. }
  27. static __inline__ word_t
  28. add_nr_packed(
  29. word_t *a,
  30. const word_t *c,
  31. int nwords
  32. ) {
  33. int i;
  34. dword_t carry = 0;
  35. for (i=0; i<nwords; i++) {
  36. a[i] = carry = carry + a[i] + c[i];
  37. carry >>= WORD_BITS;
  38. }
  39. return carry;
  40. }
  41. static __inline__ word_t
  42. sub_nr_packed(
  43. word_t *a,
  44. const word_t *c,
  45. int nwords
  46. ) {
  47. int i;
  48. dsword_t carry = 0;
  49. for (i=0; i<nwords; i++) {
  50. a[i] = carry = carry + a[i] - c[i];
  51. carry >>= WORD_BITS;
  52. }
  53. return carry;
  54. }
  55. word_t
  56. sub_nr_ext_packed(
  57. word_t *out,
  58. const word_t *a,
  59. int nwords_a,
  60. const word_t *c,
  61. int nwords_c,
  62. word_t mask
  63. ) {
  64. int i;
  65. dsword_t carry = 0;
  66. for (i=0; i<nwords_c; i++) {
  67. out[i] = carry = carry + a[i] - (c[i]&mask);
  68. carry >>= WORD_BITS;
  69. }
  70. for (; i<nwords_a; i++) {
  71. out[i] = carry = carry + a[i];
  72. carry >>= WORD_BITS;
  73. }
  74. return carry;
  75. }
  76. static word_t
  77. widemac(
  78. word_t *accum,
  79. int nwords_accum,
  80. const word_t *mier,
  81. int nwords_mier,
  82. word_t mand,
  83. word_t carry
  84. ) {
  85. int i;
  86. assert(nwords_accum >= nwords_mier);
  87. for (i=0; i<nwords_mier; i++) {
  88. /* UMAAL chain for the wordy part of p */
  89. dword_t product = ((dword_t)mand) * mier[i];
  90. product += accum[i];
  91. product += carry;
  92. accum[i] = product;
  93. carry = product >> WORD_BITS;
  94. }
  95. for (; i<nwords_accum; i++) {
  96. dword_t sum = ((dword_t)carry) + accum[i];
  97. accum[i] = sum;
  98. carry = sum >> WORD_BITS;
  99. }
  100. return carry;
  101. }
  102. void
  103. barrett_reduce(
  104. word_t *a,
  105. int nwords_a,
  106. word_t a_carry,
  107. const word_t *p_lo,
  108. int nwords_p,
  109. int nwords_lo,
  110. int p_shift
  111. ) {
  112. /* TODO: non 2^k-c primes. */
  113. int repeat, nwords_left_in_a=nwords_a;
  114. /* TODO: is there a point to this a_carry business? */
  115. assert(a_carry < ((word_t)1)<<p_shift && nwords_a >= nwords_p);
  116. for (; nwords_left_in_a >= nwords_p; nwords_left_in_a--) {
  117. for (repeat=0; repeat<2; repeat++) {
  118. /* PERF: surely a more careful implementation could
  119. * avoid this double round
  120. */
  121. word_t mand = a[nwords_left_in_a-1] >> p_shift;
  122. a[nwords_left_in_a-1] &= (((word_t)1)<<p_shift)-1;
  123. if (p_shift && !repeat) {
  124. /* collect high bits when there are any */
  125. if (nwords_left_in_a < nwords_a) {
  126. mand |= a[nwords_left_in_a] << (WORD_BITS-p_shift);
  127. a[nwords_left_in_a] = 0;
  128. } else {
  129. mand |= a_carry << (WORD_BITS-p_shift);
  130. }
  131. }
  132. word_t carry = widemac(a+nwords_left_in_a-nwords_p, nwords_p, p_lo, nwords_lo, mand, 0);
  133. assert(!carry);
  134. (void)carry;
  135. }
  136. }
  137. assert(nwords_left_in_a == nwords_p-1);
  138. /* OK, but it still isn't reduced. Add and subtract p_lo. */
  139. word_t cout = add_nr_ext_packed(a,a,nwords_p,p_lo,nwords_lo,-1);
  140. if (p_shift) {
  141. cout = (cout<<(WORD_BITS-p_shift)) + (a[nwords_p-1]>>p_shift);
  142. a[nwords_p-1] &= (((word_t)1)<<p_shift)-1;
  143. }
  144. /* mask = carry-1: if no carry then do sub, otherwise don't */
  145. sub_nr_ext_packed(a,a,nwords_p,p_lo,nwords_lo,cout-1);
  146. }
  147. /* PERF: This function is horribly slow. Enough to break 1%. */
  148. void
  149. barrett_mul_or_mac(
  150. word_t *accum,
  151. int nwords_accum,
  152. const word_t *a,
  153. int nwords_a,
  154. const word_t *b,
  155. int nwords_b,
  156. const word_t *p_lo,
  157. int nwords_p,
  158. int nwords_lo,
  159. int p_shift,
  160. mask_t doMac
  161. ) {
  162. assert(nwords_accum >= nwords_p);
  163. /* nwords_tmp = max(nwords_a + 1, nwords_p + 1, nwords_accum if doMac); */
  164. int nwords_tmp = (nwords_a > nwords_p) ? nwords_a : nwords_p;
  165. nwords_tmp++;
  166. if (nwords_tmp < nwords_accum && doMac)
  167. nwords_tmp = nwords_accum;
  168. word_t tmp[nwords_tmp];
  169. int bpos, i;
  170. for (i=0; i<nwords_tmp; i++) {
  171. tmp[i] = 0;
  172. }
  173. if (doMac) {
  174. for (i=0; i<nwords_accum; i++) {
  175. tmp[i] = accum[i];
  176. }
  177. barrett_reduce(tmp, nwords_tmp, 0, p_lo, nwords_p, nwords_lo, p_shift);
  178. }
  179. for (bpos=nwords_b-1; bpos >= 0; bpos--) {
  180. /* Invariant at the beginning of the loop: the high word is unused. */
  181. assert(tmp[nwords_tmp-1] == 0);
  182. /* shift up */
  183. for (i=nwords_tmp-2; i>=0; i--) {
  184. tmp[i+1] = tmp[i];
  185. }
  186. /* mac and reduce */
  187. word_t carry = widemac(tmp, nwords_tmp, a, nwords_a, b[bpos], 0);
  188. /* the mac can't carry, because nwords_tmp >= nwords_a+1 and its high word is clear */
  189. assert(!carry);
  190. barrett_reduce(tmp, nwords_tmp, carry, p_lo, nwords_p, nwords_lo, p_shift);
  191. /* at this point, the number of words used is nwords_p <= nwords_tmp-1,
  192. * so the high word is again clear */
  193. }
  194. for (i=0; i<nwords_tmp && i<nwords_accum; i++) {
  195. accum[i] = tmp[i];
  196. }
  197. for (; i<nwords_tmp; i++) {
  198. assert(tmp[i] == 0);
  199. }
  200. for (; i<nwords_accum; i++) {
  201. accum[i] = 0;
  202. }
  203. }