You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

243 lines
5.3 KiB

  1. /* Copyright (c) 2014 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. #ifndef __P448_H__
  5. #define __P448_H__ 1
  6. #include <stdint.h>
  7. #include <assert.h>
  8. #include "word.h"
  /**
   * Field element stored as eight 64-bit limbs.
   *
   * The inline helpers in this header treat the limbs as radix-2^56 digits
   * (p448_weak_reduce masks each limb to its low 56 bits), leaving the upper
   * bits of every limb as headroom for unreduced sums.
   *
   * The 32-byte alignment is part of the type: the helpers reinterpret the
   * struct through wide register/vector pointer types (big_register_t,
   * uint64xn_t, uint64x4_t).
   */
  typedef struct p448_t p448_t;
  struct p448_t {
      uint64_t limb[8];
  } __attribute__((aligned(32)));
  12. #ifdef __cplusplus
  13. extern "C" {
  14. #endif
  15. static __inline__ void
  16. p448_set_ui(p448_t *out,
  17. uint64_t x)
  18. __attribute__((unused,always_inline));
  19. static __inline__ void
  20. p448_cond_swap(p448_t *a,
  21. p448_t *b,
  22. mask_t do_swap)
  23. __attribute__((unused,always_inline));
  24. static __inline__ void
  25. p448_add(p448_t *out,
  26. const p448_t *a,
  27. const p448_t *b)
  28. __attribute__((unused,always_inline));
  29. static __inline__ void
  30. p448_sub(p448_t *out,
  31. const p448_t *a,
  32. const p448_t *b)
  33. __attribute__((unused,always_inline));
  34. static __inline__ void
  35. p448_neg(p448_t *out,
  36. const p448_t *a)
  37. __attribute__((unused,always_inline));
  38. static __inline__ void
  39. p448_cond_neg(p448_t *a,
  40. mask_t doNegate)
  41. __attribute__((unused,always_inline));
  42. static __inline__ void
  43. p448_addw(p448_t *a,
  44. uint64_t x)
  45. __attribute__((unused,always_inline));
  46. static __inline__ void
  47. p448_subw(p448_t *a,
  48. uint64_t x)
  49. __attribute__((unused,always_inline));
  50. static __inline__ void
  51. p448_copy(p448_t *out, const p448_t *a)
  52. __attribute__((unused,always_inline));
  53. static __inline__ void
  54. p448_weak_reduce(p448_t *inout)
  55. __attribute__((unused,always_inline));
  56. void
  57. p448_strong_reduce(p448_t *inout);
  58. mask_t
  59. p448_is_zero(const p448_t *in);
  60. static __inline__ void
  61. p448_bias(p448_t *inout, int amount)
  62. __attribute__((unused,always_inline));
  63. void
  64. p448_mul(p448_t *__restrict__ out,
  65. const p448_t *a,
  66. const p448_t *b);
  67. void
  68. p448_mulw(p448_t *__restrict__ out,
  69. const p448_t *a,
  70. uint64_t b);
  71. void
  72. p448_sqr(p448_t *__restrict__ out,
  73. const p448_t *a);
  74. static __inline__ void
  75. p448_sqrn(p448_t *__restrict__ y, const p448_t *x, int n)
  76. __attribute__((unused,always_inline));
  77. void
  78. p448_set_ui(p448_t *out,
  79. uint64_t x) {
  80. int i;
  81. out->limb[0] = x;
  82. for (i=1; i<8; i++) {
  83. out->limb[i] = 0;
  84. }
  85. }
  86. void
  87. p448_cond_swap(p448_t *a, p448_t *b, mask_t doswap) {
  88. big_register_t *aa = (big_register_t*)a;
  89. big_register_t *bb = (big_register_t*)b;
  90. big_register_t m = doswap;
  91. unsigned int i;
  92. for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
  93. big_register_t x = m & (aa[i]^bb[i]);
  94. aa[i] ^= x;
  95. bb[i] ^= x;
  96. }
  97. }
  98. void
  99. p448_add(p448_t *out, const p448_t *a, const p448_t *b) {
  100. unsigned int i;
  101. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  102. ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
  103. }
  104. /*
  105. unsigned int i;
  106. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  107. out->limb[i] = a->limb[i] + b->limb[i];
  108. }
  109. */
  110. }
  111. void
  112. p448_sub(p448_t *out, const p448_t *a, const p448_t *b) {
  113. unsigned int i;
  114. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  115. ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
  116. }
  117. /*
  118. unsigned int i;
  119. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  120. out->limb[i] = a->limb[i] - b->limb[i];
  121. }
  122. */
  123. }
  124. void
  125. p448_neg(p448_t *out, const p448_t *a) {
  126. unsigned int i;
  127. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  128. ((uint64xn_t*)out)[i] = -((const uint64xn_t*)a)[i];
  129. }
  130. /*
  131. unsigned int i;
  132. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  133. out->limb[i] = -a->limb[i];
  134. }
  135. */
  136. }
  137. void
  138. p448_cond_neg(
  139. p448_t *a,
  140. mask_t doNegate
  141. ) {
  142. unsigned int i;
  143. struct p448_t negated;
  144. big_register_t *aa = (big_register_t *)a;
  145. big_register_t *nn = (big_register_t*)&negated;
  146. big_register_t m = doNegate;
  147. p448_neg(&negated, a);
  148. p448_bias(&negated, 2);
  149. for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
  150. aa[i] = (aa[i] & ~m) | (nn[i] & m);
  151. }
  152. }
  153. void
  154. p448_addw(p448_t *a, uint64_t x) {
  155. a->limb[0] += x;
  156. }
  157. void
  158. p448_subw(p448_t *a, uint64_t x) {
  159. a->limb[0] -= x;
  160. }
  161. void
  162. p448_copy(p448_t *out, const p448_t *a) {
  163. *out = *a;
  164. }
  165. void
  166. p448_bias(p448_t *a, int amt) {
  167. uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt;
  168. uint64x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
  169. uint64x4_t *aa = (uint64x4_t*) a;
  170. aa[0] += lo;
  171. aa[1] += hi;
  172. }
  173. void
  174. p448_weak_reduce(p448_t *a) {
  175. /* TODO: use pshufb/palignr if anyone cares about speed of this */
  176. uint64_t mask = (1ull<<56) - 1;
  177. uint64_t tmp = a->limb[7] >> 56;
  178. int i;
  179. a->limb[4] += tmp;
  180. for (i=7; i>0; i--) {
  181. a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56);
  182. }
  183. a->limb[0] = (a->limb[0] & mask) + tmp;
  184. }
  185. void p448_sqrn(p448_t *__restrict__ y, const p448_t *x, int n) {
  186. p448_t tmp;
  187. assert(n>0);
  188. if (n&1) {
  189. p448_sqr(y,x);
  190. n--;
  191. } else {
  192. p448_sqr(&tmp,x);
  193. p448_sqr(y,&tmp);
  194. n-=2;
  195. }
  196. for (; n; n-=2) {
  197. p448_sqr(&tmp,y);
  198. p448_sqr(y,&tmp);
  199. }
  200. }
  201. #ifdef __cplusplus
  202. }; /* extern "C" */
  203. #endif
  204. #endif /* __P448_H__ */