You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

258 lines
4.6 KiB

  1. /* Copyright (c) 2014 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. #ifndef __P448_H__
  5. #define __P448_H__ 1
  6. #include <stdint.h>
  7. #include <assert.h>
  8. #include "word.h"
  9. typedef struct p448_t {
  10. uint64_t limb[8];
  11. } __attribute__((aligned(32))) p448_t;
  12. #ifdef __cplusplus
  13. extern "C" {
  14. #endif
  15. static __inline__ void
  16. p448_set_ui (
  17. p448_t *out,
  18. uint64_t x
  19. ) __attribute__((unused,always_inline));
  20. static __inline__ void
  21. p448_add_RAW (
  22. p448_t *out,
  23. const p448_t *a,
  24. const p448_t *b
  25. ) __attribute__((unused,always_inline));
  26. static __inline__ void
  27. p448_sub_RAW (
  28. p448_t *out,
  29. const p448_t *a,
  30. const p448_t *b
  31. ) __attribute__((unused,always_inline));
  32. static __inline__ void
  33. p448_neg_RAW (
  34. p448_t *out,
  35. const p448_t *a
  36. ) __attribute__((unused,always_inline));
  37. static __inline__ void
  38. p448_addw (
  39. p448_t *a,
  40. uint64_t x
  41. ) __attribute__((unused,always_inline));
  42. static __inline__ void
  43. p448_subw (
  44. p448_t *a,
  45. uint64_t x
  46. ) __attribute__((unused,always_inline));
  47. static __inline__ void
  48. p448_copy (
  49. p448_t *out,
  50. const p448_t *a
  51. ) __attribute__((unused,always_inline));
  52. static __inline__ void
  53. p448_weak_reduce (
  54. p448_t *inout
  55. ) __attribute__((unused,always_inline));
  56. void
  57. p448_strong_reduce (
  58. p448_t *inout
  59. );
  60. mask_t
  61. p448_is_zero (
  62. const p448_t *in
  63. );
  64. static __inline__ void
  65. p448_bias (
  66. p448_t *inout,
  67. int amount
  68. ) __attribute__((unused,always_inline));
  69. void
  70. p448_mul (
  71. p448_t *__restrict__ out,
  72. const p448_t *a,
  73. const p448_t *b
  74. );
  75. void
  76. p448_mulw (
  77. p448_t *__restrict__ out,
  78. const p448_t *a,
  79. uint64_t b
  80. );
  81. void
  82. p448_sqr (
  83. p448_t *__restrict__ out,
  84. const p448_t *a
  85. );
  86. void
  87. p448_serialize (
  88. uint8_t *serial,
  89. const struct p448_t *x
  90. );
  91. mask_t
  92. p448_deserialize (
  93. p448_t *x,
  94. const uint8_t serial[56]
  95. );
  96. /* -------------- Inline functions begin here -------------- */
  97. void
  98. p448_set_ui (
  99. p448_t *out,
  100. uint64_t x
  101. ) {
  102. int i;
  103. out->limb[0] = x;
  104. for (i=1; i<8; i++) {
  105. out->limb[i] = 0;
  106. }
  107. }
  108. void
  109. p448_add_RAW (
  110. p448_t *out,
  111. const p448_t *a,
  112. const p448_t *b
  113. ) {
  114. unsigned int i;
  115. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  116. ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
  117. }
  118. /*
  119. unsigned int i;
  120. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  121. out->limb[i] = a->limb[i] + b->limb[i];
  122. }
  123. */
  124. }
  125. void
  126. p448_sub_RAW (
  127. p448_t *out,
  128. const p448_t *a,
  129. const p448_t *b
  130. ) {
  131. unsigned int i;
  132. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  133. ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
  134. }
  135. /*
  136. unsigned int i;
  137. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  138. out->limb[i] = a->limb[i] - b->limb[i];
  139. }
  140. */
  141. }
  142. void
  143. p448_neg_RAW (
  144. struct p448_t *out,
  145. const p448_t *a
  146. ) {
  147. unsigned int i;
  148. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  149. ((uint64xn_t*)out)[i] = -((const uint64xn_t*)a)[i];
  150. }
  151. /*
  152. unsigned int i;
  153. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  154. out->limb[i] = -a->limb[i];
  155. }
  156. */
  157. }
  158. void
  159. p448_addw (
  160. p448_t *a,
  161. uint64_t x
  162. ) {
  163. a->limb[0] += x;
  164. }
  165. void
  166. p448_subw (
  167. p448_t *a,
  168. uint64_t x
  169. ) {
  170. a->limb[0] -= x;
  171. }
  172. void
  173. p448_copy (
  174. p448_t *out,
  175. const p448_t *a
  176. ) {
  177. unsigned int i;
  178. for (i=0; i<sizeof(*out)/sizeof(big_register_t); i++) {
  179. ((big_register_t *)out)[i] = ((const big_register_t *)a)[i];
  180. }
  181. }
  182. void
  183. p448_bias (
  184. p448_t *a,
  185. int amt
  186. ) {
  187. uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt;
  188. #if __AVX2__
  189. uint64x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
  190. uint64x4_t *aa = (uint64x4_t*) a;
  191. aa[0] += lo;
  192. aa[1] += hi;
  193. #elif __SSE2__
  194. uint64x2_t lo = {co1,co1}, hi = {co2,co1};
  195. uint64x2_t *aa = (uint64x2_t*) a;
  196. aa[0] += lo;
  197. aa[1] += lo;
  198. aa[2] += hi;
  199. aa[3] += lo;
  200. #else
  201. unsigned int i;
  202. for (i=0; i<sizeof(*a)/sizeof(uint64_t); i++) {
  203. a->limb[i] += (i==4) ? co2 : co1;
  204. }
  205. #endif
  206. }
  207. void
  208. p448_weak_reduce (
  209. p448_t *a
  210. ) {
  211. /* PERF: use pshufb/palignr if anyone cares about speed of this */
  212. uint64_t mask = (1ull<<56) - 1;
  213. uint64_t tmp = a->limb[7] >> 56;
  214. int i;
  215. a->limb[4] += tmp;
  216. for (i=7; i>0; i--) {
  217. a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56);
  218. }
  219. a->limb[0] = (a->limb[0] & mask) + tmp;
  220. }
  221. #ifdef __cplusplus
  222. }; /* extern "C" */
  223. #endif
  224. #endif /* __P448_H__ */