You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

331 lines
5.8 KiB

  1. /* Copyright (c) 2014 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. #ifndef __P448_H__
  5. #define __P448_H__ 1
  6. #include <stdint.h>
  7. #include <assert.h>
  8. #include "word.h"
  /*
   * Field element container: eight 64-bit limbs.
   *
   * The inline routines in this header mask limbs with (1<<56)-1 (see
   * p448_weak_reduce), i.e. they treat the limbs as radix-2^56 digits with
   * the upper bits of each word used as carry headroom.
   *
   * The struct is 32-byte aligned; this header accesses it through
   * big_register_t / uint64xn_t / uint64x4_t pointer casts, which presumably
   * is why the alignment is requested -- confirm against word.h.
   */
  struct p448_t {
      uint64_t limb[8];
  } __attribute__((aligned(32)));

  typedef struct p448_t p448_t;
  12. #ifdef __cplusplus
  13. extern "C" {
  14. #endif
  15. static __inline__ void
  16. p448_set_ui (
  17. p448_t *out,
  18. uint64_t x
  19. ) __attribute__((unused,always_inline));
  20. static __inline__ void
  21. p448_cond_swap (
  22. p448_t *a,
  23. p448_t *b,
  24. mask_t do_swap
  25. ) __attribute__((unused,always_inline));
  26. static __inline__ void
  27. p448_add (
  28. p448_t *out,
  29. const p448_t *a,
  30. const p448_t *b
  31. ) __attribute__((unused,always_inline));
  32. static __inline__ void
  33. p448_sub (
  34. p448_t *out,
  35. const p448_t *a,
  36. const p448_t *b
  37. ) __attribute__((unused,always_inline));
  38. static __inline__ void
  39. p448_neg (
  40. p448_t *out,
  41. const p448_t *a
  42. ) __attribute__((unused,always_inline));
  43. static __inline__ void
  44. p448_cond_neg (
  45. p448_t *a,
  46. mask_t doNegate
  47. ) __attribute__((unused,always_inline));
  48. static __inline__ void
  49. p448_addw (
  50. p448_t *a,
  51. uint64_t x
  52. ) __attribute__((unused,always_inline));
  53. static __inline__ void
  54. p448_subw (
  55. p448_t *a,
  56. uint64_t x
  57. ) __attribute__((unused,always_inline));
  58. static __inline__ void
  59. p448_copy (
  60. p448_t *out,
  61. const p448_t *a
  62. ) __attribute__((unused,always_inline));
  63. static __inline__ void
  64. p448_weak_reduce (
  65. p448_t *inout
  66. ) __attribute__((unused,always_inline));
  67. void
  68. p448_strong_reduce (
  69. p448_t *inout
  70. );
  71. mask_t
  72. p448_is_zero (
  73. const p448_t *in
  74. );
  75. static __inline__ void
  76. p448_bias (
  77. p448_t *inout,
  78. int amount
  79. ) __attribute__((unused,always_inline));
  80. void
  81. p448_mul (
  82. p448_t *__restrict__ out,
  83. const p448_t *a,
  84. const p448_t *b
  85. );
  86. void
  87. p448_mulw (
  88. p448_t *__restrict__ out,
  89. const p448_t *a,
  90. uint64_t b
  91. );
  92. void
  93. p448_sqr (
  94. p448_t *__restrict__ out,
  95. const p448_t *a
  96. );
  97. static __inline__ void
  98. p448_sqrn (
  99. p448_t *__restrict__ y,
  100. const p448_t *x,
  101. int n
  102. ) __attribute__((unused,always_inline));
  103. void
  104. p448_serialize (
  105. uint8_t *serial,
  106. const struct p448_t *x
  107. );
  108. void
  109. q448_serialize (
  110. uint8_t *serial,
  111. const word_t x[7]
  112. );
  113. mask_t
  114. q448_deserialize (
  115. word_t x[7],
  116. const uint8_t serial[56]
  117. );
  118. mask_t
  119. p448_deserialize (
  120. p448_t *x,
  121. const uint8_t serial[56]
  122. );
  123. /* -------------- Inline functions begin here -------------- */
  124. void
  125. p448_set_ui (
  126. p448_t *out,
  127. uint64_t x
  128. ) {
  129. int i;
  130. out->limb[0] = x;
  131. for (i=1; i<8; i++) {
  132. out->limb[i] = 0;
  133. }
  134. }
  135. void
  136. p448_cond_swap (
  137. p448_t *a,
  138. p448_t *b,
  139. mask_t doswap
  140. ) {
  141. big_register_t *aa = (big_register_t*)a;
  142. big_register_t *bb = (big_register_t*)b;
  143. big_register_t m = doswap;
  144. unsigned int i;
  145. for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
  146. big_register_t x = m & (aa[i]^bb[i]);
  147. aa[i] ^= x;
  148. bb[i] ^= x;
  149. }
  150. }
  151. void
  152. p448_add (
  153. p448_t *out,
  154. const p448_t *a,
  155. const p448_t *b
  156. ) {
  157. unsigned int i;
  158. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  159. ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
  160. }
  161. /*
  162. unsigned int i;
  163. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  164. out->limb[i] = a->limb[i] + b->limb[i];
  165. }
  166. */
  167. }
  168. void
  169. p448_sub (
  170. p448_t *out,
  171. const p448_t *a,
  172. const p448_t *b
  173. ) {
  174. unsigned int i;
  175. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  176. ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
  177. }
  178. /*
  179. unsigned int i;
  180. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  181. out->limb[i] = a->limb[i] - b->limb[i];
  182. }
  183. */
  184. }
  185. void
  186. p448_neg (
  187. p448_t *out,
  188. const p448_t *a
  189. ) {
  190. unsigned int i;
  191. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  192. ((uint64xn_t*)out)[i] = -((const uint64xn_t*)a)[i];
  193. }
  194. /*
  195. unsigned int i;
  196. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  197. out->limb[i] = -a->limb[i];
  198. }
  199. */
  200. }
  201. void
  202. p448_cond_neg(
  203. p448_t *a,
  204. mask_t doNegate
  205. ) {
  206. unsigned int i;
  207. struct p448_t negated;
  208. big_register_t *aa = (big_register_t *)a;
  209. big_register_t *nn = (big_register_t*)&negated;
  210. big_register_t m = doNegate;
  211. p448_neg(&negated, a);
  212. p448_bias(&negated, 2);
  213. for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
  214. aa[i] = (aa[i] & ~m) | (nn[i] & m);
  215. }
  216. }
  217. void
  218. p448_addw (
  219. p448_t *a,
  220. uint64_t x
  221. ) {
  222. a->limb[0] += x;
  223. }
  224. void
  225. p448_subw (
  226. p448_t *a,
  227. uint64_t x
  228. ) {
  229. a->limb[0] -= x;
  230. }
  231. void
  232. p448_copy (
  233. p448_t *out,
  234. const p448_t *a
  235. ) {
  236. *out = *a;
  237. }
  238. void
  239. p448_bias (
  240. p448_t *a,
  241. int amt
  242. ) {
  243. uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt;
  244. uint64x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
  245. uint64x4_t *aa = (uint64x4_t*) a;
  246. aa[0] += lo;
  247. aa[1] += hi;
  248. }
  249. void
  250. p448_weak_reduce (
  251. p448_t *a
  252. ) {
  253. /* PERF: use pshufb/palignr if anyone cares about speed of this */
  254. uint64_t mask = (1ull<<56) - 1;
  255. uint64_t tmp = a->limb[7] >> 56;
  256. int i;
  257. a->limb[4] += tmp;
  258. for (i=7; i>0; i--) {
  259. a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56);
  260. }
  261. a->limb[0] = (a->limb[0] & mask) + tmp;
  262. }
  263. void
  264. p448_sqrn (
  265. p448_t *__restrict__ y,
  266. const p448_t *x,
  267. int n
  268. ) {
  269. p448_t tmp;
  270. assert(n>0);
  271. if (n&1) {
  272. p448_sqr(y,x);
  273. n--;
  274. } else {
  275. p448_sqr(&tmp,x);
  276. p448_sqr(y,&tmp);
  277. n-=2;
  278. }
  279. for (; n; n-=2) {
  280. p448_sqr(&tmp,y);
  281. p448_sqr(y,&tmp);
  282. }
  283. }
  284. #ifdef __cplusplus
  285. }; /* extern "C" */
  286. #endif
  287. #endif /* __P448_H__ */