You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

377 lines
6.6 KiB

  1. /* Copyright (c) 2014 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. #ifndef __P448_H__
  5. #define __P448_H__ 1
  6. #include <stdint.h>
  7. #include <assert.h>
  8. #include "word.h"
  /**
   * Field element stored as eight 64-bit limbs.
   *
   * limb[0] is the least significant digit.  The inline helpers in this
   * header treat every limb as one radix-2^56 digit (p448_weak_reduce masks
   * with 2^56 - 1 and carries with a shift by 56); the bits above bit 55 act
   * as headroom until a reduction is performed.
   *
   * The 32-byte alignment accompanies the vector-typed accesses (uint64xn_t,
   * uint64x4_t, big_register_t) that the helpers make through pointer casts.
   */
  typedef struct p448_t {
      uint64_t limb[8];
  } __attribute__((aligned(32))) p448_t;
  12. #ifdef __cplusplus
  13. extern "C" {
  14. #endif
  15. static __inline__ void
  16. p448_set_ui (
  17. p448_t *out,
  18. uint64_t x
  19. ) __attribute__((unused,always_inline));
  20. static __inline__ void
  21. p448_cond_swap (
  22. p448_t *a,
  23. p448_t *b,
  24. mask_t do_swap
  25. ) __attribute__((unused,always_inline));
  26. static __inline__ void
  27. p448_add (
  28. p448_t *out,
  29. const p448_t *a,
  30. const p448_t *b
  31. ) __attribute__((unused,always_inline));
  32. static __inline__ void
  33. p448_sub (
  34. p448_t *out,
  35. const p448_t *a,
  36. const p448_t *b
  37. ) __attribute__((unused,always_inline));
  38. static __inline__ void
  39. p448_neg (
  40. p448_t *out,
  41. const p448_t *a
  42. ) __attribute__((unused,always_inline));
  43. static __inline__ void
  44. p448_cond_neg (
  45. p448_t *a,
  46. mask_t doNegate
  47. ) __attribute__((unused,always_inline));
  48. static __inline__ void
  49. p448_addw (
  50. p448_t *a,
  51. uint64_t x
  52. ) __attribute__((unused,always_inline));
  53. static __inline__ void
  54. p448_subw (
  55. p448_t *a,
  56. uint64_t x
  57. ) __attribute__((unused,always_inline));
  58. static __inline__ void
  59. p448_copy (
  60. p448_t *out,
  61. const p448_t *a
  62. ) __attribute__((unused,always_inline));
  63. static __inline__ void
  64. p448_weak_reduce (
  65. p448_t *inout
  66. ) __attribute__((unused,always_inline));
  67. void
  68. p448_strong_reduce (
  69. p448_t *inout
  70. );
  71. mask_t
  72. p448_is_zero (
  73. const p448_t *in
  74. );
  75. static __inline__ void
  76. p448_bias (
  77. p448_t *inout,
  78. int amount
  79. ) __attribute__((unused,always_inline));
  80. void
  81. p448_mul (
  82. p448_t *__restrict__ out,
  83. const p448_t *a,
  84. const p448_t *b
  85. );
  86. void
  87. p448_mulw (
  88. p448_t *__restrict__ out,
  89. const p448_t *a,
  90. uint64_t b
  91. );
  92. void
  93. p448_sqr (
  94. p448_t *__restrict__ out,
  95. const p448_t *a
  96. );
  97. static __inline__ void
  98. p448_sqrn (
  99. p448_t *__restrict__ y,
  100. const p448_t *x,
  101. int n
  102. ) __attribute__((unused,always_inline));
  103. void
  104. p448_serialize (
  105. uint8_t *serial,
  106. const struct p448_t *x
  107. );
  108. mask_t
  109. p448_deserialize (
  110. p448_t *x,
  111. const uint8_t serial[56]
  112. );
  113. static __inline__ void
  114. p448_mask(
  115. struct p448_t *a,
  116. const struct p448_t *b,
  117. mask_t mask
  118. ) __attribute__((unused,always_inline));
  119. /**
  120. * Returns 1/x.
  121. *
  122. * If x=0, returns 0.
  123. */
  124. void
  125. p448_inverse (
  126. struct p448_t* a,
  127. const struct p448_t* x
  128. );
  129. void
  130. simultaneous_invert_p448 (
  131. struct p448_t *__restrict__ out,
  132. const struct p448_t *in,
  133. unsigned int n
  134. );
  135. static inline mask_t
  136. p448_eq (
  137. const struct p448_t *a,
  138. const struct p448_t *b
  139. ) __attribute__((always_inline,unused));
  140. /* -------------- Inline functions begin here -------------- */
  141. void
  142. p448_set_ui (
  143. p448_t *out,
  144. uint64_t x
  145. ) {
  146. int i;
  147. out->limb[0] = x;
  148. for (i=1; i<8; i++) {
  149. out->limb[i] = 0;
  150. }
  151. }
  152. void
  153. p448_cond_swap (
  154. p448_t *a,
  155. p448_t *b,
  156. mask_t doswap
  157. ) {
  158. big_register_t *aa = (big_register_t*)a;
  159. big_register_t *bb = (big_register_t*)b;
  160. big_register_t m = doswap;
  161. unsigned int i;
  162. for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
  163. big_register_t x = m & (aa[i]^bb[i]);
  164. aa[i] ^= x;
  165. bb[i] ^= x;
  166. }
  167. }
  168. void
  169. p448_add (
  170. p448_t *out,
  171. const p448_t *a,
  172. const p448_t *b
  173. ) {
  174. unsigned int i;
  175. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  176. ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
  177. }
  178. /*
  179. unsigned int i;
  180. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  181. out->limb[i] = a->limb[i] + b->limb[i];
  182. }
  183. */
  184. }
  185. void
  186. p448_sub (
  187. p448_t *out,
  188. const p448_t *a,
  189. const p448_t *b
  190. ) {
  191. unsigned int i;
  192. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  193. ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
  194. }
  195. /*
  196. unsigned int i;
  197. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  198. out->limb[i] = a->limb[i] - b->limb[i];
  199. }
  200. */
  201. }
  202. void
  203. p448_neg (
  204. struct p448_t *out,
  205. const p448_t *a
  206. ) {
  207. unsigned int i;
  208. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  209. ((uint64xn_t*)out)[i] = -((const uint64xn_t*)a)[i];
  210. }
  211. /*
  212. unsigned int i;
  213. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  214. out->limb[i] = -a->limb[i];
  215. }
  216. */
  217. }
  218. void
  219. p448_cond_neg(
  220. struct p448_t *a,
  221. mask_t doNegate
  222. ) {
  223. unsigned int i;
  224. struct p448_t negated;
  225. big_register_t *aa = (big_register_t *)a;
  226. big_register_t *nn = (big_register_t*)&negated;
  227. big_register_t m = doNegate;
  228. p448_neg(&negated, a);
  229. p448_bias(&negated, 2);
  230. for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
  231. aa[i] = (aa[i] & ~m) | (nn[i] & m);
  232. }
  233. }
  234. void
  235. p448_addw (
  236. p448_t *a,
  237. uint64_t x
  238. ) {
  239. a->limb[0] += x;
  240. }
  241. void
  242. p448_subw (
  243. p448_t *a,
  244. uint64_t x
  245. ) {
  246. a->limb[0] -= x;
  247. }
  248. void
  249. p448_copy (
  250. p448_t *out,
  251. const p448_t *a
  252. ) {
  253. *out = *a;
  254. }
  255. void
  256. p448_bias (
  257. p448_t *a,
  258. int amt
  259. ) {
  260. uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt;
  261. uint64x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
  262. uint64x4_t *aa = (uint64x4_t*) a;
  263. aa[0] += lo;
  264. aa[1] += hi;
  265. }
  266. void
  267. p448_weak_reduce (
  268. p448_t *a
  269. ) {
  270. /* PERF: use pshufb/palignr if anyone cares about speed of this */
  271. uint64_t mask = (1ull<<56) - 1;
  272. uint64_t tmp = a->limb[7] >> 56;
  273. int i;
  274. a->limb[4] += tmp;
  275. for (i=7; i>0; i--) {
  276. a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56);
  277. }
  278. a->limb[0] = (a->limb[0] & mask) + tmp;
  279. }
  280. void
  281. p448_sqrn (
  282. p448_t *__restrict__ y,
  283. const p448_t *x,
  284. int n
  285. ) {
  286. p448_t tmp;
  287. assert(n>0);
  288. if (n&1) {
  289. p448_sqr(y,x);
  290. n--;
  291. } else {
  292. p448_sqr(&tmp,x);
  293. p448_sqr(y,&tmp);
  294. n-=2;
  295. }
  296. for (; n; n-=2) {
  297. p448_sqr(&tmp,y);
  298. p448_sqr(y,&tmp);
  299. }
  300. }
  301. mask_t
  302. p448_eq (
  303. const struct p448_t *a,
  304. const struct p448_t *b
  305. ) {
  306. struct p448_t ra, rb;
  307. p448_copy(&ra, a);
  308. p448_copy(&rb, b);
  309. p448_weak_reduce(&ra);
  310. p448_weak_reduce(&rb);
  311. p448_sub(&ra, &ra, &rb);
  312. p448_bias(&ra, 2);
  313. return p448_is_zero(&ra);
  314. }
  315. void
  316. p448_mask (
  317. struct p448_t *a,
  318. const struct p448_t *b,
  319. mask_t mask
  320. ) {
  321. unsigned int i;
  322. for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) {
  323. a->limb[i] = b->limb[i] & mask;
  324. }
  325. }
  326. #ifdef __cplusplus
  327. }; /* extern "C" */
  328. #endif
  329. #endif /* __P448_H__ */