You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

1616 lines
47 KiB

  1. /**
  2. * @file curve25519/decaf.c
  3. * @author Mike Hamburg
  4. *
  5. * @copyright
  6. * Copyright (c) 2015-2016 Cryptography Research, Inc. \n
  7. * Released under the MIT License. See LICENSE.txt for license information.
  8. *
  9. * @brief Decaf high-level functions.
  10. *
  11. * @warning This file was automatically generated in Python.
  12. * Please do not edit it.
  13. */
#define _XOPEN_SOURCE 600 /* for posix_memalign */
#include "word.h"
#include "field.h"

#include <decaf.h>
#include <decaf/ed255.h>

/* Template stuff: this file is generated from a template, specialized
 * here for the decaf_255 (Curve25519 / Ristretto255-style) instance. */
#define API_NS(_id) decaf_255_##_id
#define SCALAR_BITS DECAF_255_SCALAR_BITS
#define SCALAR_SER_BYTES DECAF_255_SCALAR_BYTES
#define SCALAR_LIMBS DECAF_255_SCALAR_LIMBS
#define scalar_t API_NS(scalar_t)
#define point_t API_NS(point_t)
#define precomputed_s API_NS(precomputed_s)
#define IMAGINE_TWIST 1  /* internal curve is the *i-twist of Curve25519 */
#define COFACTOR 8

/* Comb config: number of combs, n, t, s. */
#define COMBS_N 3
#define COMBS_T 5
#define COMBS_S 17
#define DECAF_WINDOW_BITS 4
#define DECAF_WNAF_FIXED_TABLE_BITS 5
#define DECAF_WNAF_VAR_TABLE_BITS 3
#define EDDSA_USE_SIGMA_ISOGENY 1

/* Edwards curve constant d for Curve25519's Edwards form. */
static const int EDWARDS_D = -121665;

/* Scalar offsets added before halving in the scalarmul routines, so the
 * window decomposition uses only odd multiples.  Values are instance
 * constants baked in by the generator. */
static const scalar_t point_scalarmul_adjustment = {{{
    SC_LIMB(0xd6ec31748d98951c), SC_LIMB(0xc6ef5bf4737dcf70), SC_LIMB(0xfffffffffffffffe), SC_LIMB(0x0fffffffffffffff)
}}}, precomputed_scalarmul_adjustment = {{{
    SC_LIMB(0x977f4a4775473484), SC_LIMB(0x6de72ae98b3ab623), SC_LIMB(0xffffffffffffffff), SC_LIMB(0x0fffffffffffffff)
}}};

/* X25519 base point u = 9, little-endian. */
const uint8_t decaf_x25519_base_point[DECAF_X25519_PUBLIC_BYTES] = { 0x09 };

/* Field constant used by the Ristretto encode/decode isogeny. */
static const gf RISTRETTO_ISOMAGIC = {{{
    0x0fdaa805d40ea, 0x2eb482e57d339, 0x007610274bc58, 0x6510b613dc8ff, 0x786c8905cfaff
}}};

#if COFACTOR==8 || EDDSA_USE_SIGMA_ISOGENY
/* sqrt(1-d) as a field element; only needed for these configurations. */
static const gf SQRT_ONE_MINUS_D = {FIELD_LITERAL(
    0x6db8831bbddec, 0x38d7b56c9c165, 0x016b221394bdc, 0x7540f7816214a, 0x0a0d85b4032b1
)};
#endif

#if IMAGINE_TWIST
#define TWISTED_D (-(EDWARDS_D))
#else
#define TWISTED_D ((EDWARDS_D)-1)
#endif

#if TWISTED_D < 0
#define EFF_D (-(TWISTED_D))
#define NEG_D 1
#else
#define EFF_D TWISTED_D
#define NEG_D 0
#endif
/* End of template stuff */

/* Sanity */
#if (COFACTOR == 8) && !IMAGINE_TWIST && !UNSAFE_CURVE_HAS_POINTS_AT_INFINITY
/* FUTURE MAGIC: Curve41417 doesn't have these properties. */
#error "Currently require IMAGINE_TWIST (and thus p=5 mod 8) for cofactor 8"
/* OK, but why?
 * Two reasons: #1: There are bugs when COFACTOR == && IMAGINE_TWIST
 # #2:
 */
#endif

#if IMAGINE_TWIST && (P_MOD_8 != 5)
#error "Cannot use IMAGINE_TWIST except for p == 5 mod 8"
#endif

#if (COFACTOR != 8) && (COFACTOR != 4)
#error "COFACTOR must be 4 or 8"
#endif

#if IMAGINE_TWIST
/* sqrt(-1), defined in the field arithmetic layer. */
extern const gf SQRT_MINUS_ONE;
#endif

#define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */

extern const point_t API_NS(point_base);

/* Projective Niels coordinates */
typedef struct { gf a, b, c; } niels_s, niels_t[1];
typedef struct { niels_t n; gf z; } VECTOR_ALIGNED pniels_s, pniels_t[1];

/* Precomputed base: fixed comb table of Niels points. */
struct precomputed_s { niels_t table [COMBS_N<<(COMBS_T-1)]; };

/* The precomputed base table is stored as raw field elements and aliased
 * into the struct view here. */
extern const gf API_NS(precomputed_base_as_fe)[];
const precomputed_s *API_NS(precomputed_base) =
    (const precomputed_s *) &API_NS(precomputed_base_as_fe);

const size_t API_NS(sizeof_precomputed_s) = sizeof(precomputed_s);
const size_t API_NS(alignof_precomputed_s) = sizeof(big_register_t);
  95. /** Inverse. */
  96. static void
  97. gf_invert(gf y, const gf x, int assert_nonzero) {
  98. gf t1, t2;
  99. gf_sqr(t1, x); // o^2
  100. mask_t ret = gf_isr(t2, t1); // +-1/sqrt(o^2) = +-1/o
  101. (void)ret;
  102. if (assert_nonzero) assert(ret);
  103. gf_sqr(t1, t2);
  104. gf_mul(t2, t1, x); // not direct to y in case of alias.
  105. gf_copy(y, t2);
  106. }
/** identity = (0,1), in extended coordinates (x=0, y=1, z=1, t=0). */
const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}};
/* Predeclare because not static: called by elligator */
void API_NS(deisogenize) (
    gf_s *__restrict__ s,
    gf_s *__restrict__ inv_el_sum,
    gf_s *__restrict__ inv_el_m1,
    const point_t p,
    mask_t toggle_s,
    mask_t toggle_altx,
    mask_t toggle_rotation
);

/**
 * Map a point on the internal (isogenous) curve down to its Ristretto-style
 * serialization coordinate s, plus two auxiliary outputs (inv_el_sum,
 * inv_el_m1) consumed by the elligator inverse.  The three toggle masks
 * select among equivalent preimages; all branches on secret data are done
 * with constant-time mask operations (gf_cond_neg / gf_cond_swap /
 * gf_cond_sel), never with conditionals.
 *
 * NOTE(review): the exact algebra follows the "ristretto.sage" reference
 * mentioned in the inline comments — confirm against that script when
 * modifying.
 */
void API_NS(deisogenize) (
    gf_s *__restrict__ s,
    gf_s *__restrict__ inv_el_sum,
    gf_s *__restrict__ inv_el_m1,
    const point_t p,
    mask_t toggle_s,
    mask_t toggle_altx,
    mask_t toggle_rotation
) {
#if COFACTOR == 4 && !IMAGINE_TWIST
    (void)toggle_rotation; /* Only applies to cofactor 8 */
    gf t1;
    /* Aliases: write scratch directly into the output slots. */
    gf_s *t2 = s, *t3=inv_el_sum, *t4=inv_el_m1;
    gf_add(t1,p->x,p->t);
    gf_sub(t2,p->x,p->t);
    gf_mul(t3,t1,t2); /* t3 = num */
    gf_sqr(t2,p->x);
    gf_mul(t1,t2,t3);
    gf_mulw(t2,t1,-1-TWISTED_D); /* -x^2 * (a-d) * num */
    gf_isr(t1,t2);    /* t1 = isr */
    gf_mul(t2,t1,t3); /* t2 = ratio */
    gf_mul(t4,t2,RISTRETTO_ISOMAGIC);
    mask_t negx = gf_lobit(t4) ^ toggle_altx;
    gf_cond_neg(t2, negx);
    gf_mul(t3,t2,p->z);
    gf_sub(t3,t3,p->t);
    gf_mul(t2,t3,p->x);
    gf_mulw(t4,t2,-1-TWISTED_D);
    gf_mul(s,t4,t1);
    /* Canonicalize: s is chosen non-negative (low bit clear). */
    mask_t lobs = gf_lobit(s);
    gf_cond_neg(s,lobs);
    gf_copy(inv_el_m1,p->x);
    gf_cond_neg(inv_el_m1,~lobs^negx^toggle_s);
    gf_add(inv_el_m1,inv_el_m1,p->t);
#elif COFACTOR == 8 && IMAGINE_TWIST
    /* More complicated because of rotation */
    gf t1,t2,t3,t4,t5;
    gf_add(t1,p->z,p->y);
    gf_sub(t2,p->z,p->y);
    gf_mul(t3,t1,t2);    /* t3 = num */
    gf_mul(t2,p->x,p->y); /* t2 = den */
    gf_sqr(t1,t2);
    gf_mul(t4,t1,t3);
    gf_mulw(t1,t4,-1-TWISTED_D);
    gf_isr(t4,t1);       /* isqrt(num*(a-d)*den^2) */
    gf_mul(t1,t2,t4);
    gf_mul(t2,t1,RISTRETTO_ISOMAGIC); /* t2 = "iden" in ristretto.sage */
    gf_mul(t1,t3,t4);    /* t1 = "inum" in ristretto.sage */
    /* Calculate altxy = iden*inum*i*t^2*(d-a) */
    gf_mul(t3,t1,t2);
    gf_mul_qnr(t4,t3);
    gf_mul(t3,t4,p->t);
    gf_mul(t4,t3,p->t);
    gf_mulw(t3,t4,TWISTED_D+1); /* iden*inum*i*t^2*(d-1) */
    mask_t rotate = toggle_rotation ^ gf_lobit(t3);
    /* Rotate if altxy is negative */
    gf_cond_swap(t1,t2,rotate);
    gf_mul_qnr(t4,p->x);
    gf_cond_sel(t4,p->y,t4,rotate); /* t4 = "fac" = ix if rotate, else y */
    gf_mul_qnr(t5,RISTRETTO_ISOMAGIC); /* t5 = imi */
    gf_mul(t3,t5,t2); /* iden * imi */
    gf_mul(t2,t5,t1);
    gf_mul(t5,t2,p->t); /* "altx" = iden*imi*t */
    mask_t negx = gf_lobit(t5) ^ toggle_altx;
    gf_cond_neg(t1,negx^rotate);
    gf_mul(t2,t1,p->z);
    gf_add(t2,t2,ONE);
    gf_mul(inv_el_sum,t2,t4);
    gf_mul(s,inv_el_sum,t3);
    /* Canonicalize: s is chosen non-negative (low bit clear). */
    mask_t negs = gf_lobit(s);
    gf_cond_neg(s,negs);
    mask_t negz = ~negs ^ toggle_s ^ negx;
    gf_copy(inv_el_m1,p->z);
    gf_cond_neg(inv_el_m1,negz);
    gf_sub(inv_el_m1,inv_el_m1,t4);
#else
#error "Cofactor must be 4 (with no IMAGINE_TWIST) or 8 (with IMAGINE_TWIST)"
#endif
}
  198. void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) {
  199. gf s,ie1,ie2;
  200. API_NS(deisogenize)(s,ie1,ie2,p,0,0,0);
  201. gf_serialize(ser,s,1);
  202. }
/**
 * Decode a serialized point.  Returns DECAF_SUCCESS iff the input is a
 * canonical encoding of a valid point (and nonzero unless allow_identity).
 * Runs in constant time: validity is accumulated into the mask `succ`
 * rather than branched on.
 */
decaf_error_t API_NS(point_decode) (
    point_t p,
    const unsigned char ser[SER_BYTES],
    decaf_bool_t allow_identity
) {
    gf s, s2, num, tmp;
    /* Aliases: reuse the output point's coordinates as scratch. */
    gf_s *tmp2=s2, *ynum=p->z, *isr=p->x, *den=p->t;
    mask_t succ = gf_deserialize(s, ser, 1);
    /* Reject s == 0 unless the identity is allowed. */
    succ &= bool_to_mask(allow_identity) | ~gf_eq(s, ZERO);
    /* Canonical encodings have the low bit of s clear. */
    succ &= ~gf_lobit(s);
    gf_sqr(s2,s);            /* s^2 = -as^2 */
#if IMAGINE_TWIST
    gf_sub(s2,ZERO,s2);      /* -as^2 */
#endif
    gf_sub(den,ONE,s2);      /* 1+as^2 */
    gf_add(ynum,ONE,s2);     /* 1-as^2 */
    gf_mulw(num,s2,-4*TWISTED_D);
    gf_sqr(tmp,den);         /* tmp = den^2 */
    gf_add(num,tmp,num);     /* num = den^2 - 4*d*s^2 */
    gf_mul(tmp2,num,tmp);    /* tmp2 = num*den^2 */
    succ &= gf_isr(isr,tmp2); /* isr = 1/sqrt(num*den^2); fails if not square */
    gf_mul(tmp,isr,den);     /* isr*den */
    gf_mul(p->y,tmp,ynum);   /* isr*den*(1-as^2) */
    gf_mul(tmp2,tmp,s);      /* s*isr*den */
    gf_add(tmp2,tmp2,tmp2);  /* 2*s*isr*den */
    gf_mul(tmp,tmp2,isr);    /* 2*s*isr^2*den */
    gf_mul(p->x,tmp,num);    /* 2*s*isr^2*den*num */
    gf_mul(tmp,tmp2,RISTRETTO_ISOMAGIC); /* 2*s*isr*den*magic */
    gf_cond_neg(p->x,gf_lobit(tmp)); /* flip x */
#if COFACTOR==8
    /* Additionally check y != 0 and x*y*isomagic nonegative */
    succ &= ~gf_eq(p->y,ZERO);
    gf_mul(tmp,p->x,p->y);
    gf_mul(tmp2,tmp,RISTRETTO_ISOMAGIC);
    succ &= ~gf_lobit(tmp2);
#endif
#if IMAGINE_TWIST
    /* Move the point onto the *i twisted curve. */
    gf_copy(tmp,p->x);
    gf_mul_qnr(p->x,tmp);
#endif
    /* Fill in z and t */
    gf_copy(p->z,ONE);
    gf_mul(p->t,p->x,p->y);
    assert(API_NS(point_valid)(p) | ~succ);
    return decaf_succeed_if(mask_to_bool(succ));
}
/**
 * Point subtraction: p = q - r, in extended twisted-Edwards coordinates.
 * The /* n+e *\/ annotations track worst-case limb headroom for the
 * unreduced (_nr) field ops; do not reorder without rechecking them.
 */
void API_NS(point_sub) (
    point_t p,
    const point_t q,
    const point_t r
) {
    gf a, b, c, d;
    gf_sub_nr ( b, q->y, q->x ); /* 3+e */
    gf_sub_nr ( d, r->y, r->x ); /* 3+e */
    gf_add_nr ( c, r->y, r->x ); /* 2+e */
    gf_mul ( a, c, b );
    gf_add_nr ( b, q->y, q->x ); /* 2+e */
    gf_mul ( p->y, d, b );
    gf_mul ( b, r->t, q->t );
    gf_mulw ( p->x, b, 2*EFF_D );
    gf_add_nr ( b, a, p->y );    /* 2+e */
    gf_sub_nr ( c, p->y, a );    /* 3+e */
    gf_mul ( a, q->z, r->z );
    gf_add_nr ( a, a, a );       /* 2+e */
    if (GF_HEADROOM <= 3) gf_weak_reduce(a); /* or 1+e */
#if NEG_D
    gf_sub_nr ( p->y, a, p->x ); /* 4+e or 3+e */
    gf_add_nr ( a, a, p->x );    /* 3+e or 2+e */
#else
    gf_add_nr ( p->y, a, p->x ); /* 3+e or 2+e */
    gf_sub_nr ( a, a, p->x );    /* 4+e or 3+e */
#endif
    gf_mul ( p->z, a, p->y );
    gf_mul ( p->x, p->y, c );
    gf_mul ( p->y, a, b );
    gf_mul ( p->t, b, c );
}
/**
 * Point addition: p = q + r, in extended twisted-Edwards coordinates.
 * Mirrors point_sub with the roles of the sum/difference terms swapped.
 * The /* n+e *\/ annotations track worst-case limb headroom for the
 * unreduced (_nr) field ops; do not reorder without rechecking them.
 */
void API_NS(point_add) (
    point_t p,
    const point_t q,
    const point_t r
) {
    gf a, b, c, d;
    gf_sub_nr ( b, q->y, q->x ); /* 3+e */
    gf_sub_nr ( c, r->y, r->x ); /* 3+e */
    gf_add_nr ( d, r->y, r->x ); /* 2+e */
    gf_mul ( a, c, b );
    gf_add_nr ( b, q->y, q->x ); /* 2+e */
    gf_mul ( p->y, d, b );
    gf_mul ( b, r->t, q->t );
    gf_mulw ( p->x, b, 2*EFF_D );
    gf_add_nr ( b, a, p->y );    /* 2+e */
    gf_sub_nr ( c, p->y, a );    /* 3+e */
    gf_mul ( a, q->z, r->z );
    gf_add_nr ( a, a, a );       /* 2+e */
    if (GF_HEADROOM <= 3) gf_weak_reduce(a); /* or 1+e */
#if NEG_D
    gf_add_nr ( p->y, a, p->x ); /* 3+e or 2+e */
    gf_sub_nr ( a, a, p->x );    /* 4+e or 3+e */
#else
    gf_sub_nr ( p->y, a, p->x ); /* 4+e or 3+e */
    gf_add_nr ( a, a, p->x );    /* 3+e or 2+e */
#endif
    gf_mul ( p->z, a, p->y );
    gf_mul ( p->x, p->y, c );
    gf_mul ( p->y, a, b );
    gf_mul ( p->t, b, c );
}
/**
 * Point doubling: p = 2q.  If before_double is nonzero, the t coordinate
 * is left uncomputed — valid only when the result immediately feeds
 * another doubling, which doesn't read t.
 */
static DECAF_NOINLINE void
point_double_internal (
    point_t p,
    const point_t q,
    int before_double
) {
    gf a, b, c, d;
    gf_sqr ( c, q->x );
    gf_sqr ( a, q->y );
    gf_add_nr ( d, c, a );           /* 2+e */
    gf_add_nr ( p->t, q->y, q->x );  /* 2+e */
    gf_sqr ( b, p->t );
    gf_subx_nr ( b, b, d, 3 );       /* 4+e */
    gf_sub_nr ( p->t, a, c );        /* 3+e */
    gf_sqr ( p->x, q->z );
    gf_add_nr ( p->z, p->x, p->x );  /* 2+e */
    gf_subx_nr ( a, p->z, p->t, 4 ); /* 6+e */
    if (GF_HEADROOM == 5) gf_weak_reduce(a); /* or 1+e */
    gf_mul ( p->x, a, b );
    gf_mul ( p->z, p->t, a );
    gf_mul ( p->y, p->t, d );
    /* Skip t when the caller will double again before using it. */
    if (!before_double) gf_mul ( p->t, b, d );
}
/** Public doubling: p = 2q, with all four coordinates computed. */
void API_NS(point_double)(point_t p, const point_t q) {
    point_double_internal(p,q,0);
}
  337. void API_NS(point_negate) (
  338. point_t nega,
  339. const point_t a
  340. ) {
  341. gf_sub(nega->x, ZERO, a->x);
  342. gf_copy(nega->y, a->y);
  343. gf_copy(nega->z, a->z);
  344. gf_sub(nega->t, ZERO, a->t);
  345. }
/* Operations on [p]niels */

/**
 * Constant-time conditional negation of a Niels point: if neg is all-ones,
 * n becomes -n (swap a/b, negate c); if neg is zero, n is unchanged.
 */
static DECAF_INLINE void
cond_neg_niels (
    niels_t n,
    mask_t neg
) {
    gf_cond_swap(n->a, n->b, neg);
    gf_cond_neg(n->c, neg);
}
/** Convert an extended point to projective Niels form:
 * (a,b,c,z) = (y-x, y+x, 2d*t, 2z). */
static DECAF_NOINLINE void pt_to_pniels (
    pniels_t b,
    const point_t a
) {
    gf_sub ( b->n->a, a->y, a->x );
    gf_add ( b->n->b, a->x, a->y );
    gf_mulw ( b->n->c, a->t, 2*TWISTED_D );
    gf_add ( b->z, a->z, a->z );
}
/** Convert a projective Niels point back to extended coordinates. */
static DECAF_NOINLINE void pniels_to_pt (
    point_t e,
    const pniels_t d
) {
    gf eu;
    gf_add ( eu, d->n->b, d->n->a );  /* eu = b+a = 2y (scaled) */
    gf_sub ( e->y, d->n->b, d->n->a ); /* b-a = 2x (scaled) */
    gf_mul ( e->t, e->y, eu);
    gf_mul ( e->x, d->z, e->y );
    gf_mul ( e->y, d->z, eu );
    gf_sqr ( e->z, d->z );
}
/** Convert an affine Niels point (implicit z=1) to extended coordinates. */
static DECAF_NOINLINE void
niels_to_pt (
    point_t e,
    const niels_t n
) {
    gf_add ( e->y, n->b, n->a );
    gf_sub ( e->x, n->b, n->a );
    gf_mul ( e->t, e->y, e->x );
    gf_copy ( e->z, ONE );
}
/**
 * Mixed addition: d += e, where e is an (affine) Niels point.
 * If before_double is nonzero, d->t is left uncomputed (see
 * point_double_internal).  Headroom annotations as in point_add.
 */
static DECAF_NOINLINE void
add_niels_to_pt (
    point_t d,
    const niels_t e,
    int before_double
) {
    gf a, b, c;
    gf_sub_nr ( b, d->y, d->x );    /* 3+e */
    gf_mul ( a, e->a, b );
    gf_add_nr ( b, d->x, d->y );    /* 2+e */
    gf_mul ( d->y, e->b, b );
    gf_mul ( d->x, e->c, d->t );
    gf_add_nr ( c, a, d->y );       /* 2+e */
    gf_sub_nr ( b, d->y, a );       /* 3+e */
    gf_sub_nr ( d->y, d->z, d->x ); /* 3+e */
    gf_add_nr ( a, d->x, d->z );    /* 2+e */
    gf_mul ( d->z, a, d->y );
    gf_mul ( d->x, d->y, b );
    gf_mul ( d->y, a, c );
    if (!before_double) gf_mul ( d->t, b, c );
}
/**
 * Mixed subtraction: d -= e, where e is an (affine) Niels point.
 * Identical to add_niels_to_pt with e->a / e->b swapped and the
 * z +- x terms exchanged.  If before_double is nonzero, d->t is
 * left uncomputed.
 */
static DECAF_NOINLINE void
sub_niels_from_pt (
    point_t d,
    const niels_t e,
    int before_double
) {
    gf a, b, c;
    gf_sub_nr ( b, d->y, d->x );    /* 3+e */
    gf_mul ( a, e->b, b );
    gf_add_nr ( b, d->x, d->y );    /* 2+e */
    gf_mul ( d->y, e->a, b );
    gf_mul ( d->x, e->c, d->t );
    gf_add_nr ( c, a, d->y );       /* 2+e */
    gf_sub_nr ( b, d->y, a );       /* 3+e */
    gf_add_nr ( d->y, d->z, d->x ); /* 2+e */
    gf_sub_nr ( a, d->z, d->x );    /* 3+e */
    gf_mul ( d->z, a, d->y );
    gf_mul ( d->x, d->y, b );
    gf_mul ( d->y, a, c );
    if (!before_double) gf_mul ( d->t, b, c );
}
  428. static void
  429. add_pniels_to_pt (
  430. point_t p,
  431. const pniels_t pn,
  432. int before_double
  433. ) {
  434. gf L0;
  435. gf_mul ( L0, p->z, pn->z );
  436. gf_copy ( p->z, L0 );
  437. add_niels_to_pt( p, pn->n, before_double );
  438. }
  439. static void
  440. sub_pniels_from_pt (
  441. point_t p,
  442. const pniels_t pn,
  443. int before_double
  444. ) {
  445. gf L0;
  446. gf_mul ( L0, p->z, pn->z );
  447. gf_copy ( p->z, L0 );
  448. sub_niels_from_pt( p, pn->n, before_double );
  449. }
/**
 * Fill multiples[] with the odd multiples {1,3,5,...,2*ntable-1} * b in
 * projective Niels form, for use by the fixed-window scalarmul loops.
 * Scratch state is zeroized before return.
 */
static DECAF_NOINLINE void
prepare_fixed_window(
    pniels_t *multiples,
    const point_t b,
    int ntable
) {
    point_t tmp;
    pniels_t pn;
    int i;
    point_double_internal(tmp, b, 0);
    pt_to_pniels(pn, tmp);          /* pn = 2b, the stride */
    pt_to_pniels(multiples[0], b);  /* multiples[0] = 1*b */
    API_NS(point_copy)(tmp, b);
    for (i=1; i<ntable; i++) {
        add_pniels_to_pt(tmp, pn, 0);
        pt_to_pniels(multiples[i], tmp); /* (2i+1)*b */
    }
    decaf_bzero(pn,sizeof(pn));
    decaf_bzero(tmp,sizeof(tmp));
}
/**
 * Constant-time variable-base scalar multiplication: a = scalar * b.
 * Uses a signed fixed-window method: the scalar is offset and halved so
 * every window digit is odd, table entries are fetched with a
 * constant-time lookup, and negative digits are handled by constant-time
 * Niels negation.  All secret-dependent state is zeroized on exit.
 */
void API_NS(point_scalarmul) (
    point_t a,
    const point_t b,
    const scalar_t scalar
) {
    const int WINDOW = DECAF_WINDOW_BITS,
        WINDOW_MASK = (1<<WINDOW)-1,
        WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1);
    scalar_t scalar1x;
    /* Adjust so the windowed digits are all odd. */
    API_NS(scalar_add)(scalar1x, scalar, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar1x,scalar1x);
    /* Set up a precomputed table with odd multiples of b. */
    pniels_t pn, multiples[NTABLE];
    point_t tmp;
    prepare_fixed_window(multiples, b, NTABLE);
    /* Initialize. */
    int i,j,first=1;
    i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1;
    for (; i>=0; i-=WINDOW) {
        /* Fetch another block of bits */
        word_t bits = scalar1x->limb[i/WBITS] >> (i%WBITS);
        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
            /* Window straddles a limb boundary. */
            bits ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
        }
        bits &= WINDOW_MASK;
        mask_t inv = (bits>>(WINDOW-1))-1; /* all-ones iff digit is "negative" */
        bits ^= inv;
        /* Add in from table. Compute t only on last iteration. */
        constant_time_lookup(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK);
        cond_neg_niels(pn->n, inv);
        if (first) {
            pniels_to_pt(tmp, pn);
            first = 0;
        } else {
            /* Using Hisil et al's lookahead method instead of extensible here
             * for no particular reason. Double WINDOW times, but only compute t on
             * the last one.
             */
            for (j=0; j<WINDOW-1; j++)
                point_double_internal(tmp, tmp, -1);
            point_double_internal(tmp, tmp, 0);
            add_pniels_to_pt(tmp, pn, i ? -1 : 0);
        }
    }
    /* Write out the answer */
    API_NS(point_copy)(a,tmp);
    decaf_bzero(scalar1x,sizeof(scalar1x));
    decaf_bzero(pn,sizeof(pn));
    decaf_bzero(multiples,sizeof(multiples));
    decaf_bzero(tmp,sizeof(tmp));
}
/**
 * Constant-time double scalar multiplication:
 * a = scalarb * b + scalarc * c, interleaving two signed fixed windows
 * over a single shared doubling chain.  Same digit encoding and
 * constant-time table access as point_scalarmul; secret state is
 * zeroized on exit.
 */
void API_NS(point_double_scalarmul) (
    point_t a,
    const point_t b,
    const scalar_t scalarb,
    const point_t c,
    const scalar_t scalarc
) {
    const int WINDOW = DECAF_WINDOW_BITS,
        WINDOW_MASK = (1<<WINDOW)-1,
        WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1);
    scalar_t scalar1x, scalar2x;
    /* Adjust both scalars so every window digit is odd. */
    API_NS(scalar_add)(scalar1x, scalarb, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar1x,scalar1x);
    API_NS(scalar_add)(scalar2x, scalarc, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar2x,scalar2x);
    /* Set up a precomputed table with odd multiples of b. */
    pniels_t pn, multiples1[NTABLE], multiples2[NTABLE];
    point_t tmp;
    prepare_fixed_window(multiples1, b, NTABLE);
    prepare_fixed_window(multiples2, c, NTABLE);
    /* Initialize. */
    int i,j,first=1;
    i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1;
    for (; i>=0; i-=WINDOW) {
        /* Fetch another block of bits */
        word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS),
            bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS);
        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
            /* Window straddles a limb boundary. */
            bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
            bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
        }
        bits1 &= WINDOW_MASK;
        bits2 &= WINDOW_MASK;
        mask_t inv1 = (bits1>>(WINDOW-1))-1; /* sign masks for each digit */
        mask_t inv2 = (bits2>>(WINDOW-1))-1;
        bits1 ^= inv1;
        bits2 ^= inv2;
        /* Add in from table. Compute t only on last iteration. */
        constant_time_lookup(pn, multiples1, sizeof(pn), NTABLE, bits1 & WINDOW_T_MASK);
        cond_neg_niels(pn->n, inv1);
        if (first) {
            pniels_to_pt(tmp, pn);
            first = 0;
        } else {
            /* Using Hisil et al's lookahead method instead of extensible here
             * for no particular reason. Double WINDOW times, but only compute t on
             * the last one.
             */
            for (j=0; j<WINDOW-1; j++)
                point_double_internal(tmp, tmp, -1);
            point_double_internal(tmp, tmp, 0);
            add_pniels_to_pt(tmp, pn, 0);
        }
        constant_time_lookup(pn, multiples2, sizeof(pn), NTABLE, bits2 & WINDOW_T_MASK);
        cond_neg_niels(pn->n, inv2);
        add_pniels_to_pt(tmp, pn, i?-1:0);
    }
    /* Write out the answer */
    API_NS(point_copy)(a,tmp);
    decaf_bzero(scalar1x,sizeof(scalar1x));
    decaf_bzero(scalar2x,sizeof(scalar2x));
    decaf_bzero(pn,sizeof(pn));
    decaf_bzero(multiples1,sizeof(multiples1));
    decaf_bzero(multiples2,sizeof(multiples2));
    decaf_bzero(tmp,sizeof(tmp));
}
/**
 * Constant-time dual scalar multiplication of a single base:
 * a1 = scalar1 * b and a2 = scalar2 * b, sharing one doubling chain.
 * Works "transposed" relative to point_scalarmul: instead of looking a
 * digit up in a table, the running power of b is accumulated INTO a
 * per-digit bucket (constant_time_insert), and the buckets are combined
 * at the end by the standard bucket-summation identity.
 * Secret state is zeroized on exit.
 */
void API_NS(point_dual_scalarmul) (
    point_t a1,
    point_t a2,
    const point_t b,
    const scalar_t scalar1,
    const scalar_t scalar2
) {
    const int WINDOW = DECAF_WINDOW_BITS,
        WINDOW_MASK = (1<<WINDOW)-1,
        WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1);
    scalar_t scalar1x, scalar2x;
    /* Adjust both scalars so every window digit is odd. */
    API_NS(scalar_add)(scalar1x, scalar1, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar1x,scalar1x);
    API_NS(scalar_add)(scalar2x, scalar2, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar2x,scalar2x);
    /* Set up a precomputed table with odd multiples of b. */
    point_t multiples1[NTABLE], multiples2[NTABLE], working, tmp;
    pniels_t pn;
    API_NS(point_copy)(working, b);
    /* Initialize. */
    int i,j;
    for (i=0; i<NTABLE; i++) {
        API_NS(point_copy)(multiples1[i], API_NS(point_identity));
        API_NS(point_copy)(multiples2[i], API_NS(point_identity));
    }
    for (i=0; i<SCALAR_BITS; i+=WINDOW) {
        if (i) {
            /* Advance working = working * 2^WINDOW. */
            for (j=0; j<WINDOW-1; j++)
                point_double_internal(working, working, -1);
            point_double_internal(working, working, 0);
        }
        /* Fetch another block of bits */
        word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS),
            bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS);
        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
            /* Window straddles a limb boundary. */
            bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
            bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
        }
        bits1 &= WINDOW_MASK;
        bits2 &= WINDOW_MASK;
        mask_t inv1 = (bits1>>(WINDOW-1))-1; /* sign masks for each digit */
        mask_t inv2 = (bits2>>(WINDOW-1))-1;
        bits1 ^= inv1;
        bits2 ^= inv2;
        pt_to_pniels(pn, working);
        constant_time_lookup(tmp, multiples1, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK);
        cond_neg_niels(pn->n, inv1);
        /* add_pniels_to_pt(multiples1[bits1 & WINDOW_T_MASK], pn, 0); */
        add_pniels_to_pt(tmp, pn, 0);
        constant_time_insert(multiples1, tmp, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK);
        constant_time_lookup(tmp, multiples2, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK);
        /* pn currently carries sign inv1; xor flips it to sign inv2. */
        cond_neg_niels(pn->n, inv1^inv2);
        /* add_pniels_to_pt(multiples2[bits2 & WINDOW_T_MASK], pn, 0); */
        add_pniels_to_pt(tmp, pn, 0);
        constant_time_insert(multiples2, tmp, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK);
    }
    /* Combine buckets: sum_k (2k+1) * bucket[k], via suffix sums. */
    if (NTABLE > 1) {
        API_NS(point_copy)(working, multiples1[NTABLE-1]);
        API_NS(point_copy)(tmp , multiples2[NTABLE-1]);
        for (i=NTABLE-1; i>1; i--) {
            API_NS(point_add)(multiples1[i-1], multiples1[i-1], multiples1[i]);
            API_NS(point_add)(multiples2[i-1], multiples2[i-1], multiples2[i]);
            API_NS(point_add)(working, working, multiples1[i-1]);
            API_NS(point_add)(tmp, tmp, multiples2[i-1]);
        }
        API_NS(point_add)(multiples1[0], multiples1[0], multiples1[1]);
        API_NS(point_add)(multiples2[0], multiples2[0], multiples2[1]);
        point_double_internal(working, working, 0);
        point_double_internal(tmp, tmp, 0);
        API_NS(point_add)(a1, working, multiples1[0]);
        API_NS(point_add)(a2, tmp, multiples2[0]);
    } else {
        API_NS(point_copy)(a1, multiples1[0]);
        API_NS(point_copy)(a2, multiples2[0]);
    }
    decaf_bzero(scalar1x,sizeof(scalar1x));
    decaf_bzero(scalar2x,sizeof(scalar2x));
    decaf_bzero(pn,sizeof(pn));
    decaf_bzero(multiples1,sizeof(multiples1));
    decaf_bzero(multiples2,sizeof(multiples2));
    decaf_bzero(tmp,sizeof(tmp));
    decaf_bzero(working,sizeof(working));
}
/**
 * Point equality, modulo the cofactor torsion: returns true iff p and q
 * encode the same Decaf/Ristretto element.  Constant time.
 */
decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) {
    /* equality mod 2-torsion compares x/y */
    gf a, b;
    gf_mul ( a, p->y, q->x );
    gf_mul ( b, q->y, p->x );
    mask_t succ = gf_eq(a,b);
#if (COFACTOR == 8) && IMAGINE_TWIST
    /* Also accept the 4-torsion rotation of q. */
    gf_mul ( a, p->y, q->y );
    gf_mul ( b, q->x, p->x );
#if !(IMAGINE_TWIST)
    /* NOTE(review): unreachable here — the enclosing #if already requires
     * IMAGINE_TWIST.  Kept from the template, where other configurations
     * may take this branch. */
    gf_sub ( a, ZERO, a );
#else
    /* Interesting note: the 4tor would normally be rotation.
     * But because of the *i twist, it's actually
     * (x,y) <-> (iy,ix)
     */
    /* No code, just a comment. */
#endif
    succ |= gf_eq(a,b);
#endif
    return mask_to_bool(succ);
}
/**
 * Check that p is a valid point: t == xy/z (extended-coordinate
 * consistency), the twisted Edwards curve equation holds, and z != 0.
 * Constant time.
 */
decaf_bool_t API_NS(point_valid) (
    const point_t p
) {
    gf a,b,c;
    /* x*y == z*t */
    gf_mul(a,p->x,p->y);
    gf_mul(b,p->z,p->t);
    mask_t out = gf_eq(a,b);
    /* Curve equation: y^2 - x^2 == z^2 + d*t^2 (a = -1 form). */
    gf_sqr(a,p->x);
    gf_sqr(b,p->y);
    gf_sub(a,b,a);
    gf_sqr(b,p->t);
    gf_mulw(c,b,TWISTED_D);
    gf_sqr(b,p->z);
    gf_add(b,b,c);
    out &= gf_eq(a,b);
    out &= ~gf_eq(p->z,ZERO);
    return mask_to_bool(out);
}
/**
 * Debugging helper: q = p twisted by a nontrivial torsion element, so
 * q != p coordinate-wise but point_eq(q, p) still holds.
 */
void API_NS(point_debugging_torque) (
    point_t q,
    const point_t p
) {
#if COFACTOR == 8 && IMAGINE_TWIST
    /* Apply the 4-torsion (x,y) -> (iy, ix), t -> -t. */
    gf tmp;
    gf_mul(tmp,p->x,SQRT_MINUS_ONE);
    gf_mul(q->x,p->y,SQRT_MINUS_ONE);
    gf_copy(q->y,tmp);
    gf_copy(q->z,p->z);
    gf_sub(q->t,ZERO,p->t);
#else
    /* Apply the 2-torsion: negate x and y. */
    gf_sub(q->x,ZERO,p->x);
    gf_sub(q->y,ZERO,p->y);
    gf_copy(q->z,p->z);
    gf_copy(q->t,p->t);
#endif
}
/**
 * Debugging helper: q = p with all projective coordinates scaled by a
 * factor deserialized from `factor` (coerced to 1 if it decodes to 0),
 * producing a different representation of the same point.
 */
void API_NS(point_debugging_pscale) (
    point_t q,
    const point_t p,
    const uint8_t factor[SER_BYTES]
) {
    gf gfac,tmp;
    /* NB this means you'll never pscale by negative numbers for p521 */
    ignore_result(gf_deserialize(gfac,factor,0));
    /* Avoid scaling by zero, which would destroy the point. */
    gf_cond_sel(gfac,gfac,ONE,gf_eq(gfac,ZERO));
    gf_mul(tmp,p->x,gfac);
    gf_copy(q->x,tmp);
    gf_mul(tmp,p->y,gfac);
    gf_copy(q->y,tmp);
    gf_mul(tmp,p->z,gfac);
    gf_copy(q->z,tmp);
    gf_mul(tmp,p->t,gfac);
    gf_copy(q->t,tmp);
}
/**
 * Montgomery batch inversion: out[i] = 1/in[i] for i in [0,n), using a
 * single field inversion plus 3(n-1) multiplications.
 * Precondition: n > 1 and every in[i] is nonzero (gf_invert asserts).
 * out must not alias in.
 */
static void gf_batch_invert (
    gf *__restrict__ out,
    const gf *in,
    unsigned int n
) {
    gf t1;
    assert(n>1);
    /* Forward pass: out[i+1] = in[0]*...*in[i] (prefix products). */
    gf_copy(out[1], in[0]);
    int i;
    for (i=1; i<(int) (n-1); i++) {
        gf_mul(out[i+1], out[i], in[i]);
    }
    gf_mul(out[0], out[n-1], in[n-1]);
    gf_invert(out[0], out[0], 1); /* out[0] = 1/(product of all inputs) */
    /* Backward pass: peel off one input at a time. */
    for (i=n-1; i>0; i--) {
        gf_mul(t1, out[i], out[0]);
        gf_copy(out[i], t1);
        gf_mul(t1, out[0], in[i]);
        gf_copy(out[0], t1);
    }
}
/**
 * Normalize a table of Niels points from projective to affine form:
 * divides each table[i]'s a/b/c by zs[i] (via batch inversion into zis)
 * and strongly reduces the results so they are canonical.
 */
static void batch_normalize_niels (
    niels_t *table,
    const gf *zs,
    gf *__restrict__ zis,
    int n
) {
    int i;
    gf product;
    gf_batch_invert(zis, zs, n);
    for (i=0; i<n; i++) {
        gf_mul(product, table[i]->a, zis[i]);
        gf_strong_reduce(product);
        gf_copy(table[i]->a, product);
        gf_mul(product, table[i]->b, zis[i]);
        gf_strong_reduce(product);
        gf_copy(table[i]->b, product);
        gf_mul(product, table[i]->c, zis[i]);
        gf_strong_reduce(product);
        gf_copy(table[i]->c, product);
    }
    decaf_bzero(product,sizeof(product));
}
/**
 * Build a fixed-base comb table for `base` into `table`, for use with
 * precomputed_scalarmul.  Comb shape is (COMBS_N combs) x (COMBS_T teeth)
 * x (COMBS_S spacing); entries are stored as normalized affine Niels
 * points, enumerated in Gray-code order so each step is one add/sub.
 * Scratch state is zeroized on exit.
 */
void API_NS(precompute) (
    precomputed_s *table,
    const point_t base
) {
    const unsigned int n = COMBS_N, t = COMBS_T, s = COMBS_S;
    assert(n*t*s >= SCALAR_BITS); /* combs must cover every scalar bit */
    point_t working, start, doubles[t-1];
    API_NS(point_copy)(working, base);
    pniels_t pn_tmp;
    /* VLAs sized by the comb configuration (compile-time constants). */
    gf zs[n<<(t-1)], zis[n<<(t-1)];
    unsigned int i,j,k;
    /* Compute n tables */
    for (i=0; i<n; i++) {
        /* Doubling phase */
        for (j=0; j<t; j++) {
            if (j) API_NS(point_add)(start, start, working);
            else API_NS(point_copy)(start, working);
            if (j==t-1 && i==n-1) break;
            point_double_internal(working, working,0);
            if (j<t-1) API_NS(point_copy)(doubles[j], working);
            /* Skip ahead s doublings to the next tooth position. */
            for (k=0; k<s-1; k++)
                point_double_internal(working, working, k<s-2);
        }
        /* Gray-code phase */
        for (j=0;; j++) {
            int gray = j ^ (j>>1);
            int idx = (((i+1)<<(t-1))-1) ^ gray;
            pt_to_pniels(pn_tmp, start);
            memcpy(table->table[idx], pn_tmp->n, sizeof(pn_tmp->n));
            gf_copy(zs[idx], pn_tmp->z);
            if (j >= (1u<<(t-1)) - 1) break;
            /* delta isolates the single bit that flips between successive
             * Gray codes; k becomes its index. */
            int delta = (j+1) ^ ((j+1)>>1) ^ gray;
            for (k=0; delta>1; k++)
                delta >>=1;
            if (gray & (1<<k)) {
                API_NS(point_add)(start, start, doubles[k]);
            } else {
                API_NS(point_sub)(start, start, doubles[k]);
            }
        }
    }
    batch_normalize_niels(table->table,(const gf *)zs,zis,n<<(t-1));
    decaf_bzero(zs,sizeof(zs));
    decaf_bzero(zis,sizeof(zis));
    decaf_bzero(pn_tmp,sizeof(pn_tmp));
    decaf_bzero(working,sizeof(working));
    decaf_bzero(start,sizeof(start));
    decaf_bzero(doubles,sizeof(doubles));
}
/** Constant-time fetch of table[idx] into ni (no secret-dependent
 * memory access pattern). */
static DECAF_INLINE void
constant_time_lookup_niels (
    niels_s *__restrict__ ni,
    const niels_t *table,
    int nelts,
    int idx
) {
    constant_time_lookup(ni, table, sizeof(niels_s), nelts, idx);
}
/**
 * Constant-time fixed-base scalar multiplication using a comb table built
 * by API_NS(precompute): out = scalar * (table's base).  The scalar is
 * offset and halved (precomputed_scalarmul_adjustment) so each comb digit
 * is odd; digits are read bit-by-bit across the comb teeth, looked up
 * constant-time, and conditionally negated.  Secret state is zeroized.
 */
void API_NS(precomputed_scalarmul) (
    point_t out,
    const precomputed_s *table,
    const scalar_t scalar
) {
    int i;
    unsigned j,k;
    const unsigned int n = COMBS_N, t = COMBS_T, s = COMBS_S;
    scalar_t scalar1x;
    API_NS(scalar_add)(scalar1x, scalar, precomputed_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar1x,scalar1x);
    niels_t ni;
    for (i=s-1; i>=0; i--) {
        if (i != (int)s-1) point_double_internal(out,out,0);
        for (j=0; j<n; j++) {
            /* Gather the t bits of comb j at spacing position i. */
            int tab = 0;
            for (k=0; k<t; k++) {
                unsigned int bit = i + s*(k + j*t);
                if (bit < SCALAR_BITS) {
                    tab |= (scalar1x->limb[bit/WBITS] >> (bit%WBITS) & 1) << k;
                }
            }
            mask_t invert = (tab>>(t-1))-1; /* all-ones iff digit "negative" */
            tab ^= invert;
            tab &= (1<<(t-1)) - 1;
            constant_time_lookup_niels(ni, &table->table[j<<(t-1)], 1<<(t-1), tab);
            cond_neg_niels(ni, invert);
            if ((i!=(int)s-1)||j) {
                add_niels_to_pt(out, ni, j==n-1 && i);
            } else {
                /* Very first term initializes out. */
                niels_to_pt(out, ni);
            }
        }
    }
    decaf_bzero(ni,sizeof(ni));
    decaf_bzero(scalar1x,sizeof(scalar1x));
}
/** Constant-time select: out = pick_b ? b : a. */
void API_NS(point_cond_sel) (
    point_t out,
    const point_t a,
    const point_t b,
    decaf_bool_t pick_b
) {
    constant_time_select(out,a,b,sizeof(point_t),bool_to_mask(pick_b),0);
}
/* FUTURE: restore Curve25519 Montgomery ladder? */

/**
 * Decode `base`, multiply by `scalar`, and re-encode into `scaled`.
 * If short_circuit is set, a decode failure returns immediately;
 * otherwise the multiply proceeds on the (constant-time-selected) base
 * point so that timing doesn't reveal whether decoding succeeded.
 * Returns the decode result either way.
 */
decaf_error_t API_NS(direct_scalarmul) (
    uint8_t scaled[SER_BYTES],
    const uint8_t base[SER_BYTES],
    const scalar_t scalar,
    decaf_bool_t allow_identity,
    decaf_bool_t short_circuit
) {
    point_t basep;
    decaf_error_t succ = API_NS(point_decode)(basep, base, allow_identity);
    if (short_circuit && succ != DECAF_SUCCESS) return succ;
    /* On failure (non-short-circuit), substitute the generator so the
     * scalarmul still runs on a valid point. */
    API_NS(point_cond_sel)(basep, API_NS(point_base), basep, succ);
    API_NS(point_scalarmul)(basep, basep, scalar);
    API_NS(point_encode)(scaled, basep);
    API_NS(point_destroy)(basep);
    return succ;
}
/**
 * Multiply a decaf point by the cofactor and serialize it in EdDSA
 * (Ed25519) wire format: little-endian y-coordinate with the low bit
 * of x packed into the top bit of the last byte.
 *
 * The internal point is on the twisted curve; it is doubled (for
 * COFACTOR == 8) and pushed through the isogeny back to the untwisted
 * Edwards curve before encoding.
 *
 * NOTE(review): the encoding indexes use DECAF_EDDSA_25519_PRIVATE_BYTES;
 * presumably this equals DECAF_EDDSA_25519_PUBLIC_BYTES here — confirm
 * against the header.
 */
void API_NS(point_mul_by_cofactor_and_encode_like_eddsa) (
    uint8_t enc[DECAF_EDDSA_25519_PUBLIC_BYTES],
    const point_t p
) {
    /* The point is now on the twisted curve.  Move it to untwisted. */
    gf x, y, z, t;
    point_t q;
#if COFACTOR == 8
    API_NS(point_double)(q,p);
#else
    API_NS(point_copy)(q,p);
#endif

#if EDDSA_USE_SIGMA_ISOGENY
    {
        /* Use 4-isogeny like ed25519:
         *   2*x*y*sqrt(d/a-1)/(ax^2 + y^2 - 2)
         *   (y^2 - ax^2)/(y^2 + ax^2)
         * with a = -1, d = -EDWARDS_D:
         *   -2xysqrt(EDWARDS_D-1)/(2z^2-y^2+x^2)
         *   (y^2+x^2)/(y^2-x^2)
         */
        gf u;
        gf_sqr ( x, q->x ); // x^2
        gf_sqr ( t, q->y ); // y^2
        gf_add( u, x, t ); // x^2 + y^2
        gf_add( z, q->y, q->x );
        gf_sqr ( y, z);
        gf_sub ( y, u, y ); // -2xy
        gf_sub ( z, t, x ); // y^2 - x^2
        gf_sqr ( x, q->z );
        gf_add ( t, x, x);
        gf_sub ( t, t, z); // 2z^2 - y^2 + x^2
        gf_mul ( x, y, z ); // 2xy(y^2-x^2)
        gf_mul ( y, u, t ); // (x^2+y^2)(2z^2-y^2+x^2)
        gf_mul ( u, z, t );
        gf_copy( z, u ); // projective denominator
        gf_mul ( u, x, RISTRETTO_ISOMAGIC );
#if IMAGINE_TWIST
        /* Undo the i-twist by multiplying through the quadratic nonresidue. */
        gf_mul_qnr( x, u );
#else
#error "... probably wrong"
        gf_copy( x, u );
#endif
        decaf_bzero(u,sizeof(u));
    }
#elif IMAGINE_TWIST
    {
        /* No sigma isogeny: two more doublings, then untwist x. */
        API_NS(point_double)(q,q);
        API_NS(point_double)(q,q);
        gf_mul_qnr(x, q->x);
        gf_copy(y, q->y);
        gf_copy(z, q->z);
    }
#else
    {
        /* 4-isogeny: 2xy/(y^+x^2), (y^2-x^2)/(2z^2-y^2+x^2) */
        gf u;
        gf_sqr ( x, q->x );
        gf_sqr ( t, q->y );
        gf_add( u, x, t );
        gf_add( z, q->y, q->x );
        gf_sqr ( y, z);
        gf_sub ( y, y, u );
        gf_sub ( z, t, x );
        gf_sqr ( x, q->z );
        gf_add ( t, x, x);
        gf_sub ( t, t, z);
        gf_mul ( x, t, y );
        gf_mul ( y, z, u );
        gf_mul ( z, u, t );
        decaf_bzero(u,sizeof(u));
    }
#endif
    /* Affinize: z := 1/z, then t = affine x, x = affine y. */
    gf_invert(z,z,1);
    gf_mul(t,x,z);
    gf_mul(x,y,z);

    /* Encode: little-endian y, sign bit of x in the MSB of the last byte. */
    enc[DECAF_EDDSA_25519_PRIVATE_BYTES-1] = 0;
    gf_serialize(enc, x, 1);
    enc[DECAF_EDDSA_25519_PRIVATE_BYTES-1] |= 0x80 & gf_lobit(t);

    /* Scrub intermediates. */
    decaf_bzero(x,sizeof(x));
    decaf_bzero(y,sizeof(y));
    decaf_bzero(z,sizeof(z));
    decaf_bzero(t,sizeof(t));
    API_NS(point_destroy)(q);
}
/**
 * Decode an EdDSA (Ed25519) wire-format point onto the internal twisted
 * curve, discarding any cofactor component.
 *
 * Recovers x from y via x = +/- sqrt((1-y^2)/(1-dy^2)) (sign taken from
 * the top bit of the encoding), then maps through the isogeny to the
 * internal representation.
 *
 * @return DECAF_SUCCESS iff the encoding was a valid field element with
 *         a square x^2 candidate.
 */
decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
    point_t p,
    const uint8_t enc[DECAF_EDDSA_25519_PUBLIC_BYTES]
) {
    uint8_t enc2[DECAF_EDDSA_25519_PUBLIC_BYTES];
    memcpy(enc2,enc,sizeof(enc2));

    /* Peel off the x-sign bit before deserializing y. */
    mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] & 0x80);
    enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] &= ~0x80;

    mask_t succ = gf_deserialize(p->y, enc2, 1);
#if 7 == 0
    /* Generated condition (field bits mod 8 == 7 here), so this check is
     * compiled out for this curve. */
    succ &= word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1]);
#endif

    gf_sqr(p->x,p->y);
    gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */
#if EDDSA_USE_SIGMA_ISOGENY
    gf_mulw(p->t,p->z,EDWARDS_D); /* d-dy^2 */
    gf_mulw(p->x,p->z,EDWARDS_D-1); /* num = (1-y^2)(d-1) */
    gf_copy(p->z,p->x);
#else
    gf_mulw(p->t,p->x,EDWARDS_D); /* dy^2 */
#endif
    gf_sub(p->t,ONE,p->t); /* denom = 1-dy^2 or 1-d + dy^2 */

    /* x = sqrt(num/denom); fails (succ=0) if num*denom is nonsquare. */
    gf_mul(p->x,p->z,p->t);
    succ &= gf_isr(p->t,p->x); /* 1/sqrt(num * denom) */

    gf_mul(p->x,p->t,p->z); /* sqrt(num / denom) */
    /* Pick the root whose low bit matches the encoded sign bit. */
    gf_cond_neg(p->x,gf_lobit(p->x)^low);
    gf_copy(p->z,ONE);

#if EDDSA_USE_SIGMA_ISOGENY
    {
        /* Use 4-isogeny like ed25519:
         *   2*x*y/sqrt(1-d/a)/(ax^2 + y^2 - 2)
         *   (y^2 - ax^2)/(y^2 + ax^2)
         * (MAGIC: above formula may be off by a factor of -a
         * or something somewhere; check it for other a)
         *
         * with a = -1, d = -EDWARDS_D:
         *   -2xy/sqrt(1-EDWARDS_D)/(2z^2-y^2+x^2)
         *   (y^2+x^2)/(y^2-x^2)
         */
        gf a, b, c, d;
        gf_sqr ( c, p->x );
        gf_sqr ( a, p->y );
        gf_add ( d, c, a ); // x^2 + y^2
        gf_add ( p->t, p->y, p->x );
        gf_sqr ( b, p->t );
        gf_sub ( b, b, d ); // 2xy
        gf_sub ( p->t, a, c ); // y^2 - x^2
        gf_sqr ( p->x, p->z );
        gf_add ( p->z, p->x, p->x );
        gf_sub ( a, p->z, p->t ); // 2z^2 - y^2 + x^2
        gf_mul ( c, a, SQRT_ONE_MINUS_D );
        gf_mul ( p->x, b, p->t); // (2xy)(y^2-x^2)
        gf_mul ( p->z, p->t, c ); // (y^2-x^2)sd(2z^2 - y^2 + x^2)
        gf_mul ( p->y, d, c ); // (y^2+x^2)sd(2z^2 - y^2 + x^2)
        gf_mul ( p->t, d, b );
        decaf_bzero(a,sizeof(a));
        decaf_bzero(b,sizeof(b));
        decaf_bzero(c,sizeof(c));
        decaf_bzero(d,sizeof(d));
    }
#elif IMAGINE_TWIST
    {
        /* Twist x by sqrt(-1) and recompute t = x*y. */
        gf_mul(p->t,p->x,SQRT_MINUS_ONE);
        gf_copy(p->x,p->t);
        gf_mul(p->t,p->x,p->y);
    }
#else
    {
        /* 4-isogeny 2xy/(y^2-ax^2), (y^2+ax^2)/(2-y^2-ax^2) */
        gf a, b, c, d;
        gf_sqr ( c, p->x );
        gf_sqr ( a, p->y );
        gf_add ( d, c, a );
        gf_add ( p->t, p->y, p->x );
        gf_sqr ( b, p->t );
        gf_sub ( b, b, d );
        gf_sub ( p->t, a, c );
        gf_sqr ( p->x, p->z );
        gf_add ( p->z, p->x, p->x );
        gf_sub ( a, p->z, d );
        gf_mul ( p->x, a, b );
        gf_mul ( p->z, p->t, a );
        gf_mul ( p->y, p->t, d );
        gf_mul ( p->t, b, d );
        decaf_bzero(a,sizeof(a));
        decaf_bzero(b,sizeof(b));
        decaf_bzero(c,sizeof(c));
        decaf_bzero(d,sizeof(d));
    }
#endif

    decaf_bzero(enc2,sizeof(enc2));
    /* On success the result must be a valid internal point; ~succ is
     * nonzero exactly when succ is the all-zero (failure) mask. */
    assert(API_NS(point_valid)(p) || ~succ);

    return decaf_succeed_if(mask_to_bool(succ));
}
/**
 * X25519 Diffie-Hellman (RFC 7748 style): out = X(scalar * base),
 * computed with a constant-time Montgomery ladder on x-only coordinates.
 *
 * The scalar is clamped on the fly inside the loop: the low lg(COFACTOR)
 * bits are cleared and the top processed bit is forced to 1 (bit 255 is
 * never examined since the loop starts at X_PRIVATE_BITS-1).
 *
 * @return DECAF_FAILURE iff the result is zero (small-order input),
 *         DECAF_SUCCESS otherwise.  The output is written either way.
 */
decaf_error_t decaf_x25519 (
    uint8_t out[X_PUBLIC_BYTES],
    const uint8_t base[X_PUBLIC_BYTES],
    const uint8_t scalar[X_PRIVATE_BYTES]
) {
    gf x1, x2, z2, x3, z3, t1, t2;
    ignore_result(gf_deserialize(x1,base,1));

    /* Ladder state: (x2:z2) = identity, (x3:z3) = input point. */
    gf_copy(x2,ONE);
    gf_copy(z2,ZERO);
    gf_copy(x3,x1);
    gf_copy(z3,ONE);

    int t;
    mask_t swap = 0;

    for (t = X_PRIVATE_BITS-1; t>=0; t--) {
        uint8_t sb = scalar[t/8];

        /* Scalar conditioning */
        if (t/8==0) sb &= -(uint8_t)COFACTOR;          /* clear low bits */
        else if (t == X_PRIVATE_BITS-1) sb = -1;       /* force top bit set */

        mask_t k_t = (sb>>(t%8)) & 1;
        k_t = -k_t; /* set to all 0s or all 1s */

        /* Lazy conditional swap, driven by the XOR of successive bits. */
        swap ^= k_t;
        gf_cond_swap(x2,x3,swap);
        gf_cond_swap(z2,z3,swap);
        swap = k_t;

        /* One combined double-and-add ladder step (RFC 7748 formulas). */
        gf_add_nr(t1,x2,z2); /* A = x2 + z2 */ /* 2+e */
        gf_sub_nr(t2,x2,z2); /* B = x2 - z2 */ /* 3+e */
        gf_sub_nr(z2,x3,z3); /* D = x3 - z3 */ /* 3+e */
        gf_mul(x2,t1,z2); /* DA */
        gf_add_nr(z2,z3,x3); /* C = x3 + z3 */ /* 2+e */
        gf_mul(x3,t2,z2); /* CB */
        gf_sub_nr(z3,x2,x3); /* DA-CB */ /* 3+e */
        gf_sqr(z2,z3); /* (DA-CB)^2 */
        gf_mul(z3,x1,z2); /* z3 = x1(DA-CB)^2 */
        gf_add_nr(z2,x2,x3); /* (DA+CB) */ /* 2+e */
        gf_sqr(x3,z2); /* x3 = (DA+CB)^2 */
        gf_sqr(z2,t1); /* AA = A^2 */
        gf_sqr(t1,t2); /* BB = B^2 */
        gf_mul(x2,z2,t1); /* x2 = AA*BB */
        gf_sub_nr(t2,z2,t1); /* E = AA-BB */ /* 3+e */
        gf_mulw(t1,t2,-EDWARDS_D); /* E*-d = a24*E */
        gf_add_nr(t1,t1,z2); /* AA + a24*E */ /* 2+e */
        gf_mul(z2,t2,t1); /* z2 = E(AA+a24*E) */
    }

    /* Finish */
    gf_cond_swap(x2,x3,swap);
    gf_cond_swap(z2,z3,swap);
    gf_invert(z2,z2,0);
    gf_mul(x1,x2,z2);
    gf_serialize(out,x1,1);
    mask_t nz = ~gf_eq(x1,ZERO);

    /* Scrub ladder state. */
    decaf_bzero(x1,sizeof(x1));
    decaf_bzero(x2,sizeof(x2));
    decaf_bzero(z2,sizeof(z2));
    decaf_bzero(x3,sizeof(x3));
    decaf_bzero(z3,sizeof(z3));
    decaf_bzero(t1,sizeof(t1));
    decaf_bzero(t2,sizeof(t2));

    return decaf_succeed_if(mask_to_bool(nz));
}
/* Thanks Johan Pascal */
/**
 * Convert an Ed25519 public key to an X25519 (Montgomery) public key
 * using the birational map u = (1+y)/(1-y) (sigma-isogeny build) or
 * u = y^2(1-dy^2)/(1-y^2) otherwise.
 *
 * Variable time: public keys are not secret.  The x-sign bit of the
 * Edwards encoding is discarded.
 */
void decaf_ed25519_convert_public_key_to_x25519 (
    uint8_t x[DECAF_X25519_PUBLIC_BYTES],
    const uint8_t ed[DECAF_EDDSA_25519_PUBLIC_BYTES]
) {
    gf y;
    {
        uint8_t enc2[DECAF_EDDSA_25519_PUBLIC_BYTES];
        memcpy(enc2,ed,sizeof(enc2));

        /* retrieve y from the ed compressed point */
        enc2[DECAF_EDDSA_25519_PUBLIC_BYTES-1] &= ~0x80;
        ignore_result(gf_deserialize(y, enc2, 0));
        decaf_bzero(enc2,sizeof(enc2));
    }

    {
        gf n,d;

#if EDDSA_USE_SIGMA_ISOGENY
        /* u = (1+y)/(1-y)*/
        gf_add(n, y, ONE); /* n = y+1 */
        gf_sub(d, ONE, y); /* d = 1-y */
        gf_invert(d, d, 0); /* d = 1/(1-y) */
        gf_mul(y, n, d); /* u = (y+1)/(1-y) */
        gf_serialize(x,y,1);
#else /* EDDSA_USE_SIGMA_ISOGENY */
        /* u = y^2 * (1-dy^2) / (1-y^2) */
        gf_sqr(n,y); /* y^2*/
        gf_sub(d,ONE,n); /* 1-y^2*/
        gf_invert(d,d,0); /* 1/(1-y^2)*/
        gf_mul(y,n,d); /* y^2 / (1-y^2) */
        gf_mulw(d,n,EDWARDS_D); /* dy^2*/
        gf_sub(d, ONE, d); /* 1-dy^2*/
        gf_mul(n, y, d); /* y^2 * (1-dy^2) / (1-y^2) */
        gf_serialize(x,n,1);
#endif /* EDDSA_USE_SIGMA_ISOGENY */

        /* Scrub intermediates (harmless here, but cheap). */
        decaf_bzero(y,sizeof(y));
        decaf_bzero(n,sizeof(n));
        decaf_bzero(d,sizeof(d));
    }
}
/**
 * Compute the X25519 public key for a private scalar.
 * Alias of decaf_x25519_derive_public_key (kept for API compatibility).
 */
void decaf_x25519_generate_key (
    uint8_t out[X_PUBLIC_BYTES],
    const uint8_t scalar[X_PRIVATE_BYTES]
) {
    decaf_x25519_derive_public_key(out,scalar);
}
/**
 * Derive the X25519 public key out = X(scalar * basepoint), in constant
 * time, using the fixed-base comb table on the internal Edwards curve
 * and then isogenizing to the Montgomery u-coordinate.
 *
 * The private scalar is clamped first (low cofactor bits cleared, top
 * bit of the processed range set), matching decaf_x25519's conditioning.
 */
void decaf_x25519_derive_public_key (
    uint8_t out[X_PUBLIC_BYTES],
    const uint8_t scalar[X_PRIVATE_BYTES]
) {
    /* Scalar conditioning */
    uint8_t scalar2[X_PRIVATE_BYTES];
    memcpy(scalar2,scalar,sizeof(scalar2));
    scalar2[0] &= -(uint8_t)COFACTOR; /* clear low lg(COFACTOR) bits */
    /* Clear bits above, and set, bit X_PRIVATE_BITS-1 of the top byte. */
    scalar2[X_PRIVATE_BYTES-1] &= ~(-1u<<((X_PRIVATE_BITS+7)%8));
    scalar2[X_PRIVATE_BYTES-1] |= 1<<((X_PRIVATE_BITS+7)%8);

    scalar_t the_scalar;
    API_NS(scalar_decode_long)(the_scalar,scalar2,sizeof(scalar2));

    /* We're gonna isogenize by 2, so divide by 2.
     *
     * Why by 2, even though it's a 4-isogeny?
     *
     * The isogeny map looks like
     * Montgomery <-2-> Jacobi <-2-> Edwards
     *
     * Since the Jacobi base point is the PREimage of the iso to
     * the Montgomery curve, and we're going
     * Jacobi -> Edwards -> Jacobi -> Montgomery,
     * we pick up only a factor of 2 over Jacobi -> Montgomery.
     */
    API_NS(scalar_halve)(the_scalar,the_scalar);
    point_t p;
    API_NS(precomputed_scalarmul)(p,API_NS(precomputed_base),the_scalar);

    /* Isogenize to Montgomery curve.
     *
     * Why isn't this just a separate function, eg decaf_encode_like_x25519?
     * Basically because in general it does the wrong thing if there is a cofactor
     * component in the input.  In this function though, there isn't a cofactor
     * component in the input.
     */
    gf_invert(p->t,p->x,0); /* 1/x */
    gf_mul(p->z,p->t,p->y); /* y/x */
    gf_sqr(p->y,p->z); /* (y/x)^2 */
#if IMAGINE_TWIST
    gf_sub(p->y,ZERO,p->y); /* negate to undo the i-twist */
#endif
    gf_serialize(out,p->y,1);

    /* Scrub the secret scalar and intermediates. */
    decaf_bzero(scalar2,sizeof(scalar2));
    API_NS(scalar_destroy)(the_scalar);
    API_NS(point_destroy)(p);
}
/**
 * @cond internal
 * Control for variable-time scalar multiply algorithms.
 */
struct smvt_control {
    /* power:  bit position of this wNAF digit (-1 marks end of list).
     * addend: signed odd digit value at that position. */
    int power, addend;
};
/**
 * Variable-time recoding of a scalar into signed windowed-NAF form.
 *
 * Fills control[] (capacity SCALAR_BITS/(table_bits+1) + 3) with
 * (power, addend) pairs in DESCENDING power order, terminated by an
 * entry with power == -1.  Each addend is odd with
 * |addend| < 2^(table_bits+1), so (|addend| >> 1) indexes a table of
 * 2^table_bits odd multiples.
 *
 * @return the number of nonzero digits (excluding the terminator).
 */
static int recode_wnaf (
    struct smvt_control *control, /* [nbits/(table_bits+1) + 3] */
    const scalar_t scalar,
    unsigned int table_bits
) {
    unsigned int table_size = SCALAR_BITS/(table_bits+1) + 3;
    int position = table_size - 1; /* at the end */

    /* place the end marker */
    control[position].power = -1;
    control[position].addend = 0;
    position--;

    /* PERF: Could negate scalar if it's large.  But then would need more cases
     * in the actual code that uses it, all for an expected reduction of like 1/5 op.
     * Probably not worth it.
     */

    /* Stream the scalar 16 bits at a time; B_OVER_16 = 16-bit chunks per limb. */
    uint64_t current = scalar->limb[0] & 0xFFFF;
    uint32_t mask = (1<<(table_bits+1))-1;
    unsigned int w;
    const unsigned int B_OVER_16 = sizeof(scalar->limb[0]) / 2;
    for (w = 1; w<(SCALAR_BITS-1)/16+3; w++) {
        if (w < (SCALAR_BITS-1)/16+1) {
            /* Refill the 16 high bits of current */
            current += (uint32_t)((scalar->limb[w/B_OVER_16]>>(16*(w%B_OVER_16)))<<16);
        }

        while (current & 0xFFFF) {
            assert(position >= 0);
            /* Shift to the lowest set bit; `odd` is then an odd value. */
            uint32_t pos = __builtin_ctz((uint32_t)current), odd = (uint32_t)current >> pos;
            int32_t delta = odd & mask;
            /* If the bit just above the window is set, take the negative
             * digit so the subtraction clears it too (maximizes zero run). */
            if (odd & 1<<(table_bits+1)) delta -= (1<<(table_bits+1));
            current -= delta << pos;
            control[position].power = pos + 16*(w-1);
            control[position].addend = delta;
            position--;
        }
        current >>= 16;
    }
    assert(current==0);

    /* Compact the digits to the front of the array. */
    position++;
    unsigned int n = table_size - position;
    unsigned int i;
    for (i=0; i<n; i++) {
        control[i] = control[i+position];
    }
    return n-1;
}
/**
 * Build a table of odd multiples for wNAF: output[i] = (2i+1) * working,
 * for i in [0, 2^tbits), in projective niels form.
 * Variable time; the caller zeroizes the table when it holds secrets.
 */
static void
prepare_wnaf_table(
    pniels_t *output,
    const point_t working,
    unsigned int tbits
) {
    point_t tmp;
    int i;
    pt_to_pniels(output[0], working);

    if (tbits == 0) return;

    /* twop = 2*working; each further entry adds 2*working to the last. */
    API_NS(point_double)(tmp,working);
    pniels_t twop;
    pt_to_pniels(twop, tmp);

    add_pniels_to_pt(tmp, output[0],0);
    pt_to_pniels(output[1], tmp);

    for (i=2; i < 1<<tbits; i++) {
        add_pniels_to_pt(tmp, twop,0);
        pt_to_pniels(output[i], tmp);
    }

    API_NS(point_destroy)(tmp);
    decaf_bzero(twop,sizeof(twop));
}
/* The fixed-base wNAF table is linked in as raw field elements and
 * reinterpreted here as an array of niels points. */
extern const gf API_NS(precomputed_wnaf_as_fe)[];
static const niels_t *API_NS(wnaf_base) = (const niels_t *)API_NS(precomputed_wnaf_as_fe);

/* Byte size of the fixed-base wNAF table (2^DECAF_WNAF_FIXED_TABLE_BITS
 * niels entries), exported for table (re)generation code. */
const size_t API_NS(sizeof_precomputed_wnafs) __attribute((visibility("hidden")))
    = sizeof(niels_t)<<DECAF_WNAF_FIXED_TABLE_BITS;

void API_NS(precompute_wnafs) (
    niels_t out[1<<DECAF_WNAF_FIXED_TABLE_BITS],
    const point_t base
) __attribute__ ((visibility ("hidden")));
/**
 * Generate the fixed-base wNAF table: out[i] = (2i+1)*base as affine
 * niels points (z normalized away via a batched inversion).
 */
void API_NS(precompute_wnafs) (
    niels_t out[1<<DECAF_WNAF_FIXED_TABLE_BITS],
    const point_t base
) {
    pniels_t tmp[1<<DECAF_WNAF_FIXED_TABLE_BITS];
    gf zs[1<<DECAF_WNAF_FIXED_TABLE_BITS], zis[1<<DECAF_WNAF_FIXED_TABLE_BITS];
    int i;
    prepare_wnaf_table(tmp,base,DECAF_WNAF_FIXED_TABLE_BITS);

    /* Split projective entries into niels part + z, then batch-normalize. */
    for (i=0; i<1<<DECAF_WNAF_FIXED_TABLE_BITS; i++) {
        memcpy(out[i], tmp[i]->n, sizeof(niels_t));
        gf_copy(zs[i], tmp[i]->z);
    }
    batch_normalize_niels(out, (const gf *)zs, zis, 1<<DECAF_WNAF_FIXED_TABLE_BITS);

    decaf_bzero(tmp,sizeof(tmp));
    decaf_bzero(zs,sizeof(zs));
    decaf_bzero(zis,sizeof(zis));
}
  1341. void API_NS(base_double_scalarmul_non_secret) (
  1342. point_t combo,
  1343. const scalar_t scalar1,
  1344. const point_t base2,
  1345. const scalar_t scalar2
  1346. ) {
  1347. const int table_bits_var = DECAF_WNAF_VAR_TABLE_BITS,
  1348. table_bits_pre = DECAF_WNAF_FIXED_TABLE_BITS;
  1349. struct smvt_control control_var[SCALAR_BITS/(table_bits_var+1)+3];
  1350. struct smvt_control control_pre[SCALAR_BITS/(table_bits_pre+1)+3];
  1351. int ncb_pre = recode_wnaf(control_pre, scalar1, table_bits_pre);
  1352. int ncb_var = recode_wnaf(control_var, scalar2, table_bits_var);
  1353. pniels_t precmp_var[1<<table_bits_var];
  1354. prepare_wnaf_table(precmp_var, base2, table_bits_var);
  1355. int contp=0, contv=0, i = control_var[0].power;
  1356. if (i < 0) {
  1357. API_NS(point_copy)(combo, API_NS(point_identity));
  1358. return;
  1359. } else if (i > control_pre[0].power) {
  1360. pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
  1361. contv++;
  1362. } else if (i == control_pre[0].power && i >=0 ) {
  1363. pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
  1364. add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1], i);
  1365. contv++; contp++;
  1366. } else {
  1367. i = control_pre[0].power;
  1368. niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1]);
  1369. contp++;
  1370. }
  1371. for (i--; i >= 0; i--) {
  1372. int cv = (i==control_var[contv].power), cp = (i==control_pre[contp].power);
  1373. point_double_internal(combo,combo,i && !(cv||cp));
  1374. if (cv) {
  1375. assert(control_var[contv].addend);
  1376. if (control_var[contv].addend > 0) {
  1377. add_pniels_to_pt(combo, precmp_var[control_var[contv].addend >> 1], i&&!cp);
  1378. } else {
  1379. sub_pniels_from_pt(combo, precmp_var[(-control_var[contv].addend) >> 1], i&&!cp);
  1380. }
  1381. contv++;
  1382. }
  1383. if (cp) {
  1384. assert(control_pre[contp].addend);
  1385. if (control_pre[contp].addend > 0) {
  1386. add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[contp].addend >> 1], i);
  1387. } else {
  1388. sub_niels_from_pt(combo, API_NS(wnaf_base)[(-control_pre[contp].addend) >> 1], i);
  1389. }
  1390. contp++;
  1391. }
  1392. }
  1393. /* This function is non-secret, but whatever this is cheap. */
  1394. decaf_bzero(control_var,sizeof(control_var));
  1395. decaf_bzero(control_pre,sizeof(control_pre));
  1396. decaf_bzero(precmp_var,sizeof(precmp_var));
  1397. assert(contv == ncb_var); (void)ncb_var;
  1398. assert(contp == ncb_pre); (void)ncb_pre;
  1399. }
/** Securely wipe a point (e.g. one that held secret-derived data). */
void API_NS(point_destroy) (
    point_t point
) {
    decaf_bzero(point, sizeof(point_t));
}
/** Securely wipe a precomputed comb table (size is opaque to callers). */
void API_NS(precomputed_destroy) (
    precomputed_s *pre
) {
    decaf_bzero(pre, API_NS(sizeof_precomputed_s));
}