You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

729 lines
20 KiB

  1. /* Copyright (c) 2014 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. #include <stdlib.h>
  5. #include "scalarmul.h"
  6. #include "string.h"
  7. #include "barrett_field.h"
  8. mask_t
  9. p448_montgomery_ladder(
  10. struct p448_t *out,
  11. const struct p448_t *in,
  12. const uint64_t *scalar,
  13. int nbits,
  14. int n_extra_doubles
  15. ) {
  16. struct montgomery_t mont;
  17. p448_sqr(&mont.z0,in);
  18. p448_copy(&mont.za,&mont.z0);
  19. p448_set_ui(&mont.xa,1);
  20. p448_set_ui(&mont.zd,0);
  21. p448_set_ui(&mont.xd,1);
  22. int i,j,n=(nbits-1)&63;
  23. mask_t pflip = 0;
  24. for (j=(nbits+63)/64-1; j>=0; j--) {
  25. uint64_t w = scalar[j];
  26. for (i=n; i>=0; i--) {
  27. mask_t flip = -((w>>i)&1);
  28. p448_cond_swap(&mont.xa,&mont.xd,flip^pflip);
  29. p448_cond_swap(&mont.za,&mont.zd,flip^pflip);
  30. p448_montgomery_step(&mont);
  31. pflip = flip;
  32. }
  33. n = 63;
  34. }
  35. p448_cond_swap(&mont.xa,&mont.xd,pflip);
  36. p448_cond_swap(&mont.za,&mont.zd,pflip);
  37. for (j=0; j<n_extra_doubles; j++) {
  38. p448_montgomery_step(&mont);
  39. }
  40. struct p448_t sign;
  41. p448_montgomery_serialize(&sign, out, &mont, in);
  42. p448_addw(&sign,1);
  43. return ~p448_is_zero(&sign);
  44. }
  45. static __inline__ void
  46. niels_cond_negate(
  47. struct tw_niels_t *n,
  48. mask_t doNegate
  49. ) {
  50. p448_cond_swap(&n->a, &n->b, doNegate);
  51. p448_cond_neg(&n->c, doNegate); /* TODO: bias amt? */
  52. }
  53. static __inline__ void
  54. pniels_cond_negate(
  55. struct tw_pniels_t *n,
  56. mask_t doNegate
  57. ) {
  58. niels_cond_negate(&n->n, doNegate);
  59. }
  60. void
  61. constant_time_lookup_pniels(
  62. struct tw_pniels_t *out,
  63. const struct tw_pniels_t *in,
  64. int nin,
  65. int idx
  66. ) {
  67. big_register_t big_one = 1, big_i = idx;
  68. big_register_t *o = (big_register_t *)out;
  69. const big_register_t *i = (const big_register_t *)in;
  70. int j;
  71. unsigned int k;
  72. memset(out, 0, sizeof(*out));
  73. for (j=0; j<nin; j++, big_i-=big_one) {
  74. big_register_t mask = br_is_zero(big_i);
  75. for (k=0; k<sizeof(*out)/sizeof(*o); k++) {
  76. o[k] |= mask & i[k+j*sizeof(*out)/sizeof(*o)];
  77. }
  78. }
  79. }
  80. static __inline__ void
  81. constant_time_lookup_niels(
  82. struct tw_niels_t *out,
  83. const struct tw_niels_t *in,
  84. int nin,
  85. int idx
  86. ) {
  87. big_register_t big_one = 1, big_i = idx;
  88. big_register_t *o = (big_register_t *)out;
  89. const big_register_t *i = (const big_register_t *)in;
  90. int j;
  91. unsigned int k;
  92. memset(out, 0, sizeof(*out));
  93. for (j=0; j<nin; j++, big_i-=big_one) {
  94. big_register_t mask = br_is_zero(big_i);
  95. for (k=0; k<sizeof(*out)/sizeof(*o); k++) {
  96. o[k] |= mask & i[k+j*sizeof(*out)/sizeof(*o)];
  97. }
  98. }
  99. }
  100. static void
  101. convert_to_signed_window_form(
  102. word_t *out,
  103. const word_t *scalar,
  104. const word_t *prepared_data,
  105. int nwords
  106. ) {
  107. mask_t mask = -(scalar[0]&1);
  108. word_t carry = add_nr_ext_packed(out, scalar, nwords, prepared_data, nwords, ~mask);
  109. carry += add_nr_ext_packed(out, out, nwords, prepared_data+nwords, nwords, mask);
  110. assert(!(out[0]&1));
  111. int i;
  112. for (i=0; i<nwords; i++) {
  113. out[i] >>= 1;
  114. if (i<nwords-1) {
  115. out[i] |= out[i+1]<<(WORD_BITS-1);
  116. } else {
  117. out[i] |= carry<<(WORD_BITS-1);
  118. }
  119. }
  120. }
  121. void
  122. edwards_scalar_multiply(
  123. struct tw_extensible_t *working,
  124. const uint64_t scalar[7]
  125. ) {
  126. const int nbits=448; /* HACK? */
  127. word_t prepared_data[14] = {
  128. 0x9595b847fdf73126ull,
  129. 0x9bb9b8a856af5200ull,
  130. 0xb3136e22f37d5c4full,
  131. 0x0000000189a19442ull,
  132. 0x0000000000000000ull,
  133. 0x0000000000000000ull,
  134. 0x4000000000000000ull,
  135. 0x721cf5b5529eec33ull,
  136. 0x7a4cf635c8e9c2abull,
  137. 0xeec492d944a725bfull,
  138. 0x000000020cd77058ull,
  139. 0x0000000000000000ull,
  140. 0x0000000000000000ull,
  141. 0x0000000000000000ull
  142. }; /* TODO: split off */
  143. uint64_t scalar2[7];
  144. convert_to_signed_window_form(scalar2,scalar,prepared_data,7);
  145. struct tw_extensible_t tabulator;
  146. copy_tw_extensible(&tabulator, working);
  147. p448_tw_extensible_double(&tabulator);
  148. struct tw_pniels_t pn, multiples[8];
  149. convert_tw_extensible_to_tw_pniels(&pn, &tabulator);
  150. convert_tw_extensible_to_tw_pniels(&multiples[0], working);
  151. int i;
  152. for (i=1; i<8; i++) {
  153. p448_tw_extensible_add_pniels(working, &pn);
  154. convert_tw_extensible_to_tw_pniels(&multiples[i], working);
  155. }
  156. i = nbits - 4;
  157. int bits = scalar2[i/64] >> (i%64) & 0xF,
  158. inv = (bits>>3)-1;
  159. bits ^= inv;
  160. constant_time_lookup_pniels(&pn, multiples, 8, bits&7);
  161. pniels_cond_negate(&pn, inv);
  162. convert_tw_pniels_to_tw_extensible(working, &pn);
  163. for (i-=4; i>=0; i-=4) {
  164. p448_tw_extensible_double(working);
  165. p448_tw_extensible_double(working);
  166. p448_tw_extensible_double(working);
  167. p448_tw_extensible_double(working);
  168. bits = scalar2[i/64] >> (i%64) & 0xF;
  169. inv = (bits>>3)-1;
  170. bits ^= inv;
  171. constant_time_lookup_pniels(&pn, multiples, 8, bits&7);
  172. pniels_cond_negate(&pn, inv);
  173. p448_tw_extensible_add_pniels(working, &pn);
  174. }
  175. }
  176. void
  177. edwards_comb(
  178. struct tw_extensible_t *working,
  179. const word_t scalar[7],
  180. const struct tw_niels_t *table,
  181. int n,
  182. int t,
  183. int s
  184. ) {
  185. word_t prepared_data[14] = {
  186. 0xebec9967f5d3f5c2ull,
  187. 0x0aa09b49b16c9a02ull,
  188. 0x7f6126aec172cd8eull,
  189. 0x00000007b027e54dull,
  190. 0x0000000000000000ull,
  191. 0x0000000000000000ull,
  192. 0x4000000000000000ull,
  193. 0xc873d6d54a7bb0cfull,
  194. 0xe933d8d723a70aadull,
  195. 0xbb124b65129c96fdull,
  196. 0x00000008335dc163ull,
  197. 0x0000000000000000ull,
  198. 0x0000000000000000ull,
  199. 0x0000000000000000ull
  200. }; /* TODO: split off. Above is for 450 bits */
  201. word_t scalar2[7];
  202. convert_to_signed_window_form(scalar2,scalar,prepared_data,7);
  203. /* const int n=3, t=5, s=30; */
  204. int i,j,k;
  205. struct tw_niels_t ni;
  206. for (i=0; i<s; i++) {
  207. if (i) p448_tw_extensible_double(working);
  208. for (j=0; j<n; j++) {
  209. int tab = 0;
  210. /*
  211. * PERF: This computation takes about 1.5µs on SBR, i.e. 2-3% of the
  212. * time of a keygen or sign op. Surely it is possible to speed it up.
  213. */
  214. for (k=0; k<t; k++) {
  215. int bit = (s-1-i) + k*s + j*(s*t);
  216. if (bit < 7*WORD_BITS) {
  217. tab |= (scalar2[bit/WORD_BITS] >> (bit%WORD_BITS) & 1) << k;
  218. }
  219. }
  220. mask_t invert = (tab>>(t-1))-1;
  221. tab ^= invert;
  222. tab &= (1<<(t-1)) - 1;
  223. constant_time_lookup_niels(&ni, table + (j<<(t-1)), 1<<(t-1), tab);
  224. niels_cond_negate(&ni, invert);
  225. if (i||j) {
  226. p448_tw_extensible_add_niels(working, &ni);
  227. } else {
  228. convert_tw_niels_to_tw_extensible(working, &ni);
  229. }
  230. }
  231. }
  232. }
  233. void
  234. simultaneous_invert_p448(
  235. struct p448_t *out,
  236. const struct p448_t *in,
  237. int n
  238. ) {
  239. if (!n) return;
  240. p448_copy(&out[1], &in[0]);
  241. int i;
  242. for (i=1; i<n-1; i++) {
  243. p448_mul(&out[i+1], &out[i], &in[i]);
  244. }
  245. p448_mul(&out[0], &out[n-1], &in[n-1]);
  246. struct p448_t tmp;
  247. p448_inverse(&tmp, &out[0]);
  248. p448_copy(&out[0], &tmp);
  249. /* at this point, out[0] = product(in[i]) ^ -1
  250. * out[i] = product(in[0]..in[i-1]) if i != 0
  251. */
  252. for (i=n-1; i>0; i--) {
  253. p448_mul(&tmp, &out[i], &out[0]);
  254. p448_copy(&out[i], &tmp);
  255. p448_mul(&tmp, &out[0], &in[i]);
  256. p448_copy(&out[0], &tmp);
  257. }
  258. }
  259. mask_t
  260. precompute_for_combs(
  261. struct tw_niels_t *out,
  262. const struct tw_extensible_t *const_base,
  263. int n,
  264. int t,
  265. int s
  266. ) {
  267. if (s < 1) return 0;
  268. struct tw_extensible_t working, start;
  269. copy_tw_extensible(&working, const_base);
  270. struct tw_pniels_t pn_tmp;
  271. struct tw_pniels_t *doubles = (struct tw_pniels_t *) malloc(sizeof(*doubles) * (t-1));
  272. struct p448_t *zs = (struct p448_t *) malloc(sizeof(*zs) * (n<<(t-1)));
  273. struct p448_t *zis = (struct p448_t *) malloc(sizeof(*zis) * (n<<(t-1)));
  274. if (!doubles || !zs || !zis) {
  275. free(doubles);
  276. free(zs);
  277. free(zis);
  278. return 0;
  279. }
  280. int i,j,k;
  281. for (i=0; i<n; i++) {
  282. /* doubling phase */
  283. for (j=0; j<t; j++) {
  284. if (j) {
  285. convert_tw_extensible_to_tw_pniels(&pn_tmp, &working);
  286. p448_tw_extensible_add_pniels(&start, &pn_tmp);
  287. } else {
  288. copy_tw_extensible(&start, &working);
  289. }
  290. if (j==t-1 && i==n-1) {
  291. break;
  292. }
  293. p448_tw_extensible_double(&working);
  294. if (j<t-1) {
  295. convert_tw_extensible_to_tw_pniels(&doubles[j], &working);
  296. }
  297. for (k=0; k<s-1; k++) {
  298. p448_tw_extensible_double(&working);
  299. }
  300. }
  301. /* Gray-code phase */
  302. for (j=0;; j++) {
  303. int gray = j ^ (j>>1);
  304. int idx = ((i+1)<<(t-1))-1 ^ gray;
  305. convert_tw_extensible_to_tw_pniels(&pn_tmp, &start);
  306. copy_tw_niels(&out[idx], &pn_tmp.n);
  307. p448_copy(&zs[idx], &pn_tmp.z);
  308. if (j >= (1<<(t-1)) - 1) break;
  309. int delta = (j+1) ^ ((j+1)>>1) ^ gray;
  310. for (k=0; delta>1; k++)
  311. delta >>=1;
  312. if (gray & (1<<k)) {
  313. /* start += doubles[k] */
  314. p448_tw_extensible_add_pniels(&start, &doubles[k]);
  315. } else {
  316. /* start -= doubles[k] */
  317. /* PERF: uncond negate */
  318. copy_tw_pniels(&pn_tmp, &doubles[k]);
  319. pniels_cond_negate(&pn_tmp, -1);
  320. p448_tw_extensible_add_pniels(&start, &pn_tmp);
  321. }
  322. }
  323. }
  324. simultaneous_invert_p448(zis, zs, n<<(t-1));
  325. p448_t product;
  326. for (i=0; i<n<<(t-1); i++) {
  327. p448_mul(&product, &out[i].a, &zis[i]);
  328. p448_strong_reduce(&product);
  329. p448_copy(&out[i].a, &product);
  330. p448_mul(&product, &out[i].b, &zis[i]);
  331. p448_strong_reduce(&product);
  332. p448_copy(&out[i].b, &product);
  333. p448_mul(&product, &out[i].c, &zis[i]);
  334. p448_strong_reduce(&product);
  335. p448_copy(&out[i].c, &product);
  336. }
  337. mask_t ret = ~p448_is_zero(&zis[0]);
  338. free(doubles);
  339. free(zs);
  340. free(zis);
  341. return ret;
  342. }
  343. mask_t
  344. precompute_for_wnaf(
  345. struct tw_niels_t *out,
  346. const struct tw_extensible_t *const_base,
  347. int tbits
  348. ) {
  349. int i;
  350. struct p448_t *zs = (struct p448_t *) malloc(sizeof(*zs)<<tbits);
  351. struct p448_t *zis = (struct p448_t *) malloc(sizeof(*zis)<<tbits);
  352. if (!zs || !zis) {
  353. free(zs);
  354. free(zis);
  355. return 0;
  356. }
  357. struct tw_extensible_t base;
  358. copy_tw_extensible(&base,const_base);
  359. struct tw_pniels_t twop, tmp;
  360. convert_tw_extensible_to_tw_pniels(&tmp, &base);
  361. p448_copy(&zs[0], &tmp.z);
  362. copy_tw_niels(&out[0], &tmp.n);
  363. if (tbits > 0) {
  364. p448_tw_extensible_double(&base);
  365. convert_tw_extensible_to_tw_pniels(&twop, &base);
  366. p448_tw_extensible_add_pniels(&base, &tmp);
  367. convert_tw_extensible_to_tw_pniels(&tmp, &base);
  368. p448_copy(&zs[1], &tmp.z);
  369. copy_tw_niels(&out[1], &tmp.n);
  370. for (i=2; i < 1<<tbits; i++) {
  371. p448_tw_extensible_add_pniels(&base, &twop);
  372. convert_tw_extensible_to_tw_pniels(&tmp, &base);
  373. p448_copy(&zs[i], &tmp.z);
  374. copy_tw_niels(&out[i], &tmp.n);
  375. }
  376. }
  377. simultaneous_invert_p448(zis, zs, 1<<tbits);
  378. p448_t product;
  379. for (i=0; i<1<<tbits; i++) {
  380. p448_mul(&product, &out[i].a, &zis[i]);
  381. p448_strong_reduce(&product);
  382. p448_copy(&out[i].a, &product);
  383. p448_mul(&product, &out[i].b, &zis[i]);
  384. p448_strong_reduce(&product);
  385. p448_copy(&out[i].b, &product);
  386. p448_mul(&product, &out[i].c, &zis[i]);
  387. p448_strong_reduce(&product);
  388. p448_copy(&out[i].c, &product);
  389. }
  390. free(zs);
  391. free(zis);
  392. return -1;
  393. }
  /*
   * One step of a wNAF recoding as produced by recode_wnaf().
   * The list is terminated by an entry with power == -1 and addend == 0.
   */
  struct smvt_control {
      int power;   /* bit position at which the addend is applied */
      int addend;  /* signed odd multiple to add at that position */
  };
  397. static int
  398. recode_wnaf(
  399. struct smvt_control *control, /* [nbits/(tableBits+1) + 3] */
  400. const word_t *scalar,
  401. int nbits,
  402. int tableBits)
  403. {
  404. int current = 0, position=0, i;
  405. /* PERF: negate scalar if it's large
  406. * PERF: this is a pretty simplistic algorithm. I'm sure there's a faster one...
  407. */
  408. for (i=nbits-1; i >= -2 - tableBits; i--) {
  409. int bit = (i >= 0)
  410. ? (scalar[i/WORD_BITS] >> (i%WORD_BITS)) & 1
  411. : 0;
  412. current = 2*current + bit;
  413. /*
  414. * Sizing: |current| >= 2^(tableBits+1) -> |current| = 2^0
  415. * So current loses (tableBits+1) bits every time. It otherwise gains
  416. * 1 bit per iteration. The number of iterations is
  417. * (nbits + 2 + tableBits), and an additional control word is added at
  418. * the end. So the total number of control words is at most
  419. * ceil((nbits+1) / (tableBits+1)) + 2 = floor((nbits)/(tableBits+1)) + 2.
  420. * There's also the stopper with power -1, for a total of +3.
  421. */
  422. if (current >= (2<<tableBits) || current <= -1 - (2<<tableBits)) {
  423. int delta = (current + 1) >> 1;
  424. current = -(current & 1);
  425. int j;
  426. for (j=i; (delta & 1) == 0; j++) {
  427. delta >>= 1;
  428. }
  429. control[position].power = j+1;
  430. control[position].addend = delta;
  431. position++;
  432. assert(position <= nbits/(tableBits+1) + 2);
  433. }
  434. }
  435. control[position].power = -1;
  436. control[position].addend = 0;
  437. return position;
  438. }
  439. static void
  440. prepare_wnaf_table(
  441. struct tw_pniels_t *output,
  442. struct tw_extensible_t *working,
  443. int tbits
  444. ) {
  445. convert_tw_extensible_to_tw_pniels(&output[0], working);
  446. if (tbits == 0) return;
  447. p448_tw_extensible_double(working);
  448. struct tw_pniels_t twop;
  449. convert_tw_extensible_to_tw_pniels(&twop, working);
  450. p448_tw_extensible_add_pniels(working, &output[0]);
  451. convert_tw_extensible_to_tw_pniels(&output[1], working);
  452. for (int i=2; i < 1<<tbits; i++) {
  453. p448_tw_extensible_add_pniels(working, &twop);
  454. convert_tw_extensible_to_tw_pniels(&output[i], working);
  455. }
  456. }
  457. int
  458. edwards_scalar_multiply_vt(
  459. struct tw_extensible_t *working,
  460. const uint64_t scalar[7]
  461. ) {
  462. /* HACK: not 448? */
  463. const int nbits=448, table_bits = 3;
  464. struct smvt_control control[nbits/(table_bits+1)+3];
  465. int control_bits = recode_wnaf(control, scalar, nbits, table_bits);
  466. struct tw_pniels_t precmp[1<<table_bits];
  467. prepare_wnaf_table(precmp, working, table_bits);
  468. if (control_bits > 0) {
  469. assert(control[0].addend > 0);
  470. assert(control[0].power >= 0);
  471. convert_tw_pniels_to_tw_extensible(working, &precmp[control[0].addend >> 1]);
  472. } else {
  473. set_identity_tw_extensible(working);
  474. return control_bits;
  475. }
  476. int conti = 1, i;
  477. struct tw_pniels_t neg;
  478. for (i = control[0].power - 1; i >= 0; i--) {
  479. p448_tw_extensible_double(working);
  480. if (i == control[conti].power) {
  481. assert(control[conti].addend);
  482. if (control[conti].addend > 0) {
  483. p448_tw_extensible_add_pniels(working, &precmp[control[conti].addend >> 1]);
  484. } else {
  485. /* PERF: uncond negate */
  486. copy_tw_pniels(&neg, &precmp[(-control[conti].addend) >> 1]);
  487. pniels_cond_negate(&neg, -1);
  488. p448_tw_extensible_add_pniels(working, &neg);
  489. }
  490. conti++;
  491. assert(conti <= control_bits);
  492. }
  493. }
  494. return control_bits; /* TODO: don't return anything, this is just for testing */
  495. }
  496. void
  497. edwards_scalar_multiply_vt_pre(
  498. struct tw_extensible_t *working,
  499. const uint64_t scalar[7],
  500. const struct tw_niels_t *precmp,
  501. int table_bits
  502. ) {
  503. /* HACK: not 448? */
  504. const int nbits=448;
  505. struct smvt_control control[nbits/(table_bits+1)+3];
  506. int control_bits = recode_wnaf(control, scalar, nbits, table_bits);
  507. if (control_bits > 0) {
  508. assert(control[0].addend > 0);
  509. assert(control[0].power >= 0);
  510. convert_tw_niels_to_tw_extensible(working, &precmp[control[0].addend >> 1]);
  511. } else {
  512. set_identity_tw_extensible(working);
  513. return;
  514. }
  515. int conti = 1, i;
  516. struct tw_niels_t neg;
  517. for (i = control[0].power - 1; i >= 0; i--) {
  518. p448_tw_extensible_double(working);
  519. if (i == control[conti].power) {
  520. assert(control[conti].addend);
  521. if (control[conti].addend > 0) {
  522. p448_tw_extensible_add_niels(working, &precmp[control[conti].addend >> 1]);
  523. } else {
  524. /* PERF: uncond negate */
  525. copy_tw_niels(&neg, &precmp[(-control[conti].addend) >> 1]);
  526. niels_cond_negate(&neg, -1);
  527. p448_tw_extensible_add_niels(working, &neg);
  528. }
  529. conti++;
  530. assert(conti <= control_bits);
  531. }
  532. }
  533. }
  534. int
  535. edwards_combo_var_fixed_vt(
  536. struct tw_extensible_t *working,
  537. const uint64_t scalar_var[7],
  538. const uint64_t scalar_pre[7],
  539. const struct tw_niels_t *precmp,
  540. int table_bits_pre
  541. ) {
  542. /* HACK: not 448? */
  543. const int nbits_var=448, nbits_pre=448, table_bits_var = 3;
  544. struct smvt_control control_var[nbits_var/(table_bits_var+1)+3];
  545. struct smvt_control control_pre[nbits_pre/(table_bits_pre+1)+3];
  546. int ncb_var = recode_wnaf(control_var, scalar_var, nbits_var, table_bits_var);
  547. int ncb_pre = recode_wnaf(control_pre, scalar_pre, nbits_pre, table_bits_pre);
  548. (void)ncb_var;
  549. (void)ncb_pre;
  550. struct tw_pniels_t precmp_var[1<<table_bits_var];
  551. prepare_wnaf_table(precmp_var, working, table_bits_var);
  552. int contp=0, contv=0, i;
  553. i = control_var[0].power;
  554. if (i > control_pre[0].power) {
  555. convert_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[0].addend >> 1]);
  556. contv++;
  557. } else if (i == control_pre[0].power && i >=0 ) {
  558. convert_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[0].addend >> 1]);
  559. p448_tw_extensible_add_niels(working, &precmp[control_pre[0].addend >> 1]);
  560. contv++; contp++;
  561. } else {
  562. i = control_pre[0].power;
  563. convert_tw_niels_to_tw_extensible(working, &precmp[control_pre[0].addend >> 1]);
  564. contp++;
  565. }
  566. if (i < 0) {
  567. set_identity_tw_extensible(working);
  568. return ncb_pre;
  569. }
  570. struct tw_pniels_t pneg;
  571. struct tw_niels_t neg;
  572. for (i--; i >= 0; i--) {
  573. p448_tw_extensible_double(working);
  574. if (i == control_var[contv].power) {
  575. assert(control_var[contv].addend);
  576. if (control_var[contv].addend > 0) {
  577. p448_tw_extensible_add_pniels(working, &precmp_var[control_var[contv].addend >> 1]);
  578. } else {
  579. /* PERF: uncond negate */
  580. copy_tw_pniels(&pneg, &precmp_var[(-control_var[contv].addend) >> 1]);
  581. pniels_cond_negate(&pneg, -1);
  582. p448_tw_extensible_add_pniels(working, &pneg);
  583. }
  584. contv++;
  585. }
  586. if (i == control_pre[contp].power) {
  587. assert(control_pre[contp].addend);
  588. if (control_pre[contp].addend > 0) {
  589. p448_tw_extensible_add_niels(working, &precmp[control_pre[contp].addend >> 1]);
  590. } else {
  591. /* PERF: uncond negate */
  592. copy_tw_niels(&neg, &precmp[(-control_pre[contp].addend) >> 1]);
  593. niels_cond_negate(&neg, -1);
  594. p448_tw_extensible_add_niels(working, &neg);
  595. }
  596. contp++;
  597. }
  598. }
  599. assert(contv == ncb_var);
  600. assert(contp == ncb_pre);
  601. return ncb_pre;
  602. }