Implement a secure ICS protocol targeting LoRa Node151 microcontroller for controlling irrigation.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

438 lines
11 KiB

  1. /**
  2. * @cond internal
  3. * @file x25519.c
  4. * @copyright
  5. * Copyright (c) 2015-2016 Cryptography Research, Inc. \n
  6. * Released under the MIT License. See LICENSE.txt for license information.
  7. * @author Mike Hamburg
  8. * @brief Key exchange and signatures based on X25519.
  9. */
  10. #include <stdint.h>
  11. #include "x25519.h"
  12. #include "strobe.h"
  13. #include "strobe_config.h"
  14. #if X25519_WBITS == 64
  15. typedef uint64_t limb_t;
  16. typedef __uint128_t dlimb_t;
  17. typedef __int128_t sdlimb_t;
  18. #define eswap_limb eswap_letoh_64
  19. #define LIMB(x) x##ull
  20. #elif X25519_WBITS == 32
  21. typedef uint32_t limb_t;
  22. typedef uint64_t dlimb_t;
  23. typedef int64_t sdlimb_t;
  24. #define eswap_limb eswap_letoh_32
  25. #define LIMB(x) (uint32_t)(x##ull),(uint32_t)((x##ull)>>32)
  26. #else
  27. #error "Need to know X25519_WBITS"
  28. #endif
  29. #define NLIMBS (256/X25519_WBITS)
  30. typedef limb_t fe[NLIMBS];
  #if X25519_SUPPORT_SIGN
  /* A scalar, stored in NLIMBS limbs. */
  typedef limb_t scalar_t[NLIMBS];

  /* Per-limb multiplier used by sc_montmul; the cast truncates it to one limb. */
  static const limb_t MONTGOMERY_FACTOR = (limb_t)0xd2b51da312547e1bull;

  /* Modulus used by sc_montmul, least-significant limb first. */
  static const scalar_t sc_p = {
      LIMB(0x5812631a5cf5d3ed), LIMB(0x14def9dea2f79cd6),
      LIMB(0x0000000000000000), LIMB(0x1000000000000000)
  };

  /*
   * Second operand of the final sc_montmul call in x25519_sign_p2
   * (referred to as "r^2 mod p" by the comment inside sc_montmul).
   */
  static const scalar_t sc_r2 = {
      LIMB(0xa40611e3449c0f01), LIMB(0xd00e1ba768859347),
      LIMB(0xceec73d217f5be65), LIMB(0x0399411b7c309a3d)
  };
  #endif
  42. static inline limb_t umaal(
  43. limb_t *carry, limb_t acc, limb_t mand, limb_t mier
  44. ) {
  45. dlimb_t tmp = (dlimb_t) mand * mier + acc + *carry;
  46. *carry = tmp >> X25519_WBITS;
  47. return tmp;
  48. }
  49. /* These functions are implemented in terms of umaal on ARM */
  50. static inline limb_t adc(limb_t *carry, limb_t acc, limb_t mand) {
  51. dlimb_t total = (dlimb_t)*carry + acc + mand;
  52. *carry = total>>X25519_WBITS;
  53. return total;
  54. }
  55. static inline limb_t adc0(limb_t *carry, limb_t acc) {
  56. dlimb_t total = (dlimb_t)*carry + acc;
  57. *carry = total>>X25519_WBITS;
  58. return total;
  59. }
  60. /* Precondition: carry is small.
  61. * Invariant: result of propagate is < 2^255 + 1 word
  62. * In particular, always less than 2p.
  63. * Also, output x >= min(x,19)
  64. */
  65. static void propagate(fe x, limb_t over) {
  66. unsigned i;
  67. over = x[NLIMBS-1]>>(X25519_WBITS-1) | over<<1;
  68. x[NLIMBS-1] &= ~((limb_t)1<<(X25519_WBITS-1));
  69. limb_t carry = over * 19;
  70. for (i=0; i<NLIMBS; i++) {
  71. x[i] = adc0(&carry, x[i]);
  72. }
  73. }
  74. static void add(fe out, const fe a, const fe b) {
  75. unsigned i;
  76. limb_t carry = 0;
  77. for (i=0; i<NLIMBS; i++) {
  78. out[i] = adc(&carry, a[i], b[i]);
  79. }
  80. propagate(out,carry);
  81. }
  82. static void sub(fe out, const fe a, const fe b) {
  83. unsigned i;
  84. sdlimb_t carry = -76;
  85. for (i=0; i<NLIMBS; i++) {
  86. out[i] = carry = carry + a[i] - b[i];
  87. carry >>= X25519_WBITS;
  88. }
  89. propagate(out,2+carry);
  90. }
  91. static void __attribute__((unused))
  92. swapin(limb_t *x, const uint8_t *in) {
  93. memcpy(x,in,sizeof(fe));
  94. unsigned i;
  95. for (i=0; i<NLIMBS; i++) {
  96. x[i] = eswap_limb(x[i]);
  97. }
  98. }
  99. static void __attribute__((unused))
  100. swapout(uint8_t *out, limb_t *x) {
  101. unsigned i;
  102. for (i=0; i<NLIMBS; i++) {
  103. x[i] = eswap_limb(x[i]);
  104. }
  105. memcpy(out,x,sizeof(fe));
  106. }
  107. static void mul(fe out, const fe a, const limb_t b[], unsigned nb) {
  108. /* GCC at least produces pretty decent asm for this, so don't need to have dedicated asm. */
  109. limb_t accum[2*NLIMBS] = {0};
  110. unsigned i,j;
  111. limb_t carry2;
  112. for (i=0; i<nb; i++) {
  113. carry2 = 0;
  114. limb_t mand = b[i];
  115. for (j=0; j<NLIMBS; j++) {
  116. accum[i+j] = umaal(&carry2, accum[i+j], mand, a[j]);
  117. }
  118. accum[i+j] = carry2;
  119. }
  120. carry2 = 0;
  121. const limb_t mand = 38;
  122. for (j=0; j<NLIMBS; j++) {
  123. out[j] = umaal(&carry2, accum[j], mand, accum[j+NLIMBS]);
  124. }
  125. propagate(out,carry2);
  126. }
  127. static void sqr(fe out, const fe a) { mul(out,a,a,NLIMBS); }
  128. static void mul1(fe out, const fe a) { mul(out,a,out,NLIMBS); }
  129. static void sqr1(fe a) { mul1(a,a); }
  130. static void condswap(limb_t a[2*NLIMBS], limb_t b[2*NLIMBS], limb_t doswap) {
  131. unsigned i;
  132. for (i=0; i<2*NLIMBS; i++) {
  133. limb_t xor = (a[i]^b[i]) & doswap;
  134. a[i] ^= xor; b[i] ^= xor;
  135. }
  136. }
  137. static limb_t canon(fe x) {
  138. /* Canonicalize a field element x, reducing it to the least residue
  139. * which is congruent to it mod 2^255-19.
  140. *
  141. * Precondition: x < 2^255 + 1 word
  142. */
  143. /* First, add 19. */
  144. unsigned i;
  145. limb_t carry0 = 19;
  146. for (i=0; i<NLIMBS; i++) {
  147. x[i] = adc0(&carry0, x[i]);
  148. }
  149. propagate(x,carry0);
  150. /* Here, 19 <= x2 < 2^255
  151. *
  152. * This is because we added 19, so before propagate it can't be less than 19.
  153. * After propagate, it still can't be less than 19, because if propagate does
  154. * anything it adds 19.
  155. *
  156. * We know that the high bit must be clear, because either the input was
  157. * ~ 2^255 + one word + 19 (in which case it propagates to at most 2 words)
  158. * or it was < 2^255.
  159. *
  160. * So now, if we subtract 19, we will get back to something in [0,2^255-19).
  161. */
  162. sdlimb_t carry = -19;
  163. limb_t res = 0;
  164. for (i=0; i<NLIMBS; i++) {
  165. res |= x[i] = carry += x[i];
  166. carry >>= X25519_WBITS;
  167. }
  168. return ((dlimb_t)res - 1) >> X25519_WBITS;
  169. }
  170. static const limb_t a24[1]={121665};
  171. static void ladder_part1(fe xs[5]) {
  172. limb_t *x2 = xs[0], *z2=xs[1],*x3=xs[2],*z3=xs[3],*t1=xs[4];
  173. add(t1,x2,z2); // t1 = A
  174. sub(z2,x2,z2); // z2 = B
  175. add(x2,x3,z3); // x2 = C
  176. sub(z3,x3,z3); // z3 = D
  177. mul1(z3,t1); // z3 = DA
  178. mul1(x2,z2); // x3 = BC
  179. add(x3,z3,x2); // x3 = DA+CB
  180. sub(z3,z3,x2); // z3 = DA-CB
  181. sqr1(t1); // t1 = AA
  182. sqr1(z2); // z2 = BB
  183. sub(x2,t1,z2); // x2 = E = AA-BB
  184. mul(z2,x2,a24,sizeof(a24)/sizeof(a24[0])); // z2 = E*a24
  185. add(z2,z2,t1); // z2 = E*a24 + AA
  186. }
  187. static void ladder_part2(fe xs[5], const fe x1) {
  188. limb_t *x2 = xs[0], *z2=xs[1],*x3=xs[2],*z3=xs[3],*t1=xs[4];
  189. sqr1(z3); // z3 = (DA-CB)^2
  190. mul1(z3,x1); // z3 = x1 * (DA-CB)^2
  191. sqr1(x3); // x3 = (DA+CB)^2
  192. mul1(z2,x2); // z2 = AA*(E*a24+AA)
  193. sub(x2,t1,x2); // x2 = BB again
  194. mul1(x2,t1); // x2 = AA*BB
  195. }
  196. static void x25519_core(fe xs[5], const uint8_t scalar[X25519_BYTES], const uint8_t *x1, int clamp) {
  197. int i;
  198. #if X25519_MEMCPY_PARAMS
  199. fe x1i;
  200. swapin(x1i,x1);
  201. x1 = (const uint8_t *)x1;
  202. #endif
  203. limb_t swap = 0;
  204. limb_t *x2 = xs[0],*x3=xs[2],*z3=xs[3];
  205. memset(xs,0,4*sizeof(fe));
  206. x2[0] = z3[0] = 1;
  207. memcpy(x3,x1,sizeof(fe));
  208. for (i=255; i>=0; i--) {
  209. uint8_t bytei = scalar[i/8];
  210. if (clamp) {
  211. if (i/8 == 0) {
  212. bytei &= ~7;
  213. } else if (i/8 == X25519_BYTES-1) {
  214. bytei &= 0x7F;
  215. bytei |= 0x40;
  216. }
  217. }
  218. limb_t doswap = -(limb_t)((bytei>>(i%8)) & 1);
  219. condswap(x2,x3,swap^doswap);
  220. swap = doswap;
  221. ladder_part1(xs);
  222. ladder_part2(xs,(const limb_t *)x1);
  223. }
  224. condswap(x2,x3,swap);
  225. }
  226. int x25519(uint8_t out[X25519_BYTES], const uint8_t scalar[X25519_BYTES], const uint8_t x1[X25519_BYTES], int clamp) {
  227. fe xs[5];
  228. x25519_core(xs,scalar,x1,clamp);
  229. /* Precomputed inversion chain */
  230. limb_t *x2 = xs[0], *z2=xs[1], *z3=xs[3];
  231. int i;
  232. limb_t *prev = z2;
  233. #if X25519_USE_POWER_CHAIN
  234. static const struct { uint8_t a,c,n; } steps[13] = {
  235. {2,1,1 },
  236. {2,1,1 },
  237. {4,2,3 },
  238. {2,4,6 },
  239. {3,1,1 },
  240. {3,2,12 },
  241. {4,3,25 },
  242. {2,3,25 },
  243. {2,4,50 },
  244. {3,2,125},
  245. {3,1,2 },
  246. {3,1,2 },
  247. {3,1,1 }
  248. };
  249. for (i=0; i<13; i++) {
  250. int j;
  251. limb_t *a = xs[steps[i].a];
  252. for (j=steps[i].n; j>0; j--) {
  253. sqr(a, prev);
  254. prev = a;
  255. }
  256. mul1(a,xs[steps[i].c]);
  257. }
  258. #else
  259. /* Raise to the p-2 = 0x7f..ffeb */
  260. for (i=253; i>=0; i--) {
  261. sqr(z3,prev);
  262. prev = z3;
  263. if (i>=8 || (0xeb>>i & 1)) {
  264. mul1(z3,z2);
  265. }
  266. }
  267. #endif
  268. /* Here prev = z3 */
  269. /* x2 /= z2 */
  270. #if X25519_MEMCPY_PARAMS
  271. mul1(x2,z3);
  272. int ret = canon(x2);
  273. swapout(out,x2);
  274. #else
  275. mul((limb_t *)out, x2, z3, NLIMBS);
  276. int ret = canon((limb_t*)out);
  277. #endif
  278. if (clamp) return ret;
  279. else return 0;
  280. }
  281. const uint8_t X25519_BASE_POINT[X25519_BYTES] = {9};
  282. #if X25519_SUPPORT_VERIFY
  283. static limb_t x25519_verify_core(
  284. fe xs[5],
  285. const limb_t *other1,
  286. const uint8_t other2[X25519_BYTES]
  287. ) {
  288. limb_t *z2=xs[1],*x3=xs[2],*z3=xs[3];
  289. #if X25519_MEMCPY_PARAMS
  290. fe xo2;
  291. swapin(xo2,other2);
  292. #else
  293. const limb_t *xo2 = (const limb_t *)other2;
  294. #endif
  295. memcpy(x3, other1, 2*sizeof(fe));
  296. ladder_part1(xs);
  297. /* Here z2 = t2^2 */
  298. mul1(z2,other1);
  299. mul1(z2,other1+NLIMBS);
  300. mul1(z2,xo2);
  301. const limb_t sixteen = 16;
  302. mul (z2,z2,&sixteen,1);
  303. mul1(z3,xo2);
  304. sub(z3,z3,x3);
  305. sqr1(z3);
  306. /* check equality */
  307. sub(z3,z3,z2);
  308. /* If canon(z2) then both sides are zero.
  309. * If canon(z3) then the two sides are equal.
  310. *
  311. * Reject sigs where both sides are zero, because
  312. * that can happen if an input causes the ladder to
  313. * return 0/0.
  314. */
  315. return canon(z2) | ~canon(z3);
  316. }
  317. int x25519_verify_p2 (
  318. const uint8_t response[X25519_BYTES],
  319. const uint8_t challenge[X25519_BYTES],
  320. const uint8_t eph[X25519_BYTES],
  321. const uint8_t pub[X25519_BYTES]
  322. ) {
  323. fe xs[7];
  324. x25519_core(&xs[0],challenge,pub,0);
  325. x25519_core(&xs[2],response,X25519_BASE_POINT,0);
  326. return x25519_verify_core(&xs[2],xs[0],eph);
  327. }
  328. #endif // X25519_SUPPORT_VERIFY
  329. #if X25519_SUPPORT_SIGN
  330. static void sc_montmul (
  331. scalar_t out,
  332. const scalar_t a,
  333. const scalar_t b
  334. ) {
  335. /**
  336. * OK, so carry bounding. We're using a high carry, so that the
  337. * inputs don't have to be reduced.
  338. *
  339. * First montmul: output < (M^2 + Mp)/M = M+p, subtract p, < M. This gets rid of high carry.
  340. * Second montmul, by r^2 mod p < p: output < (Mp + Mp)/M = 2p, subtract p, < p, done.
  341. */
  342. unsigned i,j;
  343. limb_t hic = 0;
  344. for (i=0; i<NLIMBS; i++) {
  345. limb_t carry=0, carry2=0, mand = a[i], mand2 = MONTGOMERY_FACTOR;
  346. for (j=0; j<NLIMBS; j++) {
  347. limb_t acc = out[j];
  348. acc = umaal(&carry, acc, mand, b[j]);
  349. if (j==0) mand2 *= acc;
  350. acc = umaal(&carry2, acc, mand2, sc_p[j]);
  351. if (j>0) out[j-1] = acc;
  352. }
  353. /* Add two carry registers and high carry */
  354. out[NLIMBS-1] = adc(&hic, carry, carry2);
  355. }
  356. /* Reduce */
  357. sdlimb_t scarry = 0;
  358. for (i=0; i<NLIMBS; i++) {
  359. out[i] = scarry = scarry + out[i] - sc_p[i];
  360. scarry >>= X25519_WBITS;
  361. }
  362. limb_t need_add = -(scarry + hic);
  363. limb_t carry = 0;
  364. for (i=0; i<NLIMBS; i++) {
  365. out[i] = umaal(&carry, out[i], need_add, sc_p[i]);
  366. }
  367. }
  368. void x25519_sign_p2 (
  369. uint8_t response[X25519_BYTES],
  370. const uint8_t challenge[X25519_BYTES],
  371. const uint8_t eph_secret[X25519_BYTES],
  372. const uint8_t secret[X25519_BYTES]
  373. ) {
  374. /* FUTURE memory/code size: just make eph_secret non-const? */
  375. scalar_t scalar1;
  376. swapin(scalar1,eph_secret);
  377. #if X25519_MEMCPY_PARAMS
  378. scalar_t scalar2, scalar3;
  379. swapin(scalar2,secret);
  380. swapin(scalar3,challenge);
  381. sc_montmul(scalar1,scalar2,scalar3);
  382. memset(scalar2,0,sizeof(scalar2));
  383. sc_montmul(scalar2,scalar1,sc_r2);
  384. swapout(response,scalar2);
  385. #else
  386. sc_montmul(scalar1,(const limb_t *)secret,(const limb_t *)challenge);
  387. memset(response,0,X25519_BYTES);
  388. sc_montmul((limb_t *)response,scalar1,sc_r2);
  389. #endif
  390. }
  391. #endif // X25519_SUPPORT_SIGN