You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

443 lines
13 KiB

  1. /* Copyright (c) 2011 Stanford University.
  2. * Copyright (c) 2014 Cryptography Research, Inc.
  3. * Released under the MIT License. See LICENSE.txt for license information.
  4. */
  5. /* Chacha random number generator code copied from crandom */
  6. #include "intrinsics.h"
  7. #include "crandom.h"
  8. #include <stdio.h>
  9. volatile unsigned int crandom_features = 0;
  10. unsigned int crandom_detect_features() {
  11. unsigned int out = GEN;
  12. # if (defined(__i386__) || defined(__x86_64__))
  13. u_int32_t a,b,c,d;
  14. a=1; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d));
  15. out |= GEN;
  16. if (d & 1<<26) out |= SSE2;
  17. if (d & 1<< 9) out |= SSSE3;
  18. if (c & 1<<25) out |= AESNI;
  19. if (c & 1<<28) out |= AVX;
  20. if (b & 1<<5) out |= AVX2;
  21. a=0x80000001; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d));
  22. if (c & 1<<11) out |= XOP;
  23. if (c & 1<<30) out |= RDRAND;
  24. # endif
  25. return out;
  26. }
  27. INTRINSIC u_int64_t rdrand(int abort_on_fail) {
  28. uint64_t out = 0;
  29. int tries = 1000;
  30. if (HAVE(RDRAND)) {
  31. # if defined(__x86_64__)
  32. u_int64_t out, a=0;
  33. for (; tries && !a; tries--) {
  34. __asm__ __volatile__ (
  35. "rdrand %0\n\tsetc %%al"
  36. : "=r"(out), "+a"(a) :: "cc"
  37. );
  38. }
  39. # elif (defined(__i386__))
  40. u_int32_t reg, a=0;
  41. uint64_t out;
  42. for (; tries && !a; tries--) {
  43. __asm__ __volatile__ (
  44. "rdrand %0\n\tsetc %%al"
  45. : "=r"(reg), "+a"(a) :: "cc"
  46. );
  47. }
  48. out = reg; a = 0;
  49. for (; tries && !a; tries--) {
  50. __asm__ __volatile__ (
  51. "rdrand %0\n\tsetc %%al"
  52. : "=r"(reg), "+a"(a) :: "cc"
  53. );
  54. }
  55. out = out << 32 | reg;
  56. return out;
  57. # else
  58. abort(); // whut
  59. # endif
  60. } else {
  61. tries = 0;
  62. }
  63. if (abort_on_fail && !tries) {
  64. abort();
  65. }
  66. return out;
  67. }
  68. /* ------------------------------- Vectorized code ------------------------------- */
  /*
   * shuffle(x,i): rotate the four 32-bit lanes of x so that lane k of the
   * result is lane (k+i)&3 of x.  i must be a compile-time constant in 0..3.
   * Arguments are parenthesized so that expression arguments expand safely.
   */
  #define shuffle(x,i) _mm_shuffle_epi32((x), \
      (i) + (((i)+1)&3)*4 + (((i)+2)&3)*16 + (((i)+3)&3)*64)

  /* Lane-wise 32-bit and 64-bit vector additions. */
  #define add _mm_add_epi32
  #define add64 _mm_add_epi64

  /*
   * Which quarter-round implementations need to be compiled.  Each faster
   * variant suppresses the slower ones only when the build guarantees it
   * (MUST_HAVE); otherwise the slower variant is kept as a fallback.
   */
  #define NEED_XOP (MIGHT_HAVE(XOP))
  #define NEED_SSSE3 (MIGHT_HAVE(SSSE3) && !MUST_HAVE(XOP))
  #define NEED_SSE2 (MIGHT_HAVE(SSE2) && !MUST_HAVE(SSSE3))
  /* The scalar ("conventional") path is needed unless SSE2 is guaranteed. */
  #define NEED_CONV (!MUST_HAVE(SSE2))
  77. #if NEED_XOP
  78. static __inline__ void
  79. quarter_round_xop(
  80. ssereg *a,
  81. ssereg *b,
  82. ssereg *c,
  83. ssereg *d
  84. ) {
  85. *a = add(*a,*b); *d = xop_rotate(16, *d ^ *a);
  86. *c = add(*c,*d); *b = xop_rotate(12, *b ^ *c);
  87. *a = add(*a,*b); *d = xop_rotate(8, *d ^ *a);
  88. *c = add(*c,*d); *b = xop_rotate(7, *b ^ *c);
  89. }
  90. #endif
  91. #if NEED_SSSE3
  92. static const ssereg shuffle8 = { 0x0605040702010003ull, 0x0E0D0C0F0A09080Bull };
  93. static const ssereg shuffle16 = { 0x0504070601000302ull, 0x0D0C0F0E09080B0Aull };
  94. INTRINSIC ssereg ssse3_rotate_8(ssereg a) {
  95. return _mm_shuffle_epi8(a, shuffle8);
  96. }
  97. INTRINSIC ssereg ssse3_rotate_16(ssereg a) {
  98. return _mm_shuffle_epi8(a, shuffle16);
  99. }
  100. static __inline__ void
  101. quarter_round_ssse3(
  102. ssereg *a,
  103. ssereg *b,
  104. ssereg *c,
  105. ssereg *d
  106. ) {
  107. *a = add(*a,*b); *d = ssse3_rotate_16(*d ^ *a);
  108. *c = add(*c,*d); *b = sse2_rotate(12, *b ^ *c);
  109. *a = add(*a,*b); *d = ssse3_rotate_8( *d ^ *a);
  110. *c = add(*c,*d); *b = sse2_rotate(7, *b ^ *c);
  111. }
  112. #endif /* MIGHT_HAVE(SSSE3) && !MUST_HAVE(XOP) */
  113. #if NEED_SSE2
  114. static __inline__ void
  115. quarter_round_sse2(
  116. ssereg *a,
  117. ssereg *b,
  118. ssereg *c,
  119. ssereg *d
  120. ) {
  121. *a = add(*a,*b); *d = sse2_rotate(16, *d ^ *a);
  122. *c = add(*c,*d); *b = sse2_rotate(12, *b ^ *c);
  123. *a = add(*a,*b); *d = sse2_rotate(8, *d ^ *a);
  124. *c = add(*c,*d); *b = sse2_rotate(7, *b ^ *c);
  125. }
  126. #endif
  /*
   * DOUBLE_ROUND(qrf): apply two ChaCha rounds to two blocks held in the
   * caller's locals a1,b1,c1,d1 and a2,b2,c2,d2, using quarter-round
   * function qrf.
   *
   * The first pair of qrf calls works on the rows as stored.  The b/c/d rows
   * are then rotated by 1/2/3 lanes, the second pair of qrf calls runs on the
   * rotated rows, and the rotations are undone (3/2/1 lanes).
   *
   * Wrapped in do { } while (0) so the macro behaves as a single statement
   * in any context (including unbraced if/else and for bodies).
   */
  #define DOUBLE_ROUND(qrf) do { \
      qrf(&a1,&b1,&c1,&d1); \
      qrf(&a2,&b2,&c2,&d2); \
      b1 = shuffle(b1,1); \
      c1 = shuffle(c1,2); \
      d1 = shuffle(d1,3); \
      b2 = shuffle(b2,1); \
      c2 = shuffle(c2,2); \
      d2 = shuffle(d2,3); \
      \
      qrf(&a1,&b1,&c1,&d1); \
      qrf(&a2,&b2,&c2,&d2); \
      b1 = shuffle(b1,3); \
      c1 = shuffle(c1,2); \
      d1 = shuffle(d1,1); \
      b2 = shuffle(b2,3); \
      c2 = shuffle(c2,2); \
      d2 = shuffle(d2,1); \
  } while (0)
  /*
   * OUTPUT_FUNCTION: finish two blocks and set up the next two.
   *
   * Adds the saved input rows (aa, bb, cc, dd) back into the worked rows and
   * stores the eight resulting registers (128 bytes) at output, which is then
   * advanced by 8.  The second block's c row uses cc+p, i.e. the next counter
   * value.  Afterwards cc is advanced by 2*p and the working rows are
   * re-initialised from aa, bb, cc / cc+p and dd.
   *
   * Relies on the caller's locals a1..d2, aa..dd, p and output.  Wrapped in
   * do { } while (0) so the macro behaves as a single statement.
   */
  #define OUTPUT_FUNCTION do { \
      output[0] = add(a1,aa); \
      output[1] = add(b1,bb); \
      output[2] = add(c1,cc); \
      output[3] = add(d1,dd); \
      output[4] = add(a2,aa); \
      output[5] = add(b2,bb); \
      output[6] = add(c2,add(cc,p)); \
      output[7] = add(d2,dd); \
      \
      output += 8; \
      \
      cc = add64(add64(cc,p), p); \
      a1 = a2 = aa; \
      b1 = b2 = bb; \
      c1 = cc; c2 = add64(cc,p);\
      d1 = d2 = dd; \
  } while (0)
  164. /* ------------------------------------------------------------------------------- */
  165. INTRINSIC u_int32_t rotate(int r, u_int32_t a) {
  166. return a<<r ^ a>>(32-r);
  167. }
  /*
   * Scalar ChaCha quarter-round on four 32-bit words, with rotations of
   * 16, 12, 8 and 7 bits.  *a, *b, *c and *d are updated in place; callers
   * pass four distinct words.
   */
  static __inline__ void
  quarter_round(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d) {
      u_int32_t wa = *a, wb = *b, wc = *c, wd = *d;

      wa += wb;
      wd = rotate(16, wd ^ wa);

      wc += wd;
      wb = rotate(12, wb ^ wc);

      wa += wb;
      wd = rotate(8, wd ^ wa);

      wc += wd;
      wb = rotate(7, wb ^ wc);

      *a = wa;
      *b = wb;
      *c = wc;
      *d = wd;
  }
  /*
   * Expand a 32-byte key into ChaCha output.
   *
   * iv          64-bit value placed in state words 8..9.
   * ctr         64-bit starting block counter, placed in state words 10..11
   *             and incremented by one for every 64-byte block.
   * nr          number of rounds; rounds are applied in pairs, so an odd
   *             value is effectively rounded up.
   * output_size requested number of bytes.  The vector paths write whole
   *             128-byte chunks and the scalar path whole 64-byte chunks, so
   *             the output buffer must have room for the rounded-up size.
   * key_        32 bytes of key material; read in full before any output is
   *             written, so output_ may alias it (crandom_generate does so).
   * output_     destination buffer.
   *
   * The vector paths access key_ and output_ through ssereg pointers.
   * NOTE(review): that presumably requires 16-byte alignment -- confirm
   * against the definition of crandom_state_t.
   */
  static void
  crandom_chacha_expand(u_int64_t iv,
                        u_int64_t ctr,
                        int nr,
                        int output_size,
                        const unsigned char *key_,
                        unsigned char *output_) {
  # if MIGHT_HAVE_SSE2
      if (HAVE(SSE2)) {
          ssereg *key = (ssereg *)key_;
          ssereg *output = (ssereg *)output_;
          /* Two blocks are processed side by side: counters ctr and ctr+1. */
          ssereg a1 = key[0], a2 = a1, aa = a1,
                 b1 = key[1], b2 = b1, bb = b1,
                 c1 = {iv, ctr}, c2 = {iv, ctr+1}, cc = c1,
                 d1 = {0x3320646e61707865ull, 0x6b20657479622d32ull},
                 d2 = d1, dd = d1,
                 p = {0, 1};   /* +1 on the 64-bit counter lane */
          int i,r;
  # if (NEED_XOP)
          if (HAVE(XOP)) {
              for (i=0; i<output_size; i+=128) {
                  for (r=nr; r>0; r-=2)
                      DOUBLE_ROUND(quarter_round_xop);
                  OUTPUT_FUNCTION;
              }
              return;
          }
  # endif
  # if (NEED_SSSE3)
          if (HAVE(SSSE3)) {
              for (i=0; i<output_size; i+=128) {
                  for (r=nr; r>0; r-=2)
                      DOUBLE_ROUND(quarter_round_ssse3);
                  OUTPUT_FUNCTION;
              }
              return;
          }
  # endif
  # if (NEED_SSE2)
          if (HAVE(SSE2)) {
              for (i=0; i<output_size; i+=128) {
                  for (r=nr; r>0; r-=2)
                      DOUBLE_ROUND(quarter_round_sse2);
                  OUTPUT_FUNCTION;
              }
              return;
          }
  # endif
      }
  # endif
  # if NEED_CONV
      {
          const u_int32_t *key = (const u_int32_t *)key_;
          u_int32_t
              x[16],
              input[16] = {
                  key[0], key[1], key[2], key[3],
                  key[4], key[5], key[6], key[7],
                  (u_int32_t)iv, (u_int32_t)(iv>>32),
                  (u_int32_t)ctr, (u_int32_t)(ctr>>32),
                  0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
              },
              *output = (u_int32_t *)output_;
          int i, r;
          for (i=0; i<output_size; i+= 64) {
              for (r=0; r<16; r++) {
                  x[r] = input[r];
              }
              for (r=nr; r>0; r-=2) {
                  /* column round */
                  quarter_round(&x[0], &x[4], &x[8], &x[12]);
                  quarter_round(&x[1], &x[5], &x[9], &x[13]);
                  quarter_round(&x[2], &x[6], &x[10], &x[14]);
                  quarter_round(&x[3], &x[7], &x[11], &x[15]);
                  /* diagonal round */
                  quarter_round(&x[0], &x[5], &x[10], &x[15]);
                  quarter_round(&x[1], &x[6], &x[11], &x[12]);
                  quarter_round(&x[2], &x[7], &x[8], &x[13]);
                  quarter_round(&x[3], &x[4], &x[9], &x[14]);
              }
              for (r=0; r<16; r++) {
                  output[r] = x[r] + input[r];
              }
              output += 16;
              /*
               * Advance the 64-bit block counter held in words 10 (low) and
               * 11 (high), matching the 64-bit "+1" the vector paths apply.
               * Words 12..15 are constants and must never receive a carry.
               */
              input[10] ++;
              if (!input[10]) input[11]++;
          }
      }
  #endif /* NEED_CONV */
  }
  /*
   * Marker stored in the state's magic field once it has been seeded;
   * crandom_generate aborts if the field does not hold this value.
   * The bytes spell "return 4" in ASCII, cf xkcd #221.
   */
  #define CRANDOM_MAGIC 0x72657475726e2034ull
  264. int
  265. crandom_init_from_file(
  266. struct crandom_state_t *state,
  267. const char *filename,
  268. int reseed_interval,
  269. int reseeds_mandatory
  270. ) {
  271. state->fill = 0;
  272. state->reseed_countdown = reseed_interval;
  273. state->reseed_interval = reseed_interval;
  274. state->ctr = 0;
  275. state->randomfd = open(filename, O_RDONLY);
  276. if (state->randomfd == -1) {
  277. int err = errno;
  278. return err ? err : -1;
  279. }
  280. ssize_t offset = 0, red;
  281. do {
  282. red = read(state->randomfd, state->seed + offset, 32 - offset);
  283. if (red > 0) offset += red;
  284. } while (red > 0 && offset < 32);
  285. if (offset < 32) {
  286. int err = errno;
  287. return err ? err : -1;
  288. }
  289. memset(state->buffer, 0, 96);
  290. state->magic = CRANDOM_MAGIC;
  291. state->reseeds_mandatory = reseeds_mandatory;
  292. return 0;
  293. }
  294. void
  295. crandom_init_from_buffer(
  296. struct crandom_state_t *state,
  297. const char initial_seed[32]
  298. ) {
  299. memcpy(state->seed, initial_seed, 32);
  300. memset(state->buffer, 0, 96);
  301. state->reseed_countdown = state->reseed_interval = state->fill = state->ctr = state->reseeds_mandatory = 0;
  302. state->randomfd = -1;
  303. state->magic = CRANDOM_MAGIC;
  304. }
  305. int
  306. crandom_generate(
  307. struct crandom_state_t *state,
  308. unsigned char *output,
  309. unsigned long long length
  310. ) {
  311. /* the generator isn't seeded; maybe they ignored the return value of init_from_file */
  312. if (unlikely(state->magic != CRANDOM_MAGIC)) {
  313. abort();
  314. }
  315. int ret = 0;
  316. while (length) {
  317. if (unlikely(state->fill <= 0)) {
  318. uint64_t iv = 0;
  319. if (state->reseed_interval) {
  320. /* it's nondeterministic, stir in some rdrand() or rdtsc() */
  321. if (HAVE(RDRAND)) {
  322. iv = rdrand(0);
  323. if (!iv) iv = rdtsc();
  324. } else {
  325. iv = rdtsc();
  326. }
  327. state->reseed_countdown--;
  328. if (unlikely(state->reseed_countdown <= 0)) {
  329. /* reseed by xoring in random state */
  330. state->reseed_countdown = state->reseed_interval;
  331. ssize_t offset = 0, red;
  332. do {
  333. red = read(state->randomfd, state->buffer + offset, 32 - offset);
  334. if (red > 0) offset += red;
  335. } while (red > 0 && offset < 32);
  336. if (offset < 32) {
  337. /* The read failed. Signal an error with the return code.
  338. *
  339. * If reseeds are mandatory, crash.
  340. *
  341. * If not, the generator is still probably safe to use, because reseeding
  342. * is basically over-engineering for caution. Also, the user might ignore
  343. * the return code, so we still need to fill the request.
  344. *
  345. * Set reseed_countdown = 1 so we'll try again later. If the user's
  346. * performance sucks as a result of ignoring the error code while calling
  347. * us in a loop, well, that's life.
  348. */
  349. if (state->reseeds_mandatory) {
  350. abort();
  351. }
  352. ret = errno;
  353. if (ret == 0) ret = -1;
  354. state->reseed_countdown = 1;
  355. }
  356. int i;
  357. for (i=0; i<32; i++) {
  358. /* Stir in the buffer. If somehow the read failed, it'll be zeros. */
  359. state->seed[i] ^= state->buffer[i];
  360. }
  361. }
  362. }
  363. crandom_chacha_expand(iv,state->ctr,20,128,state->seed,state->seed);
  364. state->ctr++;
  365. state->fill = sizeof(state->buffer);
  366. }
  367. unsigned long long copy = (length > state->fill) ? state->fill : length;
  368. state->fill -= copy;
  369. memcpy(output, state->buffer + state->fill, copy);
  370. memset(state->buffer + state->fill, 0, copy);
  371. output += copy; length -= copy;
  372. }
  373. return ret;
  374. }
  375. void
  376. crandom_destroy(
  377. struct crandom_state_t *state
  378. ) {
  379. if (state->magic == CRANDOM_MAGIC && state->randomfd) {
  380. (void) close(state->randomfd);
  381. /* Ignore the return value from close(), because what would it mean?
  382. * "Your random device, which you were reading over NFS, lost some data"?
  383. */
  384. }
  385. memset(state, 0, sizeof(*state));
  386. }