You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

382 lines
11 KiB

  1. /* Copyright (c) 2011 Stanford University.
  2. * Copyright (c) 2014 Cryptography Research, Inc.
  3. * Released under the MIT License. See LICENSE.txt for license information.
  4. */
  5. /* Chacha random number generator code copied from crandom */
  6. #include "intrinsics.h"
  7. #include "crandom.h"
  8. volatile unsigned int crandom_features = 0;
  9. unsigned int crandom_detect_features() {
  10. unsigned int out = GEN;
  11. # if (defined(__i386__) || defined(__x86_64__))
  12. u_int32_t a,b,c,d;
  13. a=1; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d));
  14. out |= GEN;
  15. if (d & 1<<26) out |= SSE2;
  16. if (d & 1<< 9) out |= SSSE3;
  17. if (c & 1<<25) out |= AESNI;
  18. if (c & 1<<28) out |= AVX;
  19. if (b & 1<<5) out |= AVX2;
  20. a=0x80000001; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d));
  21. if (c & 1<<11) out |= XOP;
  22. # endif
  23. return out;
  24. }
  25. /* ------------------------------- Vectorized code ------------------------------- */
  26. #define shuffle(x,i) _mm_shuffle_epi32(x, \
  27. i + ((i+1)&3)*4 + ((i+2)&3)*16 + ((i+3)&3)*64)
  28. #define add _mm_add_epi32
  29. #define add64 _mm_add_epi64
  30. #define NEED_XOP (MIGHT_HAVE(XOP))
  31. #define NEED_SSSE3 (MIGHT_HAVE(SSSE3) && !MUST_HAVE(XOP))
  32. #define NEED_SSE2 (MIGHT_HAVE(SSE2) && !MUST_HAVE(SSSE3))
  33. #define NEED_CONV (!MUST_HAVE(SSE2))
  34. #if NEED_XOP
  35. static __inline__ void
  36. quarter_round_xop(
  37. ssereg *a,
  38. ssereg *b,
  39. ssereg *c,
  40. ssereg *d
  41. ) {
  42. *a = add(*a,*b); *d = xop_rotate(16, *d ^ *a);
  43. *c = add(*c,*d); *b = xop_rotate(12, *b ^ *c);
  44. *a = add(*a,*b); *d = xop_rotate(8, *d ^ *a);
  45. *c = add(*c,*d); *b = xop_rotate(7, *b ^ *c);
  46. }
  47. #endif
  48. #if NEED_SSSE3
  49. static const ssereg shuffle8 = { 0x0605040702010003ull, 0x0E0D0C0F0A09080Bull };
  50. static const ssereg shuffle16 = { 0x0504070601000302ull, 0x0D0C0F0E09080B0Aull };
  51. INTRINSIC ssereg ssse3_rotate_8(ssereg a) {
  52. return _mm_shuffle_epi8(a, shuffle8);
  53. }
  54. INTRINSIC ssereg ssse3_rotate_16(ssereg a) {
  55. return _mm_shuffle_epi8(a, shuffle16);
  56. }
  57. static __inline__ void
  58. quarter_round_ssse3(
  59. ssereg *a,
  60. ssereg *b,
  61. ssereg *c,
  62. ssereg *d
  63. ) {
  64. *a = add(*a,*b); *d = ssse3_rotate_16(*d ^ *a);
  65. *c = add(*c,*d); *b = sse2_rotate(12, *b ^ *c);
  66. *a = add(*a,*b); *d = ssse3_rotate_8( *d ^ *a);
  67. *c = add(*c,*d); *b = sse2_rotate(7, *b ^ *c);
  68. }
  69. #endif /* MIGHT_HAVE(SSSE3) && !MUST_HAVE(XOP) */
  70. #if NEED_SSE2
  71. static __inline__ void
  72. quarter_round_sse2(
  73. ssereg *a,
  74. ssereg *b,
  75. ssereg *c,
  76. ssereg *d
  77. ) {
  78. *a = add(*a,*b); *d = sse2_rotate(16, *d ^ *a);
  79. *c = add(*c,*d); *b = sse2_rotate(12, *b ^ *c);
  80. *a = add(*a,*b); *d = sse2_rotate(8, *d ^ *a);
  81. *c = add(*c,*d); *b = sse2_rotate(7, *b ^ *c);
  82. }
  83. #endif
  84. #define DOUBLE_ROUND(qrf) { \
  85. qrf(&a1,&b1,&c1,&d1); \
  86. qrf(&a2,&b2,&c2,&d2); \
  87. b1 = shuffle(b1,1); \
  88. c1 = shuffle(c1,2); \
  89. d1 = shuffle(d1,3); \
  90. b2 = shuffle(b2,1); \
  91. c2 = shuffle(c2,2); \
  92. d2 = shuffle(d2,3); \
  93. \
  94. qrf(&a1,&b1,&c1,&d1); \
  95. qrf(&a2,&b2,&c2,&d2); \
  96. b1 = shuffle(b1,3); \
  97. c1 = shuffle(c1,2); \
  98. d1 = shuffle(d1,1); \
  99. b2 = shuffle(b2,3); \
  100. c2 = shuffle(c2,2); \
  101. d2 = shuffle(d2,1); \
  102. }
  103. #define OUTPUT_FUNCTION { \
  104. output[0] = add(a1,aa); \
  105. output[1] = add(b1,bb); \
  106. output[2] = add(c1,cc); \
  107. output[3] = add(d1,dd); \
  108. output[4] = add(a2,aa); \
  109. output[5] = add(b2,bb); \
  110. output[6] = add(c2,add(cc,p)); \
  111. output[7] = add(d2,dd); \
  112. \
  113. output += 8; \
  114. \
  115. cc = add64(add64(cc,p), p); \
  116. a1 = a2 = aa; \
  117. b1 = b2 = bb; \
  118. c1 = cc; c2 = add64(cc,p);\
  119. d1 = d2 = dd; \
  120. }
  121. /* ------------------------------------------------------------------------------- */
  122. INTRINSIC u_int32_t rotate(int r, u_int32_t a) {
  123. return a<<r ^ a>>(32-r);
  124. }
  125. static __inline__ void
  126. quarter_round(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d) {
  127. *a = *a + *b; *d = rotate(16, *d^*a);
  128. *c = *c + *d; *b = rotate(12, *b^*c);
  129. *a = *a + *b; *d = rotate(8, *d^*a);
  130. *c = *c + *d; *b = rotate(7, *b^*c);
  131. }
  132. static void
  133. crandom_chacha_expand(u_int64_t iv,
  134. u_int64_t ctr,
  135. int nr,
  136. int output_size,
  137. const unsigned char *key_,
  138. unsigned char *output_) {
  139. # if MIGHT_HAVE_SSE2
  140. if (HAVE(SSE2)) {
  141. ssereg *key = (ssereg *)key_;
  142. ssereg *output = (ssereg *)output_;
  143. ssereg a1 = key[0], a2 = a1, aa = a1,
  144. b1 = key[1], b2 = b1, bb = b1,
  145. c1 = {iv, ctr}, c2 = {iv, ctr+1}, cc = c1,
  146. d1 = {0x3320646e61707865ull, 0x6b20657479622d32ull},
  147. d2 = d1, dd = d1,
  148. p = {0, 1};
  149. int i,r;
  150. # if (NEED_XOP)
  151. if (HAVE(XOP)) {
  152. for (i=0; i<output_size; i+=128) {
  153. for (r=nr; r>0; r-=2)
  154. DOUBLE_ROUND(quarter_round_xop);
  155. OUTPUT_FUNCTION;
  156. }
  157. return;
  158. }
  159. # endif
  160. # if (NEED_SSSE3)
  161. if (HAVE(SSSE3)) {
  162. for (i=0; i<output_size; i+=128) {
  163. for (r=nr; r>0; r-=2)
  164. DOUBLE_ROUND(quarter_round_ssse3);
  165. OUTPUT_FUNCTION;
  166. }
  167. return;
  168. }
  169. # endif
  170. # if (NEED_SSE2)
  171. if (HAVE(SSE2)) {
  172. for (i=0; i<output_size; i+=128) {
  173. for (r=nr; r>0; r-=2)
  174. DOUBLE_ROUND(quarter_round_sse2);
  175. OUTPUT_FUNCTION;
  176. }
  177. return;
  178. }
  179. # endif
  180. }
  181. # endif
  182. # if NEED_CONV
  183. {
  184. const u_int32_t *key = (const u_int32_t *)key_;
  185. u_int32_t
  186. x[16],
  187. input[16] = {
  188. key[0], key[1], key[2], key[3],
  189. key[4], key[5], key[6], key[7],
  190. iv, iv>>32, ctr, ctr>>32,
  191. 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
  192. },
  193. *output = (u_int32_t *)output_;
  194. int i, r;
  195. for (i=0; i<output_size; i+= 64) {
  196. for (r=0; r<16; r++) {
  197. x[r] = input[r];
  198. }
  199. for (r=nr; r>0; r-=2) {
  200. quarter_round(&x[0], &x[4], &x[8], &x[12]);
  201. quarter_round(&x[1], &x[5], &x[9], &x[13]);
  202. quarter_round(&x[2], &x[6], &x[10], &x[14]);
  203. quarter_round(&x[3], &x[7], &x[11], &x[15]);
  204. quarter_round(&x[0], &x[5], &x[10], &x[15]);
  205. quarter_round(&x[1], &x[6], &x[11], &x[12]);
  206. quarter_round(&x[2], &x[7], &x[8], &x[13]);
  207. quarter_round(&x[3], &x[4], &x[9], &x[14]);
  208. }
  209. for (r=0; r<16; r++) {
  210. output[r] = x[r] + input[r];
  211. }
  212. output += 16;
  213. input[11] ++;
  214. if (!input[11]) input[12]++;
  215. }
  216. }
  217. #endif /* NEED_CONV */
  218. }
  219. /* "return 4", cf xkcd #221 */
  220. #define CRANDOM_MAGIC 0x72657475726e2034ull
  221. int
  222. crandom_init_from_file(
  223. struct crandom_state_t *state,
  224. const char *filename,
  225. int reseed_interval,
  226. int reseeds_mandatory
  227. ) {
  228. state->fill = 0;
  229. state->reseed_countdown = reseed_interval;
  230. state->reseed_interval = reseed_interval;
  231. state->ctr = 0;
  232. state->randomfd = open(filename, O_RDONLY);
  233. if (state->randomfd == -1) {
  234. int err = errno;
  235. return err ? err : -1;
  236. }
  237. ssize_t offset = 0, red;
  238. do {
  239. red = read(state->randomfd, state->seed + offset, 32 - offset);
  240. if (red > 0) offset += red;
  241. } while (red > 0 && offset < 32);
  242. if (offset < 32) {
  243. int err = errno;
  244. return err ? err : -1;
  245. }
  246. bzero(state->buffer, 96);
  247. state->magic = CRANDOM_MAGIC;
  248. state->reseeds_mandatory = reseeds_mandatory;
  249. return 0;
  250. }
  251. void
  252. crandom_init_from_buffer(
  253. struct crandom_state_t *state,
  254. const char initial_seed[32]
  255. ) {
  256. memcpy(state->seed, initial_seed, 32);
  257. bzero(state->buffer, 96);
  258. state->reseed_countdown = state->reseed_interval = state->fill = state->ctr = state->reseeds_mandatory = 0;
  259. state->randomfd = -1;
  260. state->magic = CRANDOM_MAGIC;
  261. }
  262. int
  263. crandom_generate(
  264. struct crandom_state_t *state,
  265. unsigned char *output,
  266. unsigned long long length
  267. ) {
  268. /* the generator isn't seeded; maybe they ignored the return value of init_from_file */
  269. if (unlikely(state->magic != CRANDOM_MAGIC)) abort();
  270. int ret = 0;
  271. while (length) {
  272. if (unlikely(state->fill <= 0)) {
  273. uint64_t iv = 0;
  274. if (state->reseed_interval) {
  275. /* it's nondeterministic, stir in some rdtsc() */
  276. iv = rdtsc();
  277. state->reseed_countdown--;
  278. if (unlikely(state->reseed_countdown <= 0)) {
  279. /* reseed by xoring in random state */
  280. state->reseed_countdown = state->reseed_interval;
  281. ssize_t offset = 0, red;
  282. do {
  283. red = read(state->randomfd, state->buffer + offset, 32 - offset);
  284. if (red > 0) offset += red;
  285. } while (red > 0 && offset < 32);
  286. if (offset < 32) {
  287. /* The read failed. Signal an error with the return code.
  288. *
  289. * If reseeds are mandatory, crash.
  290. *
  291. * If not, the generator is still probably safe to use, because reseeding
  292. * is basically over-engineering for caution. Also, the user might ignore
  293. * the return code, so we still need to fill the request.
  294. *
  295. * Set reseed_countdown = 1 so we'll try again later. If the user's perf
  296. * sucks as a result of ignoring the error code while calling us in a loop,
  297. * well, he gets what he deserves.
  298. */
  299. if (state->reseeds_mandatory) abort();
  300. ret = errno;
  301. if (ret == 0) ret = -1;
  302. state->reseed_countdown = 1;
  303. }
  304. int i;
  305. for (i=0; i<32; i++) {
  306. /* Stir in the buffer. If somehow the read failed, it'll be zeros. */
  307. state->seed[i] ^= state->buffer[i];
  308. }
  309. }
  310. }
  311. crandom_chacha_expand(iv,state->ctr,20,128,state->seed,state->seed);
  312. state->ctr++;
  313. state->fill = sizeof(state->buffer);
  314. }
  315. unsigned long long copy = (length > state->fill) ? state->fill : length;
  316. state->fill -= copy;
  317. memcpy(output, state->buffer + state->fill, copy);
  318. bzero(state->buffer + state->fill, copy);
  319. output += copy; length -= copy;
  320. }
  321. return ret;
  322. }
  323. void
  324. crandom_destroy(
  325. struct crandom_state_t *state
  326. ) {
  327. if (state->randomfd) close(state->randomfd);
  328. /* Ignore the return value, because what would it mean?
  329. * "Your random device, which you were reading over NFS, lost some data"?
  330. */
  331. bzero(state, sizeof(*state));
  332. }