| 
							- /**
 -  * @cond internal
 -  * @file x25519.c
 -  * @copyright
 -  *   Copyright (c) 2015-2016 Cryptography Research, Inc.  \n
 -  *   Released under the MIT License.  See LICENSE.txt for license information.
 -  * @author Mike Hamburg
 -  * @brief Key exchange and signatures based on X25519.
 -  */
 - #include <stdint.h>
 - #include "x25519.h"
 - #include "strobe.h"
 - #include "strobe_config.h"
 - 
 - #if X25519_WBITS == 64
 -     typedef uint64_t limb_t;
 -     typedef __uint128_t dlimb_t;
 -     typedef __int128_t sdlimb_t;
 -     #define eswap_limb eswap_letoh_64
 -     #define LIMB(x) x##ull
 - #elif X25519_WBITS == 32
 -     typedef uint32_t limb_t;
 -     typedef uint64_t dlimb_t;
 -     typedef int64_t sdlimb_t;
 -     #define eswap_limb eswap_letoh_32
 -     #define LIMB(x) (uint32_t)(x##ull),(uint32_t)((x##ull)>>32)
 - #else
 -     #error "Need to know X25519_WBITS"
 - #endif
 - 
 - #define NLIMBS (256/X25519_WBITS)
 - typedef limb_t fe[NLIMBS];
 - 
 - #if X25519_SUPPORT_SIGN
 - typedef limb_t scalar_t[NLIMBS];
 - static const limb_t MONTGOMERY_FACTOR = (limb_t)0xd2b51da312547e1bull;
 - static const scalar_t sc_p = {
 -     LIMB(0x5812631a5cf5d3ed), LIMB(0x14def9dea2f79cd6),
 -     LIMB(0x0000000000000000), LIMB(0x1000000000000000)
 - }, sc_r2 = {
 -     LIMB(0xa40611e3449c0f01), LIMB(0xd00e1ba768859347),
 -     LIMB(0xceec73d217f5be65), LIMB(0x0399411b7c309a3d)
 - };
 - #endif
 - 
 - static inline limb_t umaal(
 -     limb_t *carry, limb_t acc, limb_t mand, limb_t mier
 - ) {
 -     dlimb_t tmp = (dlimb_t) mand * mier + acc + *carry;
 -     *carry = tmp >> X25519_WBITS;
 -     return tmp;
 - }
 - 
 - /* These functions are implemented in terms of umaal on ARM */
 - static inline limb_t adc(limb_t *carry, limb_t acc, limb_t mand) {
 -     dlimb_t total = (dlimb_t)*carry + acc + mand;
 -     *carry = total>>X25519_WBITS;
 -     return total;
 - }
 - 
 - static inline limb_t adc0(limb_t *carry, limb_t acc) {
 -     dlimb_t total = (dlimb_t)*carry + acc;
 -     *carry = total>>X25519_WBITS;
 -     return total;
 - }
 - 
 - /* Precondition: carry is small.
 -  * Invariant: result of propagate is < 2^255 + 1 word
 -  * In particular, always less than 2p.
 -  * Also, output x >= min(x,19)
 -  */
 - static void propagate(fe x, limb_t over) {
 -     unsigned i;
 -     over = x[NLIMBS-1]>>(X25519_WBITS-1) | over<<1;
 -     x[NLIMBS-1] &= ~((limb_t)1<<(X25519_WBITS-1));
 - 
 -     limb_t carry = over * 19;
 -     for (i=0; i<NLIMBS; i++) {
 -         x[i] = adc0(&carry, x[i]);
 -     }
 - }
 - 
 - static void add(fe out, const fe a, const fe b) {      
 -     unsigned i;
 -     limb_t carry = 0;
 -     for (i=0; i<NLIMBS; i++) {
 -         out[i] = adc(&carry, a[i], b[i]);
 -     }
 -     propagate(out,carry);
 - }
 - 
 - static void sub(fe out, const fe a, const fe b) {
 -     unsigned i;
 -     sdlimb_t carry = -38;
 -     for (i=0; i<NLIMBS; i++) {
 -         out[i] = carry = carry + a[i] - b[i];
 -         carry >>= X25519_WBITS;
 -     }
 -     propagate(out,1+carry);
 - }
 - 
 - static void __attribute__((unused))
 - swapin(limb_t *x, const uint8_t *in) {
 -     memcpy(x,in,sizeof(fe));
 -     unsigned i;
 -     for (i=0; i<NLIMBS; i++) {
 -         x[i] = eswap_limb(x[i]);
 -     }
 - }
 - 
 - static void __attribute__((unused))
 - swapout(uint8_t *out, limb_t *x) {
 -     unsigned i;
 -     for (i=0; i<NLIMBS; i++) {
 -         x[i] = eswap_limb(x[i]);
 -     }
 -     memcpy(out,x,sizeof(fe));
 - }
 - 
 - static void mul(fe out, const fe a, const fe b, unsigned nb) {
 -     /* GCC at least produces pretty decent asm for this, so don't need to have dedicated asm. */
 -     limb_t accum[2*NLIMBS] = {0};
 -     unsigned i,j;
 - 
 -     limb_t carry2;
 -     for (i=0; i<nb; i++) {
 -         carry2 = 0;
 -         limb_t mand = b[i];
 -         for (j=0; j<NLIMBS; j++) {
 -             accum[i+j] = umaal(&carry2, accum[i+j], mand, a[j]);
 -         }
 -         accum[i+j] = carry2;
 -     }
 - 
 -     carry2 = 0;
 -     const limb_t mand = 38;
 -     for (j=0; j<NLIMBS; j++) {
 -          out[j] = umaal(&carry2, accum[j], mand, accum[j+NLIMBS]);
 -     }
 -     propagate(out,carry2);
 - }
 - 
 - static void sqr(fe out, const fe a) { mul(out,a,a,NLIMBS); }
 - static void mul1(fe out, const fe a) { mul(out,a,out,NLIMBS); }
 - static void sqr1(fe a) { mul1(a,a); }
 - 
 - static void condswap(limb_t a[2*NLIMBS], limb_t b[2*NLIMBS], limb_t doswap) {
 -     unsigned i;
 -     for (i=0; i<2*NLIMBS; i++) {
 -         limb_t xor = (a[i]^b[i]) & doswap;
 -         a[i] ^= xor; b[i] ^= xor;
 -     }
 - }
 - 
 - static limb_t canon(fe x) {
 -     /* Canonicalize a field element x, reducing it to the least residue
 -      * which is congruent to it mod 2^255-19.
 -      *
 -      * Precondition: x < 2^255 + 1 word
 -      */
 - 
 -     /* First, add 19. */
 -     unsigned i;
 -     limb_t carry0 = 19;
 -     for (i=0; i<NLIMBS; i++) {
 -         x[i] = adc0(&carry0, x[i]);
 -     }
 -     propagate(x,carry0);
 - 
 -     /* Here, 19 <= x2 < 2^255
 -      *
 -      * This is because we added 19, so before propagate it can't be less than 19.
 -      * After propagate, it still can't be less than 19, because if propagate does
 -      * anything it adds 19.
 -      *
 -      * We know that the high bit must be clear, because either the input was
 -      * ~ 2^255 + one word + 19 (in which case it propagates to at most 2 words)
 -      * or it was < 2^255.
 -      *
 -      * So now, if we subtract 19, we will get back to something in [0,2^255-19).
 -      */
 -     sdlimb_t carry = -19;
 -     limb_t res = 0;
 -     for (i=0; i<NLIMBS; i++) {
 -         res |= x[i] = carry += x[i];
 -         carry >>= X25519_WBITS;
 -     }
 -     return ((dlimb_t)res - 1) >> X25519_WBITS;
 - }
 - 
 - static const limb_t a24[1]={121665};
 - 
 - static void ladder_part1(fe xs[5]) {
 -     limb_t *x2 = xs[0], *z2=xs[1],*x3=xs[2],*z3=xs[3],*t1=xs[4];
 -     add(t1,x2,z2);  // t1 = A
 -     sub(z2,x2,z2);  // z2 = B
 -     add(x2,x3,z3);  // x2 = C
 -     sub(z3,x3,z3);  // z3 = D
 -     mul1(z3,t1);    // z3 = DA
 -     mul1(x2,z2);    // x3 = BC
 -     add(x3,z3,x2);  // x3 = DA+CB
 -     sub(z3,z3,x2);  // z3 = DA-CB
 -     sqr1(t1);       // t1 = AA
 -     sqr1(z2);       // z2 = BB
 -     sub(x2,t1,z2);  // x2 = E = AA-BB
 -     mul(z2,x2,a24,sizeof(a24)/sizeof(a24[0])); // z2 = E*a24
 -     add(z2,z2,t1);  // z2 = E*a24 + AA
 - }
 - static void ladder_part2(fe xs[5], const fe x1) {
 -     limb_t *x2 = xs[0], *z2=xs[1],*x3=xs[2],*z3=xs[3],*t1=xs[4];
 -     sqr1(z3);       // z3 = (DA-CB)^2
 -     mul1(z3,x1);    // z3 = x1 * (DA-CB)^2
 -     sqr1(x3);       // x3 = (DA+CB)^2
 -     mul1(z2,x2);    // z2 = AA*(E*a24+AA)
 -     sub(x2,t1,x2);  // x2 = BB again
 -     mul1(x2,t1);    // x2 = AA*BB
 - }
 - 
 - static void x25519_core(fe xs[5], const uint8_t scalar[X25519_BYTES], const uint8_t *x1, int clamp) {
 -     int i;
 - #if X25519_MEMCPY_PARAMS
 -     fe x1i;
 -     swapin(x1i,x1);
 -     x1 = (const uint8_t *)x1;
 - #endif
 -     limb_t swap = 0;
 -     limb_t *x2 = xs[0],*x3=xs[2],*z3=xs[3];
 -     memset(xs,0,4*sizeof(fe));
 -     x2[0] = z3[0] = 1;
 -     memcpy(x3,x1,sizeof(fe));
 - 
 -     for (i=255; i>=0; i--) {
 -         uint8_t bytei = scalar[i/8];
 -         if (clamp) {
 -             if (i/8 == 0) {
 -                 bytei &= ~7;
 -             } else if (i/8 == X25519_BYTES-1) {
 -                 bytei &= 0x7F;
 -                 bytei |= 0x40;
 -             }
 -         }
 -         limb_t doswap = -(limb_t)((bytei>>(i%8)) & 1);
 -         condswap(x2,x3,swap^doswap);
 -         swap = doswap;
 - 
 -         ladder_part1(xs);
 -         ladder_part2(xs,(const limb_t *)x1);
 -     }
 -     condswap(x2,x3,swap);
 - }
 - 
 - int x25519(uint8_t out[X25519_BYTES], const uint8_t scalar[X25519_BYTES], const uint8_t x1[X25519_BYTES], int clamp) {
 -     fe xs[5];
 -     x25519_core(xs,scalar,x1,clamp);
 - 
 -     /* Precomputed inversion chain */
 -     limb_t *x2 = xs[0], *z2=xs[1], *z3=xs[3];
 -     int i;
 - 
 -     limb_t *prev = z2;
 - #if X25519_USE_POWER_CHAIN
 -     static const struct { uint8_t a,c,n; } steps[13] = {
 -         {2,1,1  },
 -         {2,1,1  },
 -         {4,2,3  },
 -         {2,4,6  },
 -         {3,1,1  },
 -         {3,2,12 },
 -         {4,3,25 },
 -         {2,3,25 },
 -         {2,4,50 },
 -         {3,2,125},
 -         {3,1,2  },
 -         {3,1,2  },
 -         {3,1,1  }
 -     };
 -     for (i=0; i<13; i++) {
 -         int j;
 -         limb_t *a = xs[steps[i].a];
 -         for (j=steps[i].n; j>0; j--) {
 -             sqr(a, prev);
 -             prev = a;
 -         }
 -         mul1(a,xs[steps[i].c]);
 -     }
 - #else
 -     /* Raise to the p-2 = 0x7f..ffeb */
 -     for (i=253; i>=0; i--) {
 -         sqr(z3,prev);
 -         prev = z3;
 -         if (i>=8 || (0xeb>>i & 1)) {
 -             mul1(z3,z2);
 -         }
 -     }
 - #endif
 - 
 -     /* Here prev = z3 */
 -     /* x2 /= z2 */
 - #if X25519_MEMCPY_PARAMS
 -     mul1(x2,z3);
 -     int ret = canon(x2);
 -     swapout(out,x2);
 - #else
 -     mul((limb_t *)out, x2, z3, NLIMBS);
 -     int ret = canon((limb_t*)out);
 - #endif
 -     if (clamp) return ret;
 -     else return 0;
 - }
 - 
 - const uint8_t X25519_BASE_POINT[X25519_BYTES] = {9};
 - 
 - #if X25519_SUPPORT_VERIFY
 - static limb_t x25519_verify_core(
 -     fe xs[5],
 -     const limb_t *other1,
 -     const uint8_t other2[X25519_BYTES]
 - ) {
 -     limb_t *z2=xs[1],*x3=xs[2],*z3=xs[3];
 - #if X25519_MEMCPY_PARAMS
 -     fe xo2;
 -     swapin(xo2,other2);
 - #else
 -     const limb_t *xo2 = (const limb_t *)other2;
 - #endif
 - 
 -     memcpy(x3, other1, 2*sizeof(fe));
 - 
 -     ladder_part1(xs);
 - 
 -     /* Here z2 = t2^2 */
 -     mul1(z2,other1);
 -     mul1(z2,other1+NLIMBS);
 -     mul1(z2,xo2);
 -     const limb_t sixteen = 16;
 -     mul (z2,z2,&sixteen,1);
 - 
 -     mul1(z3,xo2);
 -     sub(z3,z3,x3);
 -     sqr1(z3);
 - 
 -     /* check equality */
 -     sub(z3,z3,z2);
 - 
 -     /* If canon(z2) then both sides are zero.
 -      * If canon(z3) then the two sides are equal.
 -      *
 -      * Reject sigs where both sides are zero, because
 -      * that can happen if an input causes the ladder to
 -      * return 0/0.
 -      */
 -     return canon(z2) | ~canon(z3);
 - }
 - 
 - int x25519_verify_p2 (
 -     const uint8_t response[X25519_BYTES],
 -     const uint8_t challenge[X25519_BYTES],
 -     const uint8_t eph[X25519_BYTES],
 -     const uint8_t pub[X25519_BYTES]
 - ) {
 -     fe xs[7];
 -     x25519_core(&xs[0],challenge,pub,0);
 -     x25519_core(&xs[2],response,X25519_BASE_POINT,0);
 -     return x25519_verify_core(&xs[2],xs[0],eph);
 - }
 - #endif // X25519_SUPPORT_VERIFY
 - 
 - #if X25519_SUPPORT_SIGN
 - static void sc_montmul (
 -     scalar_t out,
 -     const scalar_t a,
 -     const scalar_t b
 - ) {
 -    /**
 -     * OK, so carry bounding.  We're using a high carry, so that the
 -     * inputs don't have to be reduced.
 -     *
 -     * First montmul: output < (M^2 + Mp)/M = M+p, subtract p, < M.  This gets rid of high carry.
 -     * Second montmul, by r^2 mod p < p: output < (Mp + Mp)/M = 2p, subtract p, < p, done.
 -     */
 -     unsigned i,j;
 -     limb_t hic = 0;
 -     for (i=0; i<NLIMBS; i++) {
 -         limb_t carry=0, carry2=0, mand = a[i], mand2 = MONTGOMERY_FACTOR;
 - 
 -         for (j=0; j<NLIMBS; j++) {
 -             limb_t acc = out[j];
 -             acc = umaal(&carry, acc, mand, b[j]);
 -             if (j==0) mand2 *= acc;
 -             acc = umaal(&carry2, acc, mand2, sc_p[j]);
 -             if (j>0) out[j-1] = acc;
 -         }
 - 
 -         /* Add two carry registers and high carry */
 -         out[NLIMBS-1] = adc(&hic, carry, carry2);
 -     }
 - 
 -     /* Reduce */
 -     sdlimb_t scarry = 0;
 -     for (i=0; i<NLIMBS; i++) {
 -         out[i] = scarry = scarry + out[i] - sc_p[i];
 -         scarry >>= X25519_WBITS;
 -     }
 -     limb_t need_add = -(scarry + hic);
 - 
 -     limb_t carry = 0;
 -     for (i=0; i<NLIMBS; i++) {
 -         out[i] = umaal(&carry, out[i], need_add, sc_p[i]);
 -     }
 - }
 - 
 - void x25519_sign_p2 (
 -     uint8_t response[X25519_BYTES],
 -     const uint8_t challenge[X25519_BYTES],
 -     const uint8_t eph_secret[X25519_BYTES],
 -     const uint8_t secret[X25519_BYTES]
 - ) {
 -     /* FUTURE memory/code size: just make eph_secret non-const? */
 -     scalar_t scalar1;
 -     swapin(scalar1,eph_secret);
 - 
 -     #if X25519_MEMCPY_PARAMS
 -     scalar_t scalar2, scalar3;
 -     swapin(scalar2,secret);
 -     swapin(scalar3,challenge);
 -     sc_montmul(scalar1,scalar2,scalar3);
 -     memset(scalar2,0,sizeof(scalar2));
 -     sc_montmul(scalar2,scalar1,sc_r2);
 -     swapout(response,scalar2);
 -     #else
 -     sc_montmul(scalar1,(const limb_t *)secret,(const limb_t *)challenge);
 -     memset(response,0,X25519_BYTES);
 -     sc_montmul((limb_t *)response,scalar1,sc_r2);
 -     #endif
 - }
 - #endif // X25519_SUPPORT_SIGN
 
 
  |