Browse Source

Begin ref impl, currently an arch option (arch_ref64).

master
Mike Hamburg 10 years ago
parent
commit
a9c72b5a8d
6 changed files with 1690 additions and 10 deletions
  1. +10
    -0
      HISTORY.txt
  2. +825
    -0
      src/arch_ref64/ec_point.c
  3. +477
    -0
      src/arch_ref64/p448.c
  4. +373
    -0
      src/arch_ref64/p448.h
  5. +4
    -10
      src/goldilocks.c
  6. +1
    -0
      src/include/field.h

+ 10
- 0
HISTORY.txt View File

@@ -1,3 +1,13 @@
September 18, 2014:
Begin work on a "ref" implementation. Currently this is just the
arch_ref64 architecture. The ref implementation always weak_reduces
after arithmetic, and doesn't use vectors or other hackery. Currently
it still must declare field elements as vector aligned, though,
because other code outside the arch directory can be vectorized.

Change goldilocks.c to use field_eq instead of calling deep into field
apis.

September 6, 2014:
Pull in minor changes from David Leon Gil and Nicholas Wilson, with
some adjustments. I hope the adjustments don't break their compiles.


+ 825
- 0
src/arch_ref64/ec_point.c View File

@@ -0,0 +1,825 @@
/**
* @cond internal
* @file ec_point.c
* @copyright
* Copyright (c) 2014 Cryptography Research, Inc. \n
* Released under the MIT License. See LICENSE.txt for license information.
* @author Mike Hamburg
* @warning This file was automatically generated.
*/

#include "ec_point.h"


/**
 * Raise x to a fixed power using a hard-coded chain of squarings
 * (p448_sqr / p448_sqrn) and multiplications, storing the result in a.
 *
 * Elsewhere in this file the result r is used such that x * r^2 is compared
 * against 1 (is_square) and x * r^4 is returned as the inverse
 * (p448_inverse).  Presumably "isr" stands for inverse square root.
 *
 * @param a  Output; written only by the final multiplication.
 * @param x  Input field element.
 */
void
p448_isr (
struct p448_t* a,
const struct p448_t* x
) {
/* L0..L2 are scratch registers that are reused throughout the chain. */
struct p448_t L0, L1, L2;
p448_sqr ( &L1, x );
p448_mul ( &L2, x, &L1 );
p448_sqr ( &L1, &L2 );
p448_mul ( &L2, x, &L1 );
p448_sqrn ( &L1, &L2, 3 );
p448_mul ( &L0, &L2, &L1 );
p448_sqrn ( &L1, &L0, 3 );
p448_mul ( &L0, &L2, &L1 );
p448_sqrn ( &L2, &L0, 9 );
p448_mul ( &L1, &L0, &L2 );
p448_sqr ( &L0, &L1 );
p448_mul ( &L2, x, &L0 );
p448_sqrn ( &L0, &L2, 18 );
p448_mul ( &L2, &L1, &L0 );
p448_sqrn ( &L0, &L2, 37 );
p448_mul ( &L1, &L2, &L0 );
p448_sqrn ( &L0, &L1, 37 );
p448_mul ( &L1, &L2, &L0 );
p448_sqrn ( &L0, &L1, 111 );
p448_mul ( &L2, &L1, &L0 );
p448_sqr ( &L0, &L2 );
p448_mul ( &L1, x, &L0 );
p448_sqrn ( &L0, &L1, 223 );
p448_mul ( a, &L2, &L0 );
}

void
p448_inverse (
struct p448_t* a,
const struct p448_t* x
) {
struct p448_t L0, L1;
p448_isr ( &L0, x );
p448_sqr ( &L1, &L0 );
p448_sqr ( &L0, &L1 );
p448_mul ( a, x, &L0 );
}

/**
 * Combine the niels-form element e into d, updating d in place
 * (presumably twisted-Edwards point addition d += e, going by the name).
 *
 * All five coordinates of d (x, y, z, t, u) are overwritten; e is only read.
 * sub_tw_niels_from_tw_extensible is the same sequence with e->a / e->b
 * exchanged and the z-x / x+z pair exchanged.
 *
 * @param d  Accumulator, read and then overwritten.
 * @param e  Operand with fields a, b, c.
 */
void
add_tw_niels_to_tw_extensible (
struct tw_extensible_t* d,
const struct tw_niels_t* e
) {
struct p448_t L0, L1;
p448_sub ( &L1, &d->y, &d->x );
p448_mul ( &L0, &e->a, &L1 );
p448_add ( &L1, &d->x, &d->y );
p448_mul ( &d->y, &e->b, &L1 );
/* t*u is consumed here, before d->t and d->u are overwritten below. */
p448_mul ( &L1, &d->u, &d->t );
p448_mul ( &d->x, &e->c, &L1 );
p448_add ( &d->u, &L0, &d->y );
p448_sub ( &d->t, &d->y, &L0 );
p448_sub ( &d->y, &d->z, &d->x );
p448_add ( &L0, &d->x, &d->z );
p448_mul ( &d->z, &L0, &d->y );
p448_mul ( &d->x, &d->y, &d->t );
p448_mul ( &d->y, &L0, &d->u );
}

/**
 * Counterpart of add_tw_niels_to_tw_extensible (presumably d -= e).
 *
 * Identical operation sequence, except that e->a and e->b swap roles and
 * the (z - x) / (x + z) terms swap destinations.  All five coordinates of d
 * are overwritten; e is only read.
 *
 * @param d  Accumulator, read and then overwritten.
 * @param e  Operand with fields a, b, c.
 */
void
sub_tw_niels_from_tw_extensible (
struct tw_extensible_t* d,
const struct tw_niels_t* e
) {
struct p448_t L0, L1;
p448_sub ( &L1, &d->y, &d->x );
p448_mul ( &L0, &e->b, &L1 );
p448_add ( &L1, &d->x, &d->y );
p448_mul ( &d->y, &e->a, &L1 );
/* t*u is consumed here, before d->t and d->u are overwritten below. */
p448_mul ( &L1, &d->u, &d->t );
p448_mul ( &d->x, &e->c, &L1 );
p448_add ( &d->u, &L0, &d->y );
p448_sub ( &d->t, &d->y, &L0 );
p448_add ( &d->y, &d->x, &d->z );
p448_sub ( &L0, &d->z, &d->x );
p448_mul ( &d->z, &L0, &d->y );
p448_mul ( &d->x, &d->y, &d->t );
p448_mul ( &d->y, &L0, &d->u );
}

void
add_tw_pniels_to_tw_extensible (
struct tw_extensible_t* e,
const struct tw_pniels_t* a
) {
struct p448_t L0;
p448_mul ( &L0, &e->z, &a->z );
p448_copy ( &e->z, &L0 );
add_tw_niels_to_tw_extensible( e, &a->n );
}

void
sub_tw_pniels_from_tw_extensible (
struct tw_extensible_t* e,
const struct tw_pniels_t* a
) {
struct p448_t L0;
p448_mul ( &L0, &e->z, &a->z );
p448_copy ( &e->z, &L0 );
sub_tw_niels_from_tw_extensible( e, &a->n );
}

/**
 * Replace a by a function of itself (presumably point doubling on the
 * twisted curve, going by the name).
 *
 * Reads a->x, a->y, a->z; every coordinate of a, including t and u, is
 * overwritten.
 *
 * @param a  Point to update in place.
 */
void
double_tw_extensible (
struct tw_extensible_t* a
) {
struct p448_t L0, L1, L2;
p448_sqr ( &L2, &a->x );
p448_sqr ( &L0, &a->y );
p448_add ( &a->u, &L2, &L0 );
p448_add ( &a->t, &a->y, &a->x );
p448_sqr ( &L1, &a->t );
/* t = (x+y)^2 - (x^2 + y^2) */
p448_sub ( &a->t, &L1, &a->u );
p448_sub ( &L1, &L0, &L2 );
p448_sqr ( &a->x, &a->z );
p448_add ( &a->z, &a->x, &a->x );
p448_sub ( &L0, &a->z, &L1 );
p448_mul ( &a->z, &L1, &L0 );
p448_mul ( &a->x, &L0, &a->t );
p448_mul ( &a->y, &L1, &a->u );
}

/**
 * Replace a by a function of itself (presumably point doubling on the
 * untwisted curve, going by the name).
 *
 * Same shape as double_tw_extensible, but here x^2 + y^2 is kept in the
 * temporary L1 and y^2 - x^2 goes to a->u.  Every coordinate of a is
 * overwritten.
 *
 * @param a  Point to update in place.
 */
void
double_extensible (
struct extensible_t* a
) {
struct p448_t L0, L1, L2;
p448_sqr ( &L2, &a->x );
p448_sqr ( &L0, &a->y );
p448_add ( &L1, &L2, &L0 );
p448_add ( &a->t, &a->y, &a->x );
p448_sqr ( &a->u, &a->t );
/* t = (x+y)^2 - (x^2 + y^2) */
p448_sub ( &a->t, &a->u, &L1 );
p448_sub ( &a->u, &L0, &L2 );
p448_sqr ( &a->x, &a->z );
p448_add ( &a->z, &a->x, &a->x );
p448_sub ( &L0, &a->z, &L1 );
p448_mul ( &a->z, &L1, &L0 );
p448_mul ( &a->x, &L0, &a->t );
p448_mul ( &a->y, &L1, &a->u );
}

/**
 * Compute b from a, where a is in untwisted extensible form and b is in
 * twisted extensible form (presumably an isogeny combined with a doubling,
 * going by the name).
 *
 * Only a->x, a->y and a->z are read; all five coordinates of b are written.
 *
 * @param b  Output point.
 * @param a  Input point.
 */
void
twist_and_double (
struct tw_extensible_t* b,
const struct extensible_t* a
) {
struct p448_t L0;
p448_sqr ( &b->x, &a->x );
p448_sqr ( &b->z, &a->y );
p448_add ( &b->u, &b->x, &b->z );
p448_add ( &b->t, &a->y, &a->x );
p448_sqr ( &L0, &b->t );
/* t = (x+y)^2 - (x^2 + y^2) */
p448_sub ( &b->t, &L0, &b->u );
p448_sub ( &L0, &b->z, &b->x );
p448_sqr ( &b->x, &a->z );
p448_add ( &b->z, &b->x, &b->x );
p448_sub ( &b->y, &b->z, &b->u );
p448_mul ( &b->z, &L0, &b->y );
p448_mul ( &b->x, &b->y, &b->t );
p448_mul ( &b->y, &L0, &b->u );
}

/**
 * Compute b from a, where a is in twisted extensible form and b is in
 * untwisted extensible form (presumably the dual of twist_and_double).
 *
 * Only a->x, a->y and a->z are read; all five coordinates of b are written.
 *
 * @param b  Output point.
 * @param a  Input point.
 */
void
untwist_and_double (
struct extensible_t* b,
const struct tw_extensible_t* a
) {
struct p448_t L0;
p448_sqr ( &b->x, &a->x );
p448_sqr ( &b->z, &a->y );
p448_add ( &L0, &b->x, &b->z );
p448_add ( &b->t, &a->y, &a->x );
p448_sqr ( &b->u, &b->t );
/* t = (x+y)^2 - (x^2 + y^2) */
p448_sub ( &b->t, &b->u, &L0 );
p448_sub ( &b->u, &b->z, &b->x );
p448_sqr ( &b->x, &a->z );
p448_add ( &b->z, &b->x, &b->x );
p448_sub ( &b->y, &b->z, &b->u );
p448_mul ( &b->z, &L0, &b->y );
p448_mul ( &b->x, &b->y, &b->t );
p448_mul ( &b->y, &L0, &b->u );
}

/**
 * Build the pniels form of an affine point.
 *
 * Writes n.a = y - x, n.b = x + y, n.c = -(78164 * x * y) and z = 2.
 * b->z doubles as scratch for the scaled product before it is set to 2.
 *
 * @param b  Output.
 * @param a  Input affine point (x, y).
 */
void
convert_tw_affine_to_tw_pniels (
struct tw_pniels_t* b,
const struct tw_affine_t* a
) {
p448_sub ( &b->n.a, &a->y, &a->x );
p448_add ( &b->n.b, &a->x, &a->y );
p448_mul ( &b->n.c, &a->y, &a->x );
p448_mulw ( &b->z, &b->n.c, 78164 );
p448_neg ( &b->n.c, &b->z );
p448_set_ui( &b->z, 2 );
}

/**
 * Lift an affine point to extensible form: x and y are copied, z is set
 * to 1, and the auxiliary pair (t, u) is initialised to (x, y).
 *
 * @param b  Output point.
 * @param a  Input affine point.
 */
void
convert_tw_affine_to_tw_extensible (
    struct tw_extensible_t* b,
    const struct tw_affine_t* a
) {
    /* x feeds both x and t */
    p448_copy ( &b->x, &a->x );
    p448_copy ( &b->t, &a->x );

    /* y feeds both y and u */
    p448_copy ( &b->y, &a->y );
    p448_copy ( &b->u, &a->y );

    p448_set_ui( &b->z, 1 );
}

/**
 * Lift an affine point to extensible form: x and y are copied, z is set
 * to 1, and the auxiliary pair (t, u) is initialised to (x, y).
 *
 * @param b  Output point.
 * @param a  Input affine point.
 */
void
convert_affine_to_extensible (
    struct extensible_t* b,
    const struct affine_t* a
) {
    /* x feeds both x and t */
    p448_copy ( &b->x, &a->x );
    p448_copy ( &b->t, &a->x );

    /* y feeds both y and u */
    p448_copy ( &b->y, &a->y );
    p448_copy ( &b->u, &a->y );

    p448_set_ui( &b->z, 1 );
}

/**
 * Build the pniels form of an extensible point.
 *
 * Writes n.a = y - x, n.b = x + y, n.c = -(78164 * t * u) and z = 2 * a->z.
 * b->z doubles as scratch for the scaled product before it receives 2z.
 *
 * @param b  Output.
 * @param a  Input point; x, y, z, t and u are all read.
 */
void
convert_tw_extensible_to_tw_pniels (
struct tw_pniels_t* b,
const struct tw_extensible_t* a
) {
p448_sub ( &b->n.a, &a->y, &a->x );
p448_add ( &b->n.b, &a->x, &a->y );
p448_mul ( &b->n.c, &a->u, &a->t );
p448_mulw ( &b->z, &b->n.c, 78164 );
p448_neg ( &b->n.c, &b->z );
p448_add ( &b->z, &a->z, &a->z );
}

/**
 * Expand a pniels-form element into extensible form.
 *
 * Sets u = b + a and t = b - a (fields of d->n), then x = z*t, y = z*u and
 * finally e->z = d->z squared.  d->n.c is not read.
 *
 * @param e  Output point.
 * @param d  Input pniels element.
 */
void
convert_tw_pniels_to_tw_extensible (
struct tw_extensible_t* e,
const struct tw_pniels_t* d
) {
p448_add ( &e->u, &d->n.b, &d->n.a );
p448_sub ( &e->t, &d->n.b, &d->n.a );
p448_mul ( &e->x, &d->z, &e->t );
p448_mul ( &e->y, &d->z, &e->u );
p448_sqr ( &e->z, &d->z );
}

/**
 * Expand a niels-form element into extensible form.
 *
 * Sets y = b + a, x = b - a, z = 1, and initialises (t, u) to (x, y).
 * d->c is not read.
 *
 * @param e  Output point.
 * @param d  Input niels element.
 */
void
convert_tw_niels_to_tw_extensible (
struct tw_extensible_t* e,
const struct tw_niels_t* d
) {
p448_add ( &e->y, &d->b, &d->a );
p448_sub ( &e->x, &d->b, &d->a );
p448_set_ui( &e->z, 1 );
p448_copy ( &e->t, &e->x );
p448_copy ( &e->u, &e->y );
}

/**
 * Advance the ladder state a by one step, in place (presumably one
 * combined differential add + double of a Montgomery ladder).
 *
 * Fields xa, za, xd and zd are all overwritten; z0 is only read.
 * The constant 39082 equals 1 - d for the d = -39081 checked in
 * validate_affine.
 *
 * @param a  Ladder state to update.
 */
void
montgomery_step (
struct montgomery_t* a
) {
struct p448_t L0, L1;
p448_add ( &L0, &a->zd, &a->xd );
p448_sub ( &L1, &a->xd, &a->zd );
p448_sub ( &a->zd, &a->xa, &a->za );
p448_mul ( &a->xd, &L0, &a->zd );
p448_add ( &a->zd, &a->za, &a->xa );
p448_mul ( &a->za, &L1, &a->zd );
p448_add ( &a->xa, &a->za, &a->xd );
p448_sqr ( &a->zd, &a->xa );
p448_mul ( &a->xa, &a->z0, &a->zd );
p448_sub ( &a->zd, &a->xd, &a->za );
p448_sqr ( &a->za, &a->zd );
/* From here on only L0/L1 (sum and difference of the old xd, zd) feed xd/zd. */
p448_sqr ( &a->xd, &L0 );
p448_sqr ( &L0, &L1 );
p448_mulw ( &a->zd, &a->xd, 39082 );
p448_sub ( &L1, &a->xd, &L0 );
p448_mul ( &a->xd, &L0, &a->zd );
p448_sub ( &L0, &a->zd, &L1 );
p448_mul ( &a->zd, &L0, &L1 );
}

/**
 * Initialise a ladder state from the field element sbz.
 *
 * z0 receives sbz squared; the "a" pair starts at (xa, za) = (1, z0) and
 * the "d" pair at (xd, zd) = (1, 0).
 *
 * @param a    Ladder state to initialise.
 * @param sbz  Input field element.
 */
void
deserialize_montgomery (
    struct montgomery_t* a,
    const struct p448_t* sbz
) {
    p448_sqr ( &a->z0, sbz );

    /* (xa, za) = (1, z0) */
    p448_copy ( &a->za, &a->z0 );
    p448_set_ui( &a->xa, 1 );

    /* (xd, zd) = (1, 0) */
    p448_set_ui( &a->xd, 1 );
    p448_set_ui( &a->zd, 0 );
}

/**
 * Turn a finished ladder state into a single field element b.
 *
 * The zd == 0 case is handled without branching: the mask L1 from
 * p448_is_zero selects an alternative value via p448_mask, its negation L2
 * is added as a word via p448_addw, and its complement L0 masks the
 * final output.
 *
 * @param b    Output field element.
 * @param a    Ladder state (read only).
 * @param sbz  The field element the ladder was started from.
 * @return     Mask: p448_is_zero of (L6 * isr(L6)^2 - 1) OR'd with
 *             p448_is_zero(sbz), i.e. set when the isr check passes or sbz
 *             is zero.
 */
mask_t
serialize_montgomery (
struct p448_t* b,
const struct montgomery_t* a,
const struct p448_t* sbz
) {
mask_t L0, L1, L2;
struct p448_t L3, L4, L5, L6;
p448_mul ( &L6, &a->z0, &a->zd );
p448_sub ( &L4, &L6, &a->xd );
p448_mul ( &L6, &a->za, &L4 );
p448_mul ( &L5, &a->z0, &a->xd );
p448_sub ( &L4, &L5, &a->zd );
p448_mul ( &L3, &a->xa, &L4 );
p448_add ( &L5, &L3, &L6 );
p448_sub ( &L4, &L6, &L3 );
p448_mul ( &L6, &L4, &L5 );
p448_copy ( &L5, &a->z0 );
p448_addw ( &L5, 1 );
p448_sqr ( &L4, &L5 );
p448_mulw ( &L5, &L4, 39082 );
p448_neg ( &L4, &L5 );
p448_add ( &L3, &a->z0, &a->z0 );
p448_add ( &L5, &L3, &L3 );
p448_add ( &L3, &L5, &L4 );
p448_mul ( &L5, &a->xd, &L3 );
/* Branch-free special-casing of zd == 0 starts here. */
L1 = p448_is_zero( &a->zd );
L2 = - L1;
p448_mask ( &L4, &L5, L1 );
p448_add ( &L5, &L4, &a->zd );
L0 = ~ L1;
p448_mul ( &L4, sbz, &L6 );
p448_addw ( &L4, L2 );
p448_mul ( &L6, &L5, &L4 );
p448_mul ( &L4, &L6, &L5 );
p448_mul ( &L5, &L6, &a->xd );
p448_mul ( &L6, &L4, &L5 );
p448_isr ( &L3, &L6 );
p448_mul ( &L5, &L4, &L3 );
p448_sqr ( &L4, &L3 );
p448_mul ( &L3, &L6, &L4 );
p448_mask ( b, &L5, L0 );
/* L3 = L6 * isr(L6)^2; it equals 1 when the isr check passes. */
p448_subw ( &L3, 1 );
L1 = p448_is_zero( &L3 );
L0 = p448_is_zero( sbz );
return L1 | L0;
}

/**
 * Compress an extensible point into a single field element b.
 *
 * Only a->x, a->y and a->z are read.  The result is the product of an
 * intermediate value with the p448_isr of a related quantity.
 *
 * The generated code used to finish with a squaring and a multiplication
 * whose results were stored only in locals and never read; they have been
 * dropped because they cost two field operations without affecting b.
 *
 * @param b  Output field element.
 * @param a  Input point.
 */
void
serialize_extensible (
struct p448_t* b,
const struct extensible_t* a
) {
struct p448_t L0, L1, L2;
p448_sub ( &L0, &a->y, &a->z );
/* b is used as scratch for z + y until the final multiplication. */
p448_add ( b, &a->z, &a->y );
p448_mul ( &L1, &a->z, &a->x );
p448_mul ( &L2, &L0, &L1 );
p448_mul ( &L1, &L2, &L0 );
p448_mul ( &L0, &L2, b );
p448_mul ( &L2, &L1, &L0 );
p448_isr ( &L0, &L2 );
p448_mul ( b, &L1, &L0 );
}

/**
 * Compute a single serialized field element b from the twisted point a
 * (per the name: untwist, double, then serialize, fused into one chain).
 *
 * Only a->x, a->y and a->z are read.  b is used as scratch throughout and
 * receives the result in the last multiplication.
 *
 * The generated code used to square the isr result into b and multiply it
 * into L0 immediately before the final step; b was then overwritten and L0
 * never read, so those two field operations have been dropped.
 *
 * @param b  Output field element.
 * @param a  Input point.
 */
void
untwist_and_double_and_serialize (
struct p448_t* b,
const struct tw_extensible_t* a
) {
struct p448_t L0, L1, L2, L3;
/* L3 = x*y is kept until the very end. */
p448_mul ( &L3, &a->y, &a->x );
p448_add ( b, &a->y, &a->x );
p448_sqr ( &L1, b );
p448_add ( &L2, &L3, &L3 );
/* b = (x+y)^2 - 2xy */
p448_sub ( b, &L1, &L2 );
p448_sqr ( &L2, &a->z );
p448_sqr ( &L1, &L2 );
p448_add ( &L2, b, b );
p448_mulw ( b, &L2, 39082 );
p448_neg ( &L2, b );
p448_mulw ( &L0, &L2, 39082 );
p448_neg ( b, &L0 );
p448_mul ( &L0, &L2, &L1 );
p448_mul ( &L2, b, &L0 );
p448_isr ( &L0, &L2 );
p448_mul ( &L1, b, &L0 );
p448_mul ( b, &L1, &L3 );
}

/**
 * Map the untwisted point a to a twisted extensible point b with z = 1.
 *
 * Only a->x, a->y and a->z are read.  b's coordinates are used as scratch
 * until the final assignments; on return b->z is 1 and (t, u) = (x, y).
 * When a->z - a->y is zero, 1 is added to b->y through the negated mask
 * from p448_is_zero, so no branch is taken.
 *
 * The generated code used to square the isr result into b->x and multiply
 * it into b->t right after the isr; both were overwritten before being
 * read, so those two field operations have been dropped.
 *
 * @param b  Output point.
 * @param a  Input point.
 */
void
twist_even (
struct tw_extensible_t* b,
const struct extensible_t* a
) {
mask_t L0, L1;
p448_sqr ( &b->y, &a->z );
p448_sqr ( &b->z, &a->x );
p448_sub ( &b->u, &b->y, &b->z );
p448_sub ( &b->z, &a->z, &a->x );
p448_mul ( &b->y, &b->z, &a->y );
/* b->z = z - y is kept for the zero test below. */
p448_sub ( &b->z, &a->z, &a->y );
p448_mul ( &b->x, &b->z, &b->y );
p448_mul ( &b->t, &b->x, &b->u );
p448_mul ( &b->y, &b->x, &b->t );
p448_isr ( &b->t, &b->y );
p448_mul ( &b->u, &b->x, &b->t );
p448_mul ( &b->x, &a->x, &b->u );
p448_mul ( &b->y, &a->y, &b->u );
L1 = p448_is_zero( &b->z );
L0 = - L1;
p448_addw ( &b->y, L0 );
p448_set_ui( &b->z, 1 );
p448_copy ( &b->t, &b->x );
p448_copy ( &b->u, &b->y );
}

/**
 * Map the untwisted point a to a twisted extensible point b (per the name,
 * intended for tests only).
 *
 * Only a->x, a->y and a->z are read.  Three special cases are handled
 * without branching, by feeding p448_is_zero masks into word arithmetic:
 *   - z - x == 0 : 1 is added to b->x;
 *   - z - y == 0 : 1 is added to b->y;
 *   - y == 0     : b->z is set to (mask + 1) instead of 1.
 * On return (t, u) = (x, y).
 *
 * @param b  Output point.
 * @param a  Input point.
 */
void
test_only_twist (
struct tw_extensible_t* b,
const struct extensible_t* a
) {
mask_t L0, L1;
struct p448_t L2, L3;
p448_sqr ( &b->u, &a->z );
p448_sqr ( &b->y, &a->x );
p448_sub ( &b->z, &b->u, &b->y );
p448_add ( &b->y, &b->z, &b->z );
p448_add ( &b->u, &b->y, &b->y );
/* b->y = z - x and b->z = z - y are kept for the zero tests below. */
p448_sub ( &b->y, &a->z, &a->x );
p448_mul ( &b->x, &b->y, &a->y );
p448_sub ( &b->z, &a->z, &a->y );
p448_mul ( &b->t, &b->z, &b->x );
p448_mul ( &L3, &b->t, &b->u );
p448_mul ( &b->x, &b->t, &L3 );
p448_isr ( &L2, &b->x );
p448_mul ( &b->u, &b->t, &L2 );
p448_sqr ( &L3, &L2 );
p448_mul ( &b->t, &b->x, &L3 );
p448_add ( &L3, &a->y, &a->x );
p448_sub ( &L2, &a->x, &a->y );
p448_mul ( &b->x, &b->t, &L2 );
p448_add ( &L2, &b->x, &L3 );
p448_sub ( &b->t, &L3, &b->x );
p448_mul ( &b->x, &L2, &b->u );
L0 = p448_is_zero( &b->y );
L1 = - L0;
p448_addw ( &b->x, L1 );
p448_mul ( &b->y, &b->t, &b->u );
L0 = p448_is_zero( &b->z );
L1 = - L0;
p448_addw ( &b->y, L1 );
L1 = p448_is_zero( &a->y );
L0 = L1 + 1;
p448_set_ui( &b->z, L0 );
p448_copy ( &b->t, &b->x );
p448_copy ( &b->u, &b->y );
}

mask_t
is_square (
const struct p448_t* x
) {
mask_t L0, L1;
struct p448_t L2, L3;
p448_isr ( &L2, x );
p448_sqr ( &L3, &L2 );
p448_mul ( &L2, x, &L3 );
p448_subw ( &L2, 1 );
L1 = p448_is_zero( &L2 );
L0 = p448_is_zero( x );
return L1 | L0;
}

mask_t
is_even_pt (
const struct extensible_t* a
) {
struct p448_t L0, L1, L2;
p448_sqr ( &L2, &a->z );
p448_sqr ( &L1, &a->x );
p448_sub ( &L0, &L2, &L1 );
return is_square ( &L0 );
}

mask_t
is_even_tw (
const struct tw_extensible_t* a
) {
struct p448_t L0, L1, L2;
p448_sqr ( &L2, &a->z );
p448_sqr ( &L1, &a->x );
p448_add ( &L0, &L1, &L2 );
return is_square ( &L0 );
}

/**
 * Decode the field element sz into an affine point a.
 *
 * a->x and a->y are used as scratch during the computation and receive the
 * final coordinates near the end.
 *
 * @param a   Output point.
 * @param sz  Serialized field element.
 * @return    Mask from p448_is_zero on (L0 - 1), where L0 is the product
 *            that is expected to equal 1 when the p448_isr step succeeded.
 */
mask_t
deserialize_affine (
struct affine_t* a,
const struct p448_t* sz
) {
struct p448_t L0, L1, L2, L3;
/* L1 = sz^2 stays live until it is incremented near the end. */
p448_sqr ( &L1, sz );
p448_copy ( &L3, &L1 );
p448_addw ( &L3, 1 );
p448_sqr ( &a->x, &L3 );
p448_mulw ( &L3, &a->x, 39082 );
p448_neg ( &a->x, &L3 );
p448_add ( &L3, &L1, &L1 );
p448_add ( &a->y, &L3, &L3 );
p448_add ( &L3, &a->y, &a->x );
p448_copy ( &a->y, &L1 );
p448_subw ( &a->y, 1 );
p448_neg ( &a->x, &a->y );
p448_mul ( &a->y, &a->x, &L3 );
p448_sqr ( &L2, &a->x );
p448_mul ( &L0, &L2, &a->y );
p448_mul ( &a->y, &a->x, &L0 );
p448_isr ( &L3, &a->y );
p448_mul ( &a->y, &L2, &L3 );
p448_sqr ( &L2, &L3 );
p448_mul ( &L3, &L0, &L2 );
p448_mul ( &L0, &a->x, &L3 );
p448_add ( &L2, &a->y, &a->y );
p448_mul ( &a->x, sz, &L2 );
p448_addw ( &L1, 1 );
p448_mul ( &a->y, &L1, &L3 );
p448_subw ( &L0, 1 );
return p448_is_zero( &L0 );
}

/**
 * Decode the field element sz directly into a twisted extensible point a.
 *
 * All coordinates of a are used as scratch; on return a->z is 1 and
 * (t, u) = (x, y).
 *
 * @param a     Output point.
 * @param sdm1  Extra field-element constant multiplied in once
 *              (NOTE(review): meaning not visible here; the name suggests
 *              sqrt(d-1) -- confirm against the caller).
 * @param sz    Serialized field element.
 * @return      Mask from p448_is_zero on (a->t - 1), taken before a->t is
 *              overwritten with x.
 */
mask_t
deserialize_and_twist_approx (
struct tw_extensible_t* a,
const struct p448_t* sdm1,
const struct p448_t* sz
) {
struct p448_t L0, L1;
/* a->z = sz^2 stays live until it is incremented near the end. */
p448_sqr ( &a->z, sz );
p448_copy ( &a->y, &a->z );
p448_addw ( &a->y, 1 );
p448_sqr ( &a->x, &a->y );
p448_mulw ( &a->y, &a->x, 39082 );
p448_neg ( &a->x, &a->y );
p448_add ( &a->y, &a->z, &a->z );
p448_add ( &a->u, &a->y, &a->y );
p448_add ( &a->y, &a->u, &a->x );
p448_sqr ( &a->x, &a->z );
p448_subw ( &a->x, 1 );
p448_neg ( &a->u, &a->x );
p448_mul ( &a->x, sdm1, &a->u );
p448_mul ( &L0, &a->x, &a->y );
p448_mul ( &a->t, &L0, &a->y );
p448_mul ( &a->u, &a->x, &a->t );
p448_mul ( &a->t, &a->u, &L0 );
p448_mul ( &a->y, &a->x, &a->t );
p448_isr ( &L0, &a->y );
p448_mul ( &a->y, &a->u, &L0 );
p448_sqr ( &L1, &L0 );
p448_mul ( &a->u, &a->t, &L1 );
p448_mul ( &a->t, &a->x, &a->u );
p448_add ( &a->x, sz, sz );
p448_mul ( &L0, &a->u, &a->x );
p448_copy ( &a->x, &a->z );
p448_subw ( &a->x, 1 );
p448_neg ( &L1, &a->x );
p448_mul ( &a->x, &L1, &L0 );
p448_mul ( &L0, &a->u, &a->y );
p448_addw ( &a->z, 1 );
p448_mul ( &a->y, &a->z, &L0 );
p448_subw ( &a->t, 1 );
/* Capture the success mask before t is reused as an output coordinate. */
mask_t ret = p448_is_zero( &a->t );
p448_set_ui( &a->z, 1 );
p448_copy ( &a->t, &a->x );
p448_copy ( &a->u, &a->y );
return ret;
}

/**
 * Set a to the constant point x = 0, y = 1, z = 1, t = 0, u = 0
 * (the identity, per the function name).
 *
 * @param a  Point to overwrite.
 */
void
set_identity_extensible (
    struct extensible_t* a
) {
    /* zero-valued coordinates */
    p448_set_ui( &a->x, 0 );
    p448_set_ui( &a->t, 0 );
    p448_set_ui( &a->u, 0 );

    /* one-valued coordinates */
    p448_set_ui( &a->y, 1 );
    p448_set_ui( &a->z, 1 );
}

/**
 * Set a to the constant point x = 0, y = 1, z = 1, t = 0, u = 0
 * (the identity, per the function name).
 *
 * @param a  Point to overwrite.
 */
void
set_identity_tw_extensible (
    struct tw_extensible_t* a
) {
    /* zero-valued coordinates */
    p448_set_ui( &a->x, 0 );
    p448_set_ui( &a->t, 0 );
    p448_set_ui( &a->u, 0 );

    /* one-valued coordinates */
    p448_set_ui( &a->y, 1 );
    p448_set_ui( &a->z, 1 );
}

/**
 * Set a to the affine point (0, 1) (the identity, per the function name).
 *
 * @param a  Point to overwrite.
 */
void
set_identity_affine (
    struct affine_t* a
) {
    p448_set_ui( &a->y, 1 );
    p448_set_ui( &a->x, 0 );
}

mask_t
eq_affine (
const struct affine_t* a,
const struct affine_t* b
) {
mask_t L0, L1;
struct p448_t L2;
p448_sub ( &L2, &a->x, &b->x );
L1 = p448_is_zero( &L2 );
p448_sub ( &L2, &a->y, &b->y );
L0 = p448_is_zero( &L2 );
return L1 & L0;
}

mask_t
eq_extensible (
const struct extensible_t* a,
const struct extensible_t* b
) {
mask_t L0, L1;
struct p448_t L2, L3, L4;
p448_mul ( &L4, &b->z, &a->x );
p448_mul ( &L3, &a->z, &b->x );
p448_sub ( &L2, &L4, &L3 );
L1 = p448_is_zero( &L2 );
p448_mul ( &L4, &b->z, &a->y );
p448_mul ( &L3, &a->z, &b->y );
p448_sub ( &L2, &L4, &L3 );
L0 = p448_is_zero( &L2 );
return L1 & L0;
}

mask_t
eq_tw_extensible (
const struct tw_extensible_t* a,
const struct tw_extensible_t* b
) {
mask_t L0, L1;
struct p448_t L2, L3, L4;
p448_mul ( &L4, &b->z, &a->x );
p448_mul ( &L3, &a->z, &b->x );
p448_sub ( &L2, &L4, &L3 );
L1 = p448_is_zero( &L2 );
p448_mul ( &L4, &b->z, &a->y );
p448_mul ( &L3, &a->z, &b->y );
p448_sub ( &L2, &L4, &L3 );
L0 = p448_is_zero( &L2 );
return L1 & L0;
}

/**
 * Map the field element r to an affine point a (per the name, an
 * Elligator 2 style injection).
 *
 * a->x and a->y are used as scratch during the chain and hold the result
 * on return.  When the intermediate L8 is zero, 1 is added to a->y through
 * the negated p448_is_zero mask, so no branch is taken.
 *
 * NOTE(review): the word constants (1527402724, 6108985600, 6109454568,
 * 78160, 3054649120) are presumably derived from the curve constant; their
 * derivation is not visible in this file.
 *
 * @param a  Output point.
 * @param r  Input field element.
 */
void
elligator_2s_inject (
struct affine_t* a,
const struct p448_t* r
) {
mask_t L0, L1;
struct p448_t L2, L3, L4, L5, L6, L7, L8;
/* a->x = r^2 and L3 = r^4 stay live for most of the function. */
p448_sqr ( &a->x, r );
p448_sqr ( &L3, &a->x );
p448_copy ( &a->y, &L3 );
p448_subw ( &a->y, 1 );
p448_neg ( &L4, &a->y );
p448_sqr ( &L2, &L4 );
p448_mulw ( &L7, &L2, 1527402724 );
p448_mulw ( &L8, &L3, 6108985600 );
p448_add ( &a->y, &L8, &L7 );
p448_mulw ( &L8, &L2, 6109454568 );
p448_sub ( &L7, &a->y, &L8 );
p448_mulw ( &L6, &a->y, 78160 );
p448_mul ( &L5, &L7, &L6 );
p448_mul ( &L8, &L5, &L4 );
p448_mul ( &L4, &L5, &L6 );
p448_mul ( &L5, &L7, &L8 );
p448_mul ( &L8, &L5, &L4 );
p448_mul ( &L4, &L7, &L8 );
p448_isr ( &L6, &L4 );
p448_mul ( &L4, &L5, &L6 );
p448_sqr ( &L5, &L6 );
p448_mul ( &L6, &L8, &L5 );
p448_mul ( &L8, &L7, &L6 );
p448_mul ( &L7, &L8, &L6 );
p448_copy ( &L6, &a->x );
p448_subw ( &L6, 1 );
p448_addw ( &a->x, 1 );
p448_mul ( &L5, &a->x, &L8 );
p448_sub ( &a->x, &L6, &L5 );
p448_mul ( &L5, &L4, &a->x );
p448_mulw ( &L4, &L5, 78160 );
p448_neg ( &a->x, &L4 );
p448_add ( &L4, &L3, &L3 );
p448_add ( &L3, &L4, &L2 );
p448_subw ( &L3, 2 );
p448_mul ( &L2, &L3, &L8 );
p448_mulw ( &L3, &L2, 3054649120 );
p448_add ( &L2, &L3, &a->y );
p448_mul ( &a->y, &L7, &L2 );
L1 = p448_is_zero( &L8 );
L0 = - L1;
p448_addw ( &a->y, L0 );
}

/**
 * Check that an affine point satisfies the curve equation.
 *
 * Evaluates (x^2 + y^2 - 1) - (-39081 * x^2 * y^2) and tests it for zero,
 * i.e. x^2 + y^2 = 1 - 39081 * x^2 * y^2.
 *
 * @param a  Point to check.
 * @return   Mask from p448_is_zero on the expression above.
 */
mask_t
validate_affine (
const struct affine_t* a
) {
struct p448_t L0, L1, L2, L3;
p448_sqr ( &L0, &a->y );
p448_sqr ( &L2, &a->x );
p448_add ( &L3, &L2, &L0 );
p448_subw ( &L3, 1 );
p448_mulw ( &L1, &L2, 39081 );
p448_neg ( &L2, &L1 );
p448_mul ( &L1, &L0, &L2 );
p448_sub ( &L0, &L3, &L1 );
return p448_is_zero( &L0 );
}

/**
 * Check the two invariants of a twisted extensible point.
 *
 * Each invariant is evaluated into a field element and tested with
 * p448_is_zero.  In the second invariant the d*t^2*u^2 term is computed as
 * -(39081 * t^2 * u^2).
 *
 * @param ext  Point to check.
 * @return     AND of the two zero-test masks.
 */
mask_t
validate_tw_extensible (
const struct tw_extensible_t* ext
) {
mask_t L0, L1;
struct p448_t L2, L3, L4, L5;
/*
* Check invariant:
* 0 = -x*y + z*t*u
*/
p448_mul ( &L2, &ext->t, &ext->u );
p448_mul ( &L4, &ext->z, &L2 );
p448_addw ( &L4, 0 );
p448_mul ( &L3, &ext->x, &ext->y );
p448_neg ( &L2, &L3 );
p448_add ( &L3, &L2, &L4 );
L1 = p448_is_zero( &L3 );
/*
* Check invariant:
* 0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2
*/
p448_sqr ( &L4, &ext->y );
p448_neg ( &L2, &L4 );
p448_addw ( &L2, 0 );
p448_sqr ( &L3, &ext->x );
p448_add ( &L4, &L3, &L2 );
p448_sqr ( &L5, &ext->u );
p448_sqr ( &L3, &ext->t );
p448_mul ( &L2, &L3, &L5 );
p448_mulw ( &L3, &L2, 39081 );
p448_neg ( &L5, &L3 );
p448_add ( &L3, &L5, &L4 );
p448_neg ( &L5, &L2 );
p448_add ( &L4, &L5, &L3 );
p448_sqr ( &L3, &ext->z );
p448_add ( &L2, &L3, &L4 );
L0 = p448_is_zero( &L2 );
return L1 & L0;
}

/**
 * Check the two invariants of an untwisted extensible point.
 *
 * Each invariant is evaluated into a field element and tested with
 * p448_is_zero.  The d*t^2*u^2 term is computed as -(39081 * t^2 * u^2).
 *
 * @param ext  Point to check.
 * @return     AND of the two zero-test masks.
 */
mask_t
validate_extensible (
const struct extensible_t* ext
) {
mask_t L0, L1;
struct p448_t L2, L3, L4, L5;
/*
* Check invariant:
* 0 = d*t^2*u^2 - x^2 - y^2 + z^2
*/
p448_sqr ( &L4, &ext->y );
p448_neg ( &L3, &L4 );
p448_addw ( &L3, 0 );
p448_sqr ( &L2, &ext->z );
p448_add ( &L4, &L2, &L3 );
p448_sqr ( &L5, &ext->u );
p448_sqr ( &L2, &ext->t );
p448_mul ( &L3, &L2, &L5 );
p448_mulw ( &L5, &L3, 39081 );
p448_neg ( &L2, &L5 );
p448_add ( &L3, &L2, &L4 );
p448_sqr ( &L2, &ext->x );
p448_neg ( &L4, &L2 );
p448_add ( &L2, &L4, &L3 );
L1 = p448_is_zero( &L2 );
/*
* Check invariant:
* 0 = -x*y + z*t*u
*/
p448_mul ( &L3, &ext->t, &ext->u );
p448_mul ( &L4, &ext->z, &L3 );
p448_addw ( &L4, 0 );
p448_mul ( &L2, &ext->x, &ext->y );
p448_neg ( &L3, &L2 );
p448_add ( &L2, &L3, &L4 );
L0 = p448_is_zero( &L2 );
return L1 & L0;
}



+ 477
- 0
src/arch_ref64/p448.c View File

@@ -0,0 +1,477 @@
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/

#include "p448.h"

/* Full 64x64 -> 128-bit unsigned product of a and b. */
static __inline__ __uint128_t widemul(
    const uint64_t a,
    const uint64_t b
) {
    /* Widen one operand first so the multiplication is done in 128 bits. */
    const __uint128_t wide_a = a;
    return wide_a * b;
}

/* Branch-free zero test: returns all-ones when a == 0 and 0 otherwise. */
static __inline__ uint64_t is_zero(uint64_t a) {
    /* a - 1 in 128 bits borrows into the upper half only when a is 0.
     * Let's hope the compiler isn't clever enough to turn this into a branch.
     */
    const __uint128_t widened = a;
    const __uint128_t borrowed = widened - 1;
    return (uint64_t)(borrowed >> 64);
}

/**
 * Field multiplication: cs = as * bs.
 *
 * The eight limbs of each operand are treated as a low half (0..3) and a
 * high half (4..7).  aa and bb hold the limb-wise sums of the two halves,
 * and bbb = bb + the high half of b.  For each i in 0..3, three 128-bit
 * accumulators collect partial products; output limbs i and i+4 are the low
 * 56 bits of accum0 and accum1, and the remainder carries into the next i.
 * The carries left after i = 3 are folded back into limbs 0/4 and then 1/5.
 *
 * Two equivalent code paths exist: a compact loop (disabled by default via
 * I_HATE_UNROLLED_LOOPS = 0) and its hand-unrolled form.
 *
 * @param cs  Output; declared __restrict__, so it must not alias as or bs.
 * @param as  Left operand.
 * @param bs  Right operand.
 */
void
p448_mul (
p448_t *__restrict__ cs,
const p448_t *as,
const p448_t *bs
) {
const uint64_t *a = as->limb, *b = bs->limb;
uint64_t *c = cs->limb;

__uint128_t accum0 = 0, accum1 = 0, accum2;
uint64_t mask = (1ull<<56) - 1;

uint64_t aa[4], bb[4], bbb[4];

unsigned int i;
for (i=0; i<4; i++) {
aa[i] = a[i] + a[i+4];
bb[i] = b[i] + b[i+4];
bbb[i] = bb[i] + b[i+4];
}

/* Compile-time style switch kept as a local: 0 selects the unrolled path. */
int I_HATE_UNROLLED_LOOPS = 0;

if (I_HATE_UNROLLED_LOOPS) {
/* The compiler probably won't unroll this,
* so it's like 80% slower.
*/
for (i=0; i<4; i++) {
accum2 = 0;

unsigned int j;
for (j=0; j<=i; j++) {
accum2 += widemul(a[j], b[i-j]);
accum1 += widemul(aa[j], bb[i-j]);
accum0 += widemul(a[j+4], b[i-j+4]);
}
/* j > i here: i-j wraps as unsigned, and the +8 / +4 offsets wrap it
 * back into range. */
for (; j<4; j++) {
accum2 += widemul(a[j], b[i-j+8]);
accum1 += widemul(aa[j], bbb[i-j+4]);
accum0 += widemul(a[j+4], bb[i-j+4]);
}

accum1 -= accum2;
accum0 += accum2;

c[i] = ((uint64_t)(accum0)) & mask;
c[i+4] = ((uint64_t)(accum1)) & mask;

accum0 >>= 56;
accum1 >>= 56;
}
} else {
/* i = 0 */
accum2 = widemul(a[0], b[0]);
accum1 += widemul(aa[0], bb[0]);
accum0 += widemul(a[4], b[4]);

accum2 += widemul(a[1], b[7]);
accum1 += widemul(aa[1], bbb[3]);
accum0 += widemul(a[5], bb[3]);

accum2 += widemul(a[2], b[6]);
accum1 += widemul(aa[2], bbb[2]);
accum0 += widemul(a[6], bb[2]);

accum2 += widemul(a[3], b[5]);
accum1 += widemul(aa[3], bbb[1]);
accum0 += widemul(a[7], bb[1]);

accum1 -= accum2;
accum0 += accum2;

c[0] = ((uint64_t)(accum0)) & mask;
c[4] = ((uint64_t)(accum1)) & mask;

accum0 >>= 56;
accum1 >>= 56;

/* i = 1 */
accum2 = widemul(a[0], b[1]);
accum1 += widemul(aa[0], bb[1]);
accum0 += widemul(a[4], b[5]);

accum2 += widemul(a[1], b[0]);
accum1 += widemul(aa[1], bb[0]);
accum0 += widemul(a[5], b[4]);

accum2 += widemul(a[2], b[7]);
accum1 += widemul(aa[2], bbb[3]);
accum0 += widemul(a[6], bb[3]);

accum2 += widemul(a[3], b[6]);
accum1 += widemul(aa[3], bbb[2]);
accum0 += widemul(a[7], bb[2]);

accum1 -= accum2;
accum0 += accum2;

c[1] = ((uint64_t)(accum0)) & mask;
c[5] = ((uint64_t)(accum1)) & mask;

accum0 >>= 56;
accum1 >>= 56;

/* i = 2 */
accum2 = widemul(a[0], b[2]);
accum1 += widemul(aa[0], bb[2]);
accum0 += widemul(a[4], b[6]);

accum2 += widemul(a[1], b[1]);
accum1 += widemul(aa[1], bb[1]);
accum0 += widemul(a[5], b[5]);

accum2 += widemul(a[2], b[0]);
accum1 += widemul(aa[2], bb[0]);
accum0 += widemul(a[6], b[4]);

accum2 += widemul(a[3], b[7]);
accum1 += widemul(aa[3], bbb[3]);
accum0 += widemul(a[7], bb[3]);

accum1 -= accum2;
accum0 += accum2;

c[2] = ((uint64_t)(accum0)) & mask;
c[6] = ((uint64_t)(accum1)) & mask;

accum0 >>= 56;
accum1 >>= 56;

/* i = 3 */
accum2 = widemul(a[0], b[3]);
accum1 += widemul(aa[0], bb[3]);
accum0 += widemul(a[4], b[7]);

accum2 += widemul(a[1], b[2]);
accum1 += widemul(aa[1], bb[2]);
accum0 += widemul(a[5], b[6]);

accum2 += widemul(a[2], b[1]);
accum1 += widemul(aa[2], bb[1]);
accum0 += widemul(a[6], b[5]);

accum2 += widemul(a[3], b[0]);
accum1 += widemul(aa[3], bb[0]);
accum0 += widemul(a[7], b[4]);

accum1 -= accum2;
accum0 += accum2;

c[3] = ((uint64_t)(accum0)) & mask;
c[7] = ((uint64_t)(accum1)) & mask;

accum0 >>= 56;
accum1 >>= 56;
} /* !I_HATE_UNROLLED_LOOPS */

/* Fold the leftover carries: accum1 (out of limb 7) goes to both limb 0
 * and limb 4; accum0 (out of limb 3) goes to limb 4. */
accum0 += accum1;
accum0 += c[4];
accum1 += c[0];
c[4] = ((uint64_t)(accum0)) & mask;
c[0] = ((uint64_t)(accum1)) & mask;

accum0 >>= 56;
accum1 >>= 56;

/* Last carries are added without masking, so c[1] and c[5] may slightly
 * exceed 56 bits. */
c[5] += ((uint64_t)(accum0));
c[1] += ((uint64_t)(accum1));
}

/**
 * Multiply a field element by a 64-bit word: cs = as * b.
 *
 * The low half (limbs 0..3) and high half (limbs 4..7) are processed in
 * parallel with separate 128-bit carry chains; each output limb is masked
 * to 56 bits.  The carries left over are then folded back: the low chain's
 * carry into limb 4, and the high chain's carry into both limb 4 and limb 0.
 *
 * @param cs  Output; declared __restrict__, so it must not alias as.
 * @param as  Field-element operand.
 * @param b   Word multiplier.
 */
void
p448_mulw (
p448_t *__restrict__ cs,
const p448_t *as,
uint64_t b
) {
const uint64_t *a = as->limb;
uint64_t *c = cs->limb;

__uint128_t accum0 = 0, accum4 = 0;
uint64_t mask = (1ull<<56) - 1;

int i;
for (i=0; i<4; i++) {
accum0 += widemul(b, a[i]);
accum4 += widemul(b, a[i+4]);
c[i] = accum0 & mask; accum0 >>= 56;
c[i+4] = accum4 & mask; accum4 >>= 56;
}
/* accum0 = carry out of limb 3, accum4 = carry out of limb 7. */
accum0 += accum4 + c[4];
c[4] = accum0 & mask;
c[5] += accum0 >> 56;

accum4 += c[0];
c[0] = accum4 & mask;
c[1] += accum4 >> 56;
}

/**
 * Field squaring: cs = as * as.
 *
 * Uses the same low-half / high-half split as p448_mul (aa holds the
 * limb-wise sum of the halves) but exploits symmetry: cross terms are
 * computed once and doubled, either by multiplying one factor by 2 or, for
 * limbs 3 and 7, by shifting the accumulated sum left by one bit.
 *
 * Output limbs are produced in the order 3/7 (provisional), 0/4, 1/5, 2/6,
 * then 3/7 are finalised with the incoming carry, and the last carries are
 * added into limbs 4 and 0 without masking.
 *
 * @param cs  Output; declared __restrict__, so it must not alias as.
 * @param as  Element to square.
 */
void
p448_sqr (
p448_t *__restrict__ cs,
const p448_t *as
) {
const uint64_t *a = as->limb;
uint64_t *c = cs->limb;

__uint128_t accum0 = 0, accum1 = 0, accum2;
uint64_t mask = (1ull<<56) - 1;

uint64_t aa[4];

/* For some reason clang doesn't vectorize this without prompting? */
unsigned int i;
for (i=0; i<4; i++) {
aa[i] = a[i] + a[i+4];
}

accum2 = widemul(a[0],a[3]);
accum0 = widemul(aa[0],aa[3]);
accum1 = widemul(a[4],a[7]);

accum2 += widemul(a[1], a[2]);
accum0 += widemul(aa[1], aa[2]);
accum1 += widemul(a[5], a[6]);

accum0 -= accum2;
accum1 += accum2;

/* Doubling is applied here via <<1 on the low word; the matching carry
 * shift below is therefore 55 rather than 56. */
c[3] = ((uint64_t)(accum1))<<1 & mask;
c[7] = ((uint64_t)(accum0))<<1 & mask;

accum0 >>= 55;
accum1 >>= 55;

accum0 += widemul(2*aa[1],aa[3]);
accum1 += widemul(2*a[5], a[7]);
accum0 += widemul(aa[2], aa[2]);
accum1 += accum0;

accum0 -= widemul(2*a[1], a[3]);
accum1 += widemul(a[6], a[6]);
accum2 = widemul(a[0],a[0]);
accum1 -= accum2;
accum0 += accum2;

accum0 -= widemul(a[2], a[2]);
accum1 += widemul(aa[0], aa[0]);
accum0 += widemul(a[4], a[4]);

c[0] = ((uint64_t)(accum0)) & mask;
c[4] = ((uint64_t)(accum1)) & mask;

accum0 >>= 56;
accum1 >>= 56;

accum2 = widemul(2*aa[2],aa[3]);
accum0 -= widemul(2*a[2], a[3]);
accum1 += widemul(2*a[6], a[7]);

accum1 += accum2;
accum0 += accum2;

accum2 = widemul(2*a[0],a[1]);
accum1 += widemul(2*aa[0], aa[1]);
accum0 += widemul(2*a[4], a[5]);

accum1 -= accum2;
accum0 += accum2;

c[1] = ((uint64_t)(accum0)) & mask;
c[5] = ((uint64_t)(accum1)) & mask;

accum0 >>= 56;
accum1 >>= 56;

accum2 = widemul(aa[3],aa[3]);
accum0 -= widemul(a[3], a[3]);
accum1 += widemul(a[7], a[7]);

accum1 += accum2;
accum0 += accum2;

accum2 = widemul(2*a[0],a[2]);
accum1 += widemul(2*aa[0], aa[2]);
accum0 += widemul(2*a[4], a[6]);

accum2 += widemul(a[1], a[1]);
accum1 += widemul(aa[1], aa[1]);
accum0 += widemul(a[5], a[5]);

accum1 -= accum2;
accum0 += accum2;

c[2] = ((uint64_t)(accum0)) & mask;
c[6] = ((uint64_t)(accum1)) & mask;

accum0 >>= 56;
accum1 >>= 56;

/* Finalise limbs 3 and 7 with the carries coming out of limbs 2 and 6. */
accum0 += c[3];
accum1 += c[7];
c[3] = ((uint64_t)(accum0)) & mask;
c[7] = ((uint64_t)(accum1)) & mask;

/* we could almost stop here, but it wouldn't be stable, so... */

accum0 >>= 56;
accum1 >>= 56;
c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1));
c[0] += ((uint64_t)(accum1));
}

/**
 * Reduce a to its canonical representative, in place.
 *
 * Steps: fold the bits above 56 of the top limb back into limbs 4 and 0;
 * subtract p limb by limb with a signed carry; then add p back, masked by
 * the final borrow, so the subtraction is undone exactly when the value was
 * below p.  No branch depends on the value.
 *
 * NOTE(review): the subtraction relies on >> of a negative __int128_t being
 * an arithmetic shift, which is implementation-defined in C.
 *
 * @param a  Element to reduce.
 */
void
p448_strong_reduce (
p448_t *a
) {
uint64_t mask = (1ull<<56)-1;

/* first, clear high */
a->limb[4] += a->limb[7]>>56;
a->limb[0] += a->limb[7]>>56;
a->limb[7] &= mask;

/* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

/* compute total_value - p. No need to reduce mod p. */

__int128_t scarry = 0;
int i;
for (i=0; i<8; i++) {
/* p's limbs are all `mask` except limb 4, which is mask-1. */
scarry = scarry + a->limb[i] - ((i==4)?mask-1:mask);
a->limb[i] = scarry & mask;
scarry >>= 56;
}

/* uncommon case: it was >= p, so now scarry = 0 and this = x
* common case: it was < p, so now scarry = -1 and this = x - p + 2^448
* so let's add back in p. will carry back off the top for 2^448.
*/

assert(is_zero(scarry) | is_zero(scarry+1));

/* scarry_mask is 0 or `mask`; clearing bit 0 gives p's limb 4 (mask-1). */
uint64_t scarry_mask = scarry & mask;
__uint128_t carry = 0;

/* add it back */
for (i=0; i<8; i++) {
carry = carry + a->limb[i] + ((i==4)?(scarry_mask&~1):scarry_mask);
a->limb[i] = carry & mask;
carry >>= 56;
}

assert(is_zero(carry + scarry));
}

mask_t
p448_is_zero (
const struct p448_t *a
) {
struct p448_t b;
p448_copy(&b,a);
p448_strong_reduce(&b);

uint64_t any = 0;
int i;
for (i=0; i<8; i++) {
any |= b.limb[i];
}
return is_zero(any);
}

/**
 * Write x to serial as 56 bytes: limb 0 first, 7 bytes per limb, least
 * significant byte first.
 *
 * x is strongly reduced on a private copy before encoding, so the caller's
 * element is not modified.
 *
 * @param serial  Output buffer; 56 bytes are written.
 * @param x       Field element to encode.
 */
void
p448_serialize (
    uint8_t *serial,
    const struct p448_t *x
) {
    p448_t canon;
    unsigned int limb_idx, byte_idx;

    p448_copy(&canon, x);
    p448_strong_reduce(&canon);

    for (limb_idx = 0; limb_idx < 8; limb_idx++) {
        uint64_t word = canon.limb[limb_idx];
        uint8_t *dst = serial + 7*limb_idx;

        for (byte_idx = 0; byte_idx < 7; byte_idx++) {
            dst[byte_idx] = (uint8_t)word;
            word >>= 8;
        }
        /* A reduced limb has nothing left above its 56 bits. */
        assert(word == 0);
    }
}

/**
 * Read a field element from 56 bytes: limb 0 first, 7 bytes per limb,
 * least significant byte first.
 *
 * The limbs are always stored in x, whether or not the encoding is
 * canonical.  The range check is done without branching.
 *
 * @param x       Output field element.
 * @param serial  56 input bytes.
 * @return        All-ones when the encoded value is below p, 0 when it is
 *                greater than or equal to p (the complement of
 *                is_zero(ge ^ mask)).
 */
mask_t
p448_deserialize (
p448_t *x,
const uint8_t serial[56]
) {
int i,j;
for (i=0; i<8; i++) {
uint64_t out = 0;
for (j=0; j<7; j++) {
out |= ((uint64_t)serial[7*i+j])<<(8*j);
}
x->limb[i] = out;
}
/* Check for reduction.
*
* The idea is to create a variable ge which is all ones (rather, 56 ones)
* if and only if the low $i$ words of $x$ are >= those of p.
*
* Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111)
*/
uint64_t ge = -1, mask = (1ull<<56)-1;
for (i=0; i<4; i++) {
ge &= x->limb[i];
}
/* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */
ge = (ge & (x->limb[4] + 1)) | is_zero(x->limb[4] ^ mask);
/* Propagate the rest */
for (i=5; i<8; i++) {
ge &= x->limb[i];
}
return ~is_zero(ge ^ mask);
}

/**
 * Invert n field elements using a single call to p448_inverse.
 *
 * For n >= 2 the running products of the inputs are built up in out[1..],
 * the full product is inverted once, and a backward pass peels off one
 * input at a time.  That costs 3*(n-1) multiplications plus one inversion.
 * n == 0 is a no-op and n == 1 calls p448_inverse directly.
 *
 * @param out  Array of n outputs; declared __restrict__, so it must not
 *             overlap in.
 * @param in   Array of n inputs.
 * @param n    Number of elements.
 */
void
simultaneous_invert_p448(
struct p448_t *__restrict__ out,
const struct p448_t *in,
unsigned int n
) {
if (n==0) {
return;
} else if (n==1) {
p448_inverse(out,in);
return;
}
/* Forward pass: out[i+1] = in[0] * ... * in[i]. */
p448_copy(&out[1], &in[0]);
int i;
for (i=1; i<(int) (n-1); i++) {
p448_mul(&out[i+1], &out[i], &in[i]);
}
p448_mul(&out[0], &out[n-1], &in[n-1]);
/* tmp is needed because p448_inverse is not used in place on out[0]. */
struct p448_t tmp;
p448_inverse(&tmp, &out[0]);
p448_copy(&out[0], &tmp);
/* at this point, out[0] = product(in[i]) ^ -1
* out[i] = product(in[0]..in[i-1]) if i != 0
*/
/* Backward pass: out[0] carries the inverse of the remaining prefix. */
for (i=n-1; i>0; i--) {
p448_mul(&tmp, &out[i], &out[0]);
p448_copy(&out[i], &tmp);
p448_mul(&tmp, &out[0], &in[i]);
p448_copy(&out[0], &tmp);
}
}

+ 373
- 0
src/arch_ref64/p448.h View File

@@ -0,0 +1,373 @@
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/
#ifndef __P448_H__
#define __P448_H__ 1

#include <stdint.h>
#include <assert.h>
#include <string.h>

#include "word.h"

/**
 * Field element, stored as eight limbs with limb[0] least significant.
 * The arithmetic in this architecture masks and carries at 56 bits per
 * limb; a limb may temporarily hold more than 56 bits between reductions.
 * The type is 32-byte aligned.
 */
typedef struct p448_t {
uint64_t limb[8];
} __attribute__((aligned(32))) p448_t;

#ifdef __cplusplus
extern "C" {
#endif

static __inline__ void
p448_set_ui (
p448_t *out,
uint64_t x
) __attribute__((unused));
static __inline__ void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t do_swap
) __attribute__((unused));

static __inline__ void
p448_add (
p448_t *out,
const p448_t *a,
const p448_t *b
) __attribute__((unused));
static __inline__ void
p448_sub (
p448_t *out,
const p448_t *a,
const p448_t *b
) __attribute__((unused));
static __inline__ void
p448_neg (
p448_t *out,
const p448_t *a
) __attribute__((unused));
static __inline__ void
p448_cond_neg (
p448_t *a,
mask_t doNegate
) __attribute__((unused));

static __inline__ void
p448_addw (
p448_t *a,
uint64_t x
) __attribute__((unused));
static __inline__ void
p448_subw (
p448_t *a,
uint64_t x
) __attribute__((unused));
static __inline__ void
p448_copy (
p448_t *out,
const p448_t *a
) __attribute__((unused));
static __inline__ void
p448_weak_reduce (
p448_t *inout
) __attribute__((unused));
void
p448_strong_reduce (
p448_t *inout
);

mask_t
p448_is_zero (
const p448_t *in
);

static __inline__ void
p448_bias (
p448_t *inout,
int amount
) __attribute__((unused));

static __inline__ void
p448_really_bias (
p448_t *inout,
int amount
) __attribute__((unused));
void
p448_mul (
p448_t *__restrict__ out,
const p448_t *a,
const p448_t *b
);

void
p448_mulw (
p448_t *__restrict__ out,
const p448_t *a,
uint64_t b
);

void
p448_sqr (
p448_t *__restrict__ out,
const p448_t *a
);
static __inline__ void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) __attribute__((unused));

void
p448_serialize (
uint8_t *serial,
const struct p448_t *x
);

mask_t
p448_deserialize (
p448_t *x,
const uint8_t serial[56]
);
static __inline__ void
p448_mask(
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) __attribute__((unused));

/**
* Returns 1/x.
*
* If x=0, returns 0.
*/
void
p448_inverse (
struct p448_t* a,
const struct p448_t* x
);
void
simultaneous_invert_p448 (
struct p448_t *__restrict__ out,
const struct p448_t *in,
unsigned int n
);

static inline mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) __attribute__((always_inline,unused));

/* -------------- Inline functions begin here -------------- */

/**
 * Set out to the small integer x: limb 0 receives x, all other limbs are
 * cleared.
 *
 * @param out  Element to overwrite.
 * @param x    Value for the lowest limb.
 */
void
p448_set_ui (
    p448_t *out,
    uint64_t x
) {
    memset(out->limb, 0, sizeof(out->limb));
    out->limb[0] = x;
}
/**
 * Conditionally exchange a and b without branching.
 *
 * For every limb the XOR difference is masked with doswap and applied to
 * both sides: an all-ones mask swaps, a zero mask leaves both unchanged.
 *
 * @param a       First element.
 * @param b       Second element.
 * @param doswap  Selection mask.
 */
void
p448_cond_swap (
    p448_t *a,
    p448_t *b,
    mask_t doswap
) {
    uint64_t *lhs = a->limb;
    uint64_t *rhs = b->limb;
    unsigned int k;

    for (k = 0; k < 8; k++) {
        const uint64_t flip = doswap & (lhs[k] ^ rhs[k]);
        lhs[k] ^= flip;
        rhs[k] ^= flip;
    }
}

/**
 * out = a + b: limb-wise addition followed by p448_weak_reduce.
 *
 * @param out  Result; may be the same object as a or b.
 * @param a    First addend.
 * @param b    Second addend.
 */
void
p448_add (
    p448_t *out,
    const p448_t *a,
    const p448_t *b
) {
    unsigned int k = 0;

    while (k < 8) {
        out->limb[k] = a->limb[k] + b->limb[k];
        k++;
    }
    p448_weak_reduce(out);
}

/**
 * out = a - b: limb-wise subtraction with a bias of 2*p added first,
 * followed by p448_weak_reduce.
 *
 * The bias is 2*(2^56 - 1) on every limb except limb 4, where it is 2 less,
 * matching the limb pattern of p used in p448_strong_reduce.
 *
 * @param out  Result; may be the same object as a or b.
 * @param a    Minuend.
 * @param b    Subtrahend.
 */
void
p448_sub (
    p448_t *out,
    const p448_t *a,
    const p448_t *b
) {
    const uint64_t twice_mask = ((1ull<<56)-1)*2;
    unsigned int k;

    for (k = 0; k < 8; k++) {
        uint64_t bias = twice_mask;
        if (k == 4) {
            bias -= 2;
        }
        out->limb[k] = a->limb[k] - b->limb[k] + bias;
    }
    p448_weak_reduce(out);
}

/**
 * out = -a: each limb is subtracted from the corresponding limb of 2*p,
 * followed by p448_weak_reduce.
 *
 * The 2*p limb is 2*(2^56 - 1) everywhere except limb 4, where it is 2 less.
 *
 * @param out  Result; may be the same object as a.
 * @param a    Element to negate.
 */
void
p448_neg (
    struct p448_t *out,
    const p448_t *a
) {
    const uint64_t twice_mask = ((1ull<<56)-1)*2;
    unsigned int k;

    for (k = 0; k < 8; k++) {
        uint64_t bias = twice_mask;
        if (k == 4) {
            bias -= 2;
        }
        out->limb[k] = bias - a->limb[k];
    }
    p448_weak_reduce(out);
}

/**
 * Conditionally negate a in place without branching.
 *
 * The negation is always computed; each limb of a is then rebuilt from the
 * original bits selected by ~doNegate and the negated bits selected by
 * doNegate.
 *
 * @param a         Element to update.
 * @param doNegate  Selection mask.
 */
void
p448_cond_neg(
    struct p448_t *a,
    mask_t doNegate
) {
    struct p448_t negated;
    uint64_t *dst = a->limb;
    const uint64_t *alt = negated.limb;
    unsigned int k;

    p448_neg(&negated, a);
    p448_bias(&negated, 2);

    for (k = 0; k < 8; k++) {
        dst[k] = ( dst[k] & ~doNegate )
               | ( alt[k] & doNegate );
    }
}

/**
 * Add the word x to a in place.
 *
 * Only limb 0 is touched directly; whatever exceeds 56 bits there is
 * carried into limb 1.  No further carry propagation or reduction is done.
 *
 * @param a  Element to update.
 * @param x  Word to add.
 */
void
p448_addw (
    p448_t *a,
    uint64_t x
) {
    const uint64_t low = a->limb[0] + x;

    a->limb[1] += low >> 56;
    a->limb[0] = low & ((1ull<<56)-1);
}
/**
 * Subtract the word x from a in place.
 *
 * x is taken off limb 0 (wrapping modulo 2^64 if needed), then one copy of
 * p is added through p448_really_bias, and the result is weakly reduced.
 *
 * @param a  Element to update.
 * @param x  Word to subtract.
 */
void
p448_subw (
    p448_t *a,
    uint64_t x
) {
    uint64_t *low = &a->limb[0];

    *low = *low - x;
    p448_really_bias(a, 1);
    p448_weak_reduce(a);
}

/**
 * Copy the field element a into out (a plain byte copy of the struct).
 *
 * @param out  Destination.
 * @param a    Source.
 */
void
p448_copy (
    p448_t *out,
    const p448_t *a
) {
    memcpy(out, a, sizeof(p448_t));
}

/**
 * Add amt copies of p to a, limb by limb, without any carry handling.
 *
 * Every limb receives amt * (2^56 - 1); limb 4 receives amt less than that,
 * matching the limb pattern of p used in p448_strong_reduce.
 *
 * @param a    Element to update.
 * @param amt  Number of multiples of p to add.
 */
void
p448_really_bias (
    p448_t *a,
    int amt
) {
    const uint64_t count = (uint64_t)amt;
    const uint64_t per_limb = ((1ull<<56)-1)*count;
    int k;

    for (k = 0; k < 8; k++) {
        a->limb[k] += per_limb;
    }
    /* limb 4 of p is one smaller than the others */
    a->limb[4] -= count;
}

/**
 * Intentionally does nothing in this architecture; both arguments are
 * ignored.  Use p448_really_bias when a bias must actually be added.
 *
 * @param a    Unused.
 * @param amt  Unused.
 */
void
p448_bias (
    p448_t *a,
    int amt
) {
    /* silence unused-parameter warnings */
    (void) amt;
    (void) a;
}

/**
 * Partial carry propagation, in place.
 *
 * Each limb keeps its low 56 bits and receives the overflow (bits 56 and
 * up) of the limb below it.  The overflow of the top limb is added to both
 * limb 4 and limb 0.  Carries move up by one limb only, so a limb can still
 * slightly exceed 56 bits afterwards.
 *
 * @param a  Element to reduce.
 */
void
p448_weak_reduce (
p448_t *a
) {
uint64_t mask = (1ull<<56) - 1;
uint64_t tmp = a->limb[7] >> 56;
int i;
/* Added before the loop so that limb 4's overflow into limb 5 sees it. */
a->limb[4] += tmp;
/* Runs from the top down: limb[i-1] is read here before it is masked. */
for (i=7; i>0; i--) {
a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56);
}
a->limb[0] = (a->limb[0] & mask) + tmp;
}

/**
 * y = x^(2^n), i.e. x squared n times.
 *
 * Squarings alternate between y and a scratch element.  An odd n starts
 * with a single squaring into y, an even n with a pair, and the rest is
 * done two at a time.
 *
 * @param y  Output; declared __restrict__, so it must not alias x.
 * @param x  Input.
 * @param n  Number of squarings; must be positive (asserted).
 */
void
p448_sqrn (
    p448_t *__restrict__ y,
    const p448_t *x,
    int n
) {
    p448_t scratch;

    assert(n>0);

    if (n & 1) {
        p448_sqr(y, x);
        n -= 1;
    } else {
        p448_sqr(&scratch, x);
        p448_sqr(y, &scratch);
        n -= 2;
    }

    while (n) {
        p448_sqr(&scratch, y);
        p448_sqr(y, &scratch);
        n -= 2;
    }
}

mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) {
struct p448_t ra, rb;
p448_copy(&ra, a);
p448_copy(&rb, b);
p448_sub(&ra, &ra, &rb);
return p448_is_zero(&ra);
}

/**
 * a = b AND mask, limb by limb: an all-ones mask copies b, a zero mask
 * clears a.
 *
 * @param a     Output.
 * @param b     Input.
 * @param mask  Bit mask applied to every limb.
 */
void
p448_mask (
    struct p448_t *a,
    const struct p448_t *b,
    mask_t mask
) {
    const uint64_t *src = b->limb;
    uint64_t *dst = a->limb;
    unsigned int k;

    for (k = 0; k < 8; k++) {
        dst[k] = src[k] & mask;
    }
}

#ifdef __cplusplus
}; /* extern "C" */
#endif

#endif /* __P448_H__ */

+ 4
- 10
src/goldilocks.c View File

@@ -442,11 +442,8 @@ goldilocks_verify (
goldilocks_global.wnafs, WNAF_PRECMP_BITS );
untwist_and_double_and_serialize( &pk, &pk_text );
field_sub(&eph, &eph, &pk);
field_bias(&eph, 2);
succ = field_is_zero(&eph);

succ = field_eq(&eph, &pk);
return succ ? 0 : GOLDI_EINVAL;
}
#endif
@@ -533,11 +530,8 @@ goldilocks_verify_precomputed (
if (!succ) return GOLDI_EINVAL;
untwist_and_double_and_serialize( &pk, &pk_text );
field_sub(&eph, &eph, &pk);
field_bias(&eph, 2);
succ = field_is_zero(&eph);

succ = field_eq(&eph, &pk);
return succ ? 0 : GOLDI_EINVAL;
}



+ 1
- 0
src/include/field.h View File

@@ -24,6 +24,7 @@
#define field_cond_neg p448_cond_neg
#define field_serialize p448_serialize
#define field_deserialize p448_deserialize
#define field_eq p448_eq
#define field_is_zero p448_is_zero
#define simultaneous_invert simultaneous_invert_p448 /* FUTURE: consistency */



Loading…
Cancel
Save