From a9c72b5a8de3b23337cd2d1dc3fb4003aed80fb7 Mon Sep 17 00:00:00 2001 From: Mike Hamburg Date: Thu, 18 Sep 2014 21:21:01 -0700 Subject: [PATCH] Begin ref impl, currently an arch option (arch_ref64). --- HISTORY.txt | 10 + src/arch_ref64/ec_point.c | 825 ++++++++++++++++++++++++++++++++++++++ src/arch_ref64/p448.c | 477 ++++++++++++++++++++++ src/arch_ref64/p448.h | 373 +++++++++++++++++ src/goldilocks.c | 14 +- src/include/field.h | 1 + 6 files changed, 1690 insertions(+), 10 deletions(-) create mode 100644 src/arch_ref64/ec_point.c create mode 100644 src/arch_ref64/p448.c create mode 100644 src/arch_ref64/p448.h diff --git a/HISTORY.txt b/HISTORY.txt index 017b226..f5b0e0b 100644 --- a/HISTORY.txt +++ b/HISTORY.txt @@ -1,3 +1,13 @@ +September 18, 2014: + Begin work on a "ref" implementation. Currently this is just the + arch_ref64 architecture. The ref implementation always weak_reduces + after arithmetic, and doesn't use vectors or other hackery. Currently + it still must declare field elements as vector aligned, though, because + other code outside the arch directory can be vectorized. + + Change goldilocks.c to use field_eq instead of calling deep into field + APIs. + September 6, 2014: Pull in minor changes from David Leon Gil and Nicholas Wilson, with some adjustments. I hope the adjustments don't break their compiles. diff --git a/src/arch_ref64/ec_point.c b/src/arch_ref64/ec_point.c new file mode 100644 index 0000000..978b3f9 --- /dev/null +++ b/src/arch_ref64/ec_point.c @@ -0,0 +1,825 @@ +/** + * @cond internal + * @file ec_point.c + * @copyright + * Copyright (c) 2014 Cryptography Research, Inc. \n + * Released under the MIT License. See LICENSE.txt for license information. + * @author Mike Hamburg + * @warning This file was automatically generated. 
+ */ + +#include "ec_point.h" + +/* p448_isr: a = result of a fixed chain of squarings and multiplications applied to x. Presumably an inverse square root: p448_inverse below uses x * isr(x)^4 as 1/x. */ +void +p448_isr ( + struct p448_t* a, + const struct p448_t* x +) { + struct p448_t L0, L1, L2; + p448_sqr ( &L1, x ); + p448_mul ( &L2, x, &L1 ); + p448_sqr ( &L1, &L2 ); + p448_mul ( &L2, x, &L1 ); + p448_sqrn ( &L1, &L2, 3 ); + p448_mul ( &L0, &L2, &L1 ); + p448_sqrn ( &L1, &L0, 3 ); + p448_mul ( &L0, &L2, &L1 ); + p448_sqrn ( &L2, &L0, 9 ); + p448_mul ( &L1, &L0, &L2 ); + p448_sqr ( &L0, &L1 ); + p448_mul ( &L2, x, &L0 ); + p448_sqrn ( &L0, &L2, 18 ); + p448_mul ( &L2, &L1, &L0 ); + p448_sqrn ( &L0, &L2, 37 ); + p448_mul ( &L1, &L2, &L0 ); + p448_sqrn ( &L0, &L1, 37 ); + p448_mul ( &L1, &L2, &L0 ); + p448_sqrn ( &L0, &L1, 111 ); + p448_mul ( &L2, &L1, &L0 ); + p448_sqr ( &L0, &L2 ); + p448_mul ( &L1, x, &L0 ); + p448_sqrn ( &L0, &L1, 223 ); + p448_mul ( a, &L2, &L0 ); +} +/* p448_inverse: a = x * isr(x)^4 (isr result squared twice, then multiplied by x). */ +void +p448_inverse ( + struct p448_t* a, + const struct p448_t* x +) { + struct p448_t L0, L1; + p448_isr ( &L0, x ); + p448_sqr ( &L1, &L0 ); + p448_sqr ( &L0, &L1 ); + p448_mul ( a, x, &L0 ); +} +/* Updates d in place from the niels-form value e (reads e->a, e->b, e->c); all of d's x, y, z, t, u are overwritten. */ +void +add_tw_niels_to_tw_extensible ( + struct tw_extensible_t* d, + const struct tw_niels_t* e +) { + struct p448_t L0, L1; + p448_sub ( &L1, &d->y, &d->x ); + p448_mul ( &L0, &e->a, &L1 ); + p448_add ( &L1, &d->x, &d->y ); + p448_mul ( &d->y, &e->b, &L1 ); + p448_mul ( &L1, &d->u, &d->t ); + p448_mul ( &d->x, &e->c, &L1 ); + p448_add ( &d->u, &L0, &d->y ); + p448_sub ( &d->t, &d->y, &L0 ); + p448_sub ( &d->y, &d->z, &d->x ); + p448_add ( &L0, &d->x, &d->z ); + p448_mul ( &d->z, &L0, &d->y ); + p448_mul ( &d->x, &d->y, &d->t ); + p448_mul ( &d->y, &L0, &d->u ); +} +/* Same sequence as add_tw_niels_to_tw_extensible with e->a and e->b exchanged and the (z - x) / (x + z) terms swapped. */ +void +sub_tw_niels_from_tw_extensible ( + struct tw_extensible_t* d, + const struct tw_niels_t* e +) { + struct p448_t L0, L1; + p448_sub ( &L1, &d->y, &d->x ); + p448_mul ( &L0, &e->b, &L1 ); + p448_add ( &L1, &d->x, &d->y ); + p448_mul ( &d->y, &e->a, &L1 ); + p448_mul ( &L1, &d->u, &d->t ); + p448_mul ( &d->x, &e->c, &L1 ); + p448_add ( &d->u, &L0, &d->y ); + p448_sub ( &d->t, &d->y, &L0 ); + 
p448_add ( &d->y, &d->x, &d->z ); + p448_sub ( &L0, &d->z, &d->x ); + p448_mul ( &d->z, &L0, &d->y ); + p448_mul ( &d->x, &d->y, &d->t ); + p448_mul ( &d->y, &L0, &d->u ); +} +/* Multiplies e->z by a->z, then applies add_tw_niels_to_tw_extensible with a->n. */ +void +add_tw_pniels_to_tw_extensible ( + struct tw_extensible_t* e, + const struct tw_pniels_t* a +) { + struct p448_t L0; + p448_mul ( &L0, &e->z, &a->z ); + p448_copy ( &e->z, &L0 ); + add_tw_niels_to_tw_extensible( e, &a->n ); +} +/* Multiplies e->z by a->z, then applies sub_tw_niels_from_tw_extensible with a->n. */ +void +sub_tw_pniels_from_tw_extensible ( + struct tw_extensible_t* e, + const struct tw_pniels_t* a +) { + struct p448_t L0; + p448_mul ( &L0, &e->z, &a->z ); + p448_copy ( &e->z, &L0 ); + sub_tw_niels_from_tw_extensible( e, &a->n ); +} +/* In-place update of a computed from its x, y, z only (t and u are overwritten before being read); per its name, a doubling. */ +void +double_tw_extensible ( + struct tw_extensible_t* a +) { + struct p448_t L0, L1, L2; + p448_sqr ( &L2, &a->x ); + p448_sqr ( &L0, &a->y ); + p448_add ( &a->u, &L2, &L0 ); + p448_add ( &a->t, &a->y, &a->x ); + p448_sqr ( &L1, &a->t ); + p448_sub ( &a->t, &L1, &a->u ); + p448_sub ( &L1, &L0, &L2 ); + p448_sqr ( &a->x, &a->z ); + p448_add ( &a->z, &a->x, &a->x ); + p448_sub ( &L0, &a->z, &L1 ); + p448_mul ( &a->z, &L1, &L0 ); + p448_mul ( &a->x, &L0, &a->t ); + p448_mul ( &a->y, &L1, &a->u ); +} +/* Counterpart of double_tw_extensible for extensible_t: in place, computed from a's x, y, z only. */ +void +double_extensible ( + struct extensible_t* a +) { + struct p448_t L0, L1, L2; + p448_sqr ( &L2, &a->x ); + p448_sqr ( &L0, &a->y ); + p448_add ( &L1, &L2, &L0 ); + p448_add ( &a->t, &a->y, &a->x ); + p448_sqr ( &a->u, &a->t ); + p448_sub ( &a->t, &a->u, &L1 ); + p448_sub ( &a->u, &L0, &L2 ); + p448_sqr ( &a->x, &a->z ); + p448_add ( &a->z, &a->x, &a->x ); + p448_sub ( &L0, &a->z, &L1 ); + p448_mul ( &a->z, &L1, &L0 ); + p448_mul ( &a->x, &L0, &a->t ); + p448_mul ( &a->y, &L1, &a->u ); +} +/* Fills all five coordinates of b from a's x, y, z; a is only read. */ +void +twist_and_double ( + struct tw_extensible_t* b, + const struct extensible_t* a +) { + struct p448_t L0; + p448_sqr ( &b->x, &a->x ); + p448_sqr ( &b->z, &a->y ); + p448_add ( &b->u, &b->x, &b->z ); + p448_add ( &b->t, &a->y, &a->x ); + p448_sqr ( &L0, &b->t ); + p448_sub ( &b->t, &L0, &b->u ); + p448_sub ( &L0, &b->z, &b->x ); + 
p448_sqr ( &b->x, &a->z ); + p448_add ( &b->z, &b->x, &b->x ); + p448_sub ( &b->y, &b->z, &b->u ); + p448_mul ( &b->z, &L0, &b->y ); + p448_mul ( &b->x, &b->y, &b->t ); + p448_mul ( &b->y, &L0, &b->u ); +} +/* Fills all five coordinates of b from a's x, y, z; a is only read. */ +void +untwist_and_double ( + struct extensible_t* b, + const struct tw_extensible_t* a +) { + struct p448_t L0; + p448_sqr ( &b->x, &a->x ); + p448_sqr ( &b->z, &a->y ); + p448_add ( &L0, &b->x, &b->z ); + p448_add ( &b->t, &a->y, &a->x ); + p448_sqr ( &b->u, &b->t ); + p448_sub ( &b->t, &b->u, &L0 ); + p448_sub ( &b->u, &b->z, &b->x ); + p448_sqr ( &b->x, &a->z ); + p448_add ( &b->z, &b->x, &b->x ); + p448_sub ( &b->y, &b->z, &b->u ); + p448_mul ( &b->z, &L0, &b->y ); + p448_mul ( &b->x, &b->y, &b->t ); + p448_mul ( &b->y, &L0, &b->u ); +} +/* n.a = y - x, n.b = x + y, n.c = -(78164 * x * y), z = 2. */ +void +convert_tw_affine_to_tw_pniels ( + struct tw_pniels_t* b, + const struct tw_affine_t* a +) { + p448_sub ( &b->n.a, &a->y, &a->x ); + p448_add ( &b->n.b, &a->x, &a->y ); + p448_mul ( &b->n.c, &a->y, &a->x ); + p448_mulw ( &b->z, &b->n.c, 78164 ); + p448_neg ( &b->n.c, &b->z ); + p448_set_ui( &b->z, 2 ); +} +/* x and y copied, z = 1, t = x, u = y. */ +void +convert_tw_affine_to_tw_extensible ( + struct tw_extensible_t* b, + const struct tw_affine_t* a +) { + p448_copy ( &b->x, &a->x ); + p448_copy ( &b->y, &a->y ); + p448_set_ui( &b->z, 1 ); + p448_copy ( &b->t, &a->x ); + p448_copy ( &b->u, &a->y ); +} +/* x and y copied, z = 1, t = x, u = y. */ +void +convert_affine_to_extensible ( + struct extensible_t* b, + const struct affine_t* a +) { + p448_copy ( &b->x, &a->x ); + p448_copy ( &b->y, &a->y ); + p448_set_ui( &b->z, 1 ); + p448_copy ( &b->t, &a->x ); + p448_copy ( &b->u, &a->y ); +} +/* n.a = y - x, n.b = x + y, n.c = -(78164 * t * u), z = 2 * a->z. */ +void +convert_tw_extensible_to_tw_pniels ( + struct tw_pniels_t* b, + const struct tw_extensible_t* a +) { + p448_sub ( &b->n.a, &a->y, &a->x ); + p448_add ( &b->n.b, &a->x, &a->y ); + p448_mul ( &b->n.c, &a->u, &a->t ); + p448_mulw ( &b->z, &b->n.c, 78164 ); + p448_neg ( &b->n.c, &b->z ); + p448_add ( &b->z, &a->z, &a->z ); +} +/* u = n.b + n.a, t = n.b - n.a, x = d->z * t, y = d->z * u, z = d->z squared. */ +void +convert_tw_pniels_to_tw_extensible ( + struct tw_extensible_t* e, + const 
struct tw_pniels_t* d +) { + p448_add ( &e->u, &d->n.b, &d->n.a ); + p448_sub ( &e->t, &d->n.b, &d->n.a ); + p448_mul ( &e->x, &d->z, &e->t ); + p448_mul ( &e->y, &d->z, &e->u ); + p448_sqr ( &e->z, &d->z ); +} +/* y = b + a, x = b - a, z = 1, t = x, u = y. */ +void +convert_tw_niels_to_tw_extensible ( + struct tw_extensible_t* e, + const struct tw_niels_t* d +) { + p448_add ( &e->y, &d->b, &d->a ); + p448_sub ( &e->x, &d->b, &d->a ); + p448_set_ui( &e->z, 1 ); + p448_copy ( &e->t, &e->x ); + p448_copy ( &e->u, &e->y ); +} +/* One in-place update of the state (xa, za, xd, zd); z0 is read but never written. Presumably a single Montgomery-ladder step -- confirm with the caller. */ +void +montgomery_step ( + struct montgomery_t* a +) { + struct p448_t L0, L1; + p448_add ( &L0, &a->zd, &a->xd ); + p448_sub ( &L1, &a->xd, &a->zd ); + p448_sub ( &a->zd, &a->xa, &a->za ); + p448_mul ( &a->xd, &L0, &a->zd ); + p448_add ( &a->zd, &a->za, &a->xa ); + p448_mul ( &a->za, &L1, &a->zd ); + p448_add ( &a->xa, &a->za, &a->xd ); + p448_sqr ( &a->zd, &a->xa ); + p448_mul ( &a->xa, &a->z0, &a->zd ); + p448_sub ( &a->zd, &a->xd, &a->za ); + p448_sqr ( &a->za, &a->zd ); + p448_sqr ( &a->xd, &L0 ); + p448_sqr ( &L0, &L1 ); + p448_mulw ( &a->zd, &a->xd, 39082 ); + p448_sub ( &L1, &a->xd, &L0 ); + p448_mul ( &a->xd, &L0, &a->zd ); + p448_sub ( &L0, &a->zd, &L1 ); + p448_mul ( &a->zd, &L0, &L1 ); +} +/* Initial state: z0 = sbz squared, xd = 1, zd = 0, xa = 1, za = z0. */ +void +deserialize_montgomery ( + struct montgomery_t* a, + const struct p448_t* sbz +) { + p448_sqr ( &a->z0, sbz ); + p448_set_ui( &a->xd, 1 ); + p448_set_ui( &a->zd, 0 ); + p448_set_ui( &a->xa, 1 ); + p448_copy ( &a->za, &a->z0 ); +} +/* Writes the encoding to b (via p448_mask with the complement of the zd-is-zero mask). The returned mask is set when the final check value equals 1 or when sbz is zero. */ +mask_t +serialize_montgomery ( + struct p448_t* b, + const struct montgomery_t* a, + const struct p448_t* sbz +) { + mask_t L0, L1, L2; + struct p448_t L3, L4, L5, L6; + p448_mul ( &L6, &a->z0, &a->zd ); + p448_sub ( &L4, &L6, &a->xd ); + p448_mul ( &L6, &a->za, &L4 ); + p448_mul ( &L5, &a->z0, &a->xd ); + p448_sub ( &L4, &L5, &a->zd ); + p448_mul ( &L3, &a->xa, &L4 ); + p448_add ( &L5, &L3, &L6 ); + p448_sub ( &L4, &L6, &L3 ); + p448_mul ( &L6, &L4, &L5 ); + p448_copy ( &L5, &a->z0 ); + p448_addw ( &L5, 1 ); + p448_sqr ( &L4, &L5 ); + p448_mulw ( &L5, &L4, 
39082 ); + p448_neg ( &L4, &L5 ); + p448_add ( &L3, &a->z0, &a->z0 ); + p448_add ( &L5, &L3, &L3 ); + p448_add ( &L3, &L5, &L4 ); + p448_mul ( &L5, &a->xd, &L3 ); + L1 = p448_is_zero( &a->zd ); + L2 = - L1; + p448_mask ( &L4, &L5, L1 ); + p448_add ( &L5, &L4, &a->zd ); + L0 = ~ L1; + p448_mul ( &L4, sbz, &L6 ); + p448_addw ( &L4, L2 ); + p448_mul ( &L6, &L5, &L4 ); + p448_mul ( &L4, &L6, &L5 ); + p448_mul ( &L5, &L6, &a->xd ); + p448_mul ( &L6, &L4, &L5 ); + p448_isr ( &L3, &L6 ); + p448_mul ( &L5, &L4, &L3 ); + p448_sqr ( &L4, &L3 ); + p448_mul ( &L3, &L6, &L4 ); + p448_mask ( b, &L5, L0 ); + p448_subw ( &L3, 1 ); + L1 = p448_is_zero( &L3 ); + L0 = p448_is_zero( sbz ); + return L1 | L0; +} +/* Writes the serialized form of a to b. The last two statements compute a value into L0 that is never read. */ +void +serialize_extensible ( + struct p448_t* b, + const struct extensible_t* a +) { + struct p448_t L0, L1, L2; + p448_sub ( &L0, &a->y, &a->z ); + p448_add ( b, &a->z, &a->y ); + p448_mul ( &L1, &a->z, &a->x ); + p448_mul ( &L2, &L0, &L1 ); + p448_mul ( &L1, &L2, &L0 ); + p448_mul ( &L0, &L2, b ); + p448_mul ( &L2, &L1, &L0 ); + p448_isr ( &L0, &L2 ); + p448_mul ( b, &L1, &L0 ); + p448_sqr ( &L1, &L0 ); + p448_mul ( &L0, &L2, &L1 ); +} +/* Writes b from a's x, y, z in a single pass; a is only read. b is also used as scratch while computing. */ +void +untwist_and_double_and_serialize ( + struct p448_t* b, + const struct tw_extensible_t* a +) { + struct p448_t L0, L1, L2, L3; + p448_mul ( &L3, &a->y, &a->x ); + p448_add ( b, &a->y, &a->x ); + p448_sqr ( &L1, b ); + p448_add ( &L2, &L3, &L3 ); + p448_sub ( b, &L1, &L2 ); + p448_sqr ( &L2, &a->z ); + p448_sqr ( &L1, &L2 ); + p448_add ( &L2, b, b ); + p448_mulw ( b, &L2, 39082 ); + p448_neg ( &L2, b ); + p448_mulw ( &L0, &L2, 39082 ); + p448_neg ( b, &L0 ); + p448_mul ( &L0, &L2, &L1 ); + p448_mul ( &L2, b, &L0 ); + p448_isr ( &L0, &L2 ); + p448_mul ( &L1, b, &L0 ); + p448_sqr ( b, &L0 ); + p448_mul ( &L0, &L2, b ); + p448_mul ( b, &L1, &L3 ); +} +/* Fills b from a and finishes with z = 1, t = x, u = y. The negated is-zero mask of (a->z - a->y) is added to y (1 when zero, assuming an all-ones mask). */ +void +twist_even ( + struct tw_extensible_t* b, + const struct extensible_t* a +) { + mask_t L0, L1; + p448_sqr ( &b->y, &a->z ); + p448_sqr ( &b->z, &a->x ); + p448_sub ( &b->u, &b->y, 
&b->z ); + p448_sub ( &b->z, &a->z, &a->x ); + p448_mul ( &b->y, &b->z, &a->y ); + p448_sub ( &b->z, &a->z, &a->y ); + p448_mul ( &b->x, &b->z, &b->y ); + p448_mul ( &b->t, &b->x, &b->u ); + p448_mul ( &b->y, &b->x, &b->t ); + p448_isr ( &b->t, &b->y ); + p448_mul ( &b->u, &b->x, &b->t ); + p448_sqr ( &b->x, &b->t ); + p448_mul ( &b->t, &b->y, &b->x ); + p448_mul ( &b->x, &a->x, &b->u ); + p448_mul ( &b->y, &a->y, &b->u ); + L1 = p448_is_zero( &b->z ); + L0 = - L1; + p448_addw ( &b->y, L0 ); + p448_set_ui( &b->z, 1 ); + p448_copy ( &b->t, &b->x ); + p448_copy ( &b->u, &b->y ); +} +/* Test helper (per its name): fills b from a with is-zero corrections on x and y; z is set to (is-zero mask of a->y) + 1, then t = x, u = y. */ +void +test_only_twist ( + struct tw_extensible_t* b, + const struct extensible_t* a +) { + mask_t L0, L1; + struct p448_t L2, L3; + p448_sqr ( &b->u, &a->z ); + p448_sqr ( &b->y, &a->x ); + p448_sub ( &b->z, &b->u, &b->y ); + p448_add ( &b->y, &b->z, &b->z ); + p448_add ( &b->u, &b->y, &b->y ); + p448_sub ( &b->y, &a->z, &a->x ); + p448_mul ( &b->x, &b->y, &a->y ); + p448_sub ( &b->z, &a->z, &a->y ); + p448_mul ( &b->t, &b->z, &b->x ); + p448_mul ( &L3, &b->t, &b->u ); + p448_mul ( &b->x, &b->t, &L3 ); + p448_isr ( &L2, &b->x ); + p448_mul ( &b->u, &b->t, &L2 ); + p448_sqr ( &L3, &L2 ); + p448_mul ( &b->t, &b->x, &L3 ); + p448_add ( &L3, &a->y, &a->x ); + p448_sub ( &L2, &a->x, &a->y ); + p448_mul ( &b->x, &b->t, &L2 ); + p448_add ( &L2, &b->x, &L3 ); + p448_sub ( &b->t, &L3, &b->x ); + p448_mul ( &b->x, &L2, &b->u ); + L0 = p448_is_zero( &b->y ); + L1 = - L0; + p448_addw ( &b->x, L1 ); + p448_mul ( &b->y, &b->t, &b->u ); + L0 = p448_is_zero( &b->z ); + L1 = - L0; + p448_addw ( &b->y, L1 ); + L1 = p448_is_zero( &a->y ); + L0 = L1 + 1; + p448_set_ui( &b->z, L0 ); + p448_copy ( &b->t, &b->x ); + p448_copy ( &b->u, &b->y ); +} +/* Returned mask is set when x * isr(x)^2 equals 1, or when x is zero. */ +mask_t +is_square ( + const struct p448_t* x +) { + mask_t L0, L1; + struct p448_t L2, L3; + p448_isr ( &L2, x ); + p448_sqr ( &L3, &L2 ); + p448_mul ( &L2, x, &L3 ); + p448_subw ( &L2, 1 ); + L1 = p448_is_zero( &L2 ); + L0 = p448_is_zero( x ); + return L1 | L0; 
+} +/* Returns is_square(z^2 - x^2). */ +mask_t +is_even_pt ( + const struct extensible_t* a +) { + struct p448_t L0, L1, L2; + p448_sqr ( &L2, &a->z ); + p448_sqr ( &L1, &a->x ); + p448_sub ( &L0, &L2, &L1 ); + return is_square ( &L0 ); +} +/* Returns is_square(x^2 + z^2). */ +mask_t +is_even_tw ( + const struct tw_extensible_t* a +) { + struct p448_t L0, L1, L2; + p448_sqr ( &L2, &a->z ); + p448_sqr ( &L1, &a->x ); + p448_add ( &L0, &L1, &L2 ); + return is_square ( &L0 ); +} +/* Fills a->x and a->y from the serialized value sz. The returned mask is set when the internal check value (L0) equals 1. */ +mask_t +deserialize_affine ( + struct affine_t* a, + const struct p448_t* sz +) { + struct p448_t L0, L1, L2, L3; + p448_sqr ( &L1, sz ); + p448_copy ( &L3, &L1 ); + p448_addw ( &L3, 1 ); + p448_sqr ( &a->x, &L3 ); + p448_mulw ( &L3, &a->x, 39082 ); + p448_neg ( &a->x, &L3 ); + p448_add ( &L3, &L1, &L1 ); + p448_add ( &a->y, &L3, &L3 ); + p448_add ( &L3, &a->y, &a->x ); + p448_copy ( &a->y, &L1 ); + p448_subw ( &a->y, 1 ); + p448_neg ( &a->x, &a->y ); + p448_mul ( &a->y, &a->x, &L3 ); + p448_sqr ( &L2, &a->x ); + p448_mul ( &L0, &L2, &a->y ); + p448_mul ( &a->y, &a->x, &L0 ); + p448_isr ( &L3, &a->y ); + p448_mul ( &a->y, &L2, &L3 ); + p448_sqr ( &L2, &L3 ); + p448_mul ( &L3, &L0, &L2 ); + p448_mul ( &L0, &a->x, &L3 ); + p448_add ( &L2, &a->y, &a->y ); + p448_mul ( &a->x, sz, &L2 ); + p448_addw ( &L1, 1 ); + p448_mul ( &a->y, &L1, &L3 ); + p448_subw ( &L0, 1 ); + return p448_is_zero( &L0 ); +} +/* Fills a from sz, using sdm1 as an extra multiplier; ends with z = 1, t = x, u = y. The returned mask is set when the internal check value (a->t before it is overwritten) equals 1. */ +mask_t +deserialize_and_twist_approx ( + struct tw_extensible_t* a, + const struct p448_t* sdm1, + const struct p448_t* sz +) { + struct p448_t L0, L1; + p448_sqr ( &a->z, sz ); + p448_copy ( &a->y, &a->z ); + p448_addw ( &a->y, 1 ); + p448_sqr ( &a->x, &a->y ); + p448_mulw ( &a->y, &a->x, 39082 ); + p448_neg ( &a->x, &a->y ); + p448_add ( &a->y, &a->z, &a->z ); + p448_add ( &a->u, &a->y, &a->y ); + p448_add ( &a->y, &a->u, &a->x ); + p448_sqr ( &a->x, &a->z ); + p448_subw ( &a->x, 1 ); + p448_neg ( &a->u, &a->x ); + p448_mul ( &a->x, sdm1, &a->u ); + p448_mul ( &L0, &a->x, &a->y ); + p448_mul ( &a->t, &L0, &a->y ); + p448_mul ( &a->u, &a->x, &a->t ); + p448_mul ( 
&a->t, &a->u, &L0 ); + p448_mul ( &a->y, &a->x, &a->t ); + p448_isr ( &L0, &a->y ); + p448_mul ( &a->y, &a->u, &L0 ); + p448_sqr ( &L1, &L0 ); + p448_mul ( &a->u, &a->t, &L1 ); + p448_mul ( &a->t, &a->x, &a->u ); + p448_add ( &a->x, sz, sz ); + p448_mul ( &L0, &a->u, &a->x ); + p448_copy ( &a->x, &a->z ); + p448_subw ( &a->x, 1 ); + p448_neg ( &L1, &a->x ); + p448_mul ( &a->x, &L1, &L0 ); + p448_mul ( &L0, &a->u, &a->y ); + p448_addw ( &a->z, 1 ); + p448_mul ( &a->y, &a->z, &L0 ); + p448_subw ( &a->t, 1 ); + mask_t ret = p448_is_zero( &a->t ); + p448_set_ui( &a->z, 1 ); + p448_copy ( &a->t, &a->x ); + p448_copy ( &a->u, &a->y ); + return ret; +} +/* Sets x = 0, y = 1, z = 1, t = 0, u = 0. */ +void +set_identity_extensible ( + struct extensible_t* a +) { + p448_set_ui( &a->x, 0 ); + p448_set_ui( &a->y, 1 ); + p448_set_ui( &a->z, 1 ); + p448_set_ui( &a->t, 0 ); + p448_set_ui( &a->u, 0 ); +} +/* Sets x = 0, y = 1, z = 1, t = 0, u = 0. */ +void +set_identity_tw_extensible ( + struct tw_extensible_t* a +) { + p448_set_ui( &a->x, 0 ); + p448_set_ui( &a->y, 1 ); + p448_set_ui( &a->z, 1 ); + p448_set_ui( &a->t, 0 ); + p448_set_ui( &a->u, 0 ); +} +/* Sets x = 0, y = 1. */ +void +set_identity_affine ( + struct affine_t* a +) { + p448_set_ui( &a->x, 0 ); + p448_set_ui( &a->y, 1 ); +} +/* Mask is the AND of the is-zero masks of (a->x - b->x) and (a->y - b->y). */ +mask_t +eq_affine ( + const struct affine_t* a, + const struct affine_t* b +) { + mask_t L0, L1; + struct p448_t L2; + p448_sub ( &L2, &a->x, &b->x ); + L1 = p448_is_zero( &L2 ); + p448_sub ( &L2, &a->y, &b->y ); + L0 = p448_is_zero( &L2 ); + return L1 & L0; +} +/* Cross-multiplied comparison: tests b->z * a->x against a->z * b->x, and likewise for y. The t and u fields are not consulted. */ +mask_t +eq_extensible ( + const struct extensible_t* a, + const struct extensible_t* b +) { + mask_t L0, L1; + struct p448_t L2, L3, L4; + p448_mul ( &L4, &b->z, &a->x ); + p448_mul ( &L3, &a->z, &b->x ); + p448_sub ( &L2, &L4, &L3 ); + L1 = p448_is_zero( &L2 ); + p448_mul ( &L4, &b->z, &a->y ); + p448_mul ( &L3, &a->z, &b->y ); + p448_sub ( &L2, &L4, &L3 ); + L0 = p448_is_zero( &L2 ); + return L1 & L0; +} +/* Same cross-multiplied comparison as eq_extensible, for tw_extensible_t. */ +mask_t +eq_tw_extensible ( + const struct tw_extensible_t* a, + const struct tw_extensible_t* b +) { + mask_t L0, L1; + struct p448_t L2, 
L3, L4; + p448_mul ( &L4, &b->z, &a->x ); + p448_mul ( &L3, &a->z, &b->x ); + p448_sub ( &L2, &L4, &L3 ); + L1 = p448_is_zero( &L2 ); + p448_mul ( &L4, &b->z, &a->y ); + p448_mul ( &L3, &a->z, &b->y ); + p448_sub ( &L2, &L4, &L3 ); + L0 = p448_is_zero( &L2 ); + return L1 & L0; +} +/* Fills a->x and a->y from r. The final step adds the negated is-zero mask of L8 to y (1 when L8 is zero, assuming an all-ones mask). */ +void +elligator_2s_inject ( + struct affine_t* a, + const struct p448_t* r +) { + mask_t L0, L1; + struct p448_t L2, L3, L4, L5, L6, L7, L8; + p448_sqr ( &a->x, r ); + p448_sqr ( &L3, &a->x ); + p448_copy ( &a->y, &L3 ); + p448_subw ( &a->y, 1 ); + p448_neg ( &L4, &a->y ); + p448_sqr ( &L2, &L4 ); + p448_mulw ( &L7, &L2, 1527402724 ); + p448_mulw ( &L8, &L3, 6108985600 ); + p448_add ( &a->y, &L8, &L7 ); + p448_mulw ( &L8, &L2, 6109454568 ); + p448_sub ( &L7, &a->y, &L8 ); + p448_mulw ( &L6, &a->y, 78160 ); + p448_mul ( &L5, &L7, &L6 ); + p448_mul ( &L8, &L5, &L4 ); + p448_mul ( &L4, &L5, &L6 ); + p448_mul ( &L5, &L7, &L8 ); + p448_mul ( &L8, &L5, &L4 ); + p448_mul ( &L4, &L7, &L8 ); + p448_isr ( &L6, &L4 ); + p448_mul ( &L4, &L5, &L6 ); + p448_sqr ( &L5, &L6 ); + p448_mul ( &L6, &L8, &L5 ); + p448_mul ( &L8, &L7, &L6 ); + p448_mul ( &L7, &L8, &L6 ); + p448_copy ( &L6, &a->x ); + p448_subw ( &L6, 1 ); + p448_addw ( &a->x, 1 ); + p448_mul ( &L5, &a->x, &L8 ); + p448_sub ( &a->x, &L6, &L5 ); + p448_mul ( &L5, &L4, &a->x ); + p448_mulw ( &L4, &L5, 78160 ); + p448_neg ( &a->x, &L4 ); + p448_add ( &L4, &L3, &L3 ); + p448_add ( &L3, &L4, &L2 ); + p448_subw ( &L3, 2 ); + p448_mul ( &L2, &L3, &L8 ); + p448_mulw ( &L3, &L2, 3054649120 ); + p448_add ( &L2, &L3, &a->y ); + p448_mul ( &a->y, &L7, &L2 ); + L1 = p448_is_zero( &L8 ); + L0 = - L1; + p448_addw ( &a->y, L0 ); +} +/* Mask is set iff x^2 + y^2 - 1 + 39081 * x^2 * y^2 is zero. */ +mask_t +validate_affine ( + const struct affine_t* a +) { + struct p448_t L0, L1, L2, L3; + p448_sqr ( &L0, &a->y ); + p448_sqr ( &L2, &a->x ); + p448_add ( &L3, &L2, &L0 ); + p448_subw ( &L3, 1 ); + p448_mulw ( &L1, &L2, 39081 ); + p448_neg ( &L2, &L1 ); + p448_mul ( &L1, &L0, &L2 ); + p448_sub ( &L0, &L3, &L1 ); + return 
p448_is_zero( &L0 ); +} +/* Mask is the AND of the two invariant checks below; the code evaluates the second with d = -39081. */ +mask_t +validate_tw_extensible ( + const struct tw_extensible_t* ext +) { + mask_t L0, L1; + struct p448_t L2, L3, L4, L5; + /* + * Check invariant: + * 0 = -x*y + z*t*u + */ + p448_mul ( &L2, &ext->t, &ext->u ); + p448_mul ( &L4, &ext->z, &L2 ); + p448_addw ( &L4, 0 ); + p448_mul ( &L3, &ext->x, &ext->y ); + p448_neg ( &L2, &L3 ); + p448_add ( &L3, &L2, &L4 ); + L1 = p448_is_zero( &L3 ); + /* + * Check invariant: + * 0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2 + */ + p448_sqr ( &L4, &ext->y ); + p448_neg ( &L2, &L4 ); + p448_addw ( &L2, 0 ); + p448_sqr ( &L3, &ext->x ); + p448_add ( &L4, &L3, &L2 ); + p448_sqr ( &L5, &ext->u ); + p448_sqr ( &L3, &ext->t ); + p448_mul ( &L2, &L3, &L5 ); + p448_mulw ( &L3, &L2, 39081 ); + p448_neg ( &L5, &L3 ); + p448_add ( &L3, &L5, &L4 ); + p448_neg ( &L5, &L2 ); + p448_add ( &L4, &L5, &L3 ); + p448_sqr ( &L3, &ext->z ); + p448_add ( &L2, &L3, &L4 ); + L0 = p448_is_zero( &L2 ); + return L1 & L0; +} +/* Mask is the AND of the two invariant checks below; the code evaluates the first with d = -39081. */ +mask_t +validate_extensible ( + const struct extensible_t* ext +) { + mask_t L0, L1; + struct p448_t L2, L3, L4, L5; + /* + * Check invariant: + * 0 = d*t^2*u^2 - x^2 - y^2 + z^2 + */ + p448_sqr ( &L4, &ext->y ); + p448_neg ( &L3, &L4 ); + p448_addw ( &L3, 0 ); + p448_sqr ( &L2, &ext->z ); + p448_add ( &L4, &L2, &L3 ); + p448_sqr ( &L5, &ext->u ); + p448_sqr ( &L2, &ext->t ); + p448_mul ( &L3, &L2, &L5 ); + p448_mulw ( &L5, &L3, 39081 ); + p448_neg ( &L2, &L5 ); + p448_add ( &L3, &L2, &L4 ); + p448_sqr ( &L2, &ext->x ); + p448_neg ( &L4, &L2 ); + p448_add ( &L2, &L4, &L3 ); + L1 = p448_is_zero( &L2 ); + /* + * Check invariant: + * 0 = -x*y + z*t*u + */ + p448_mul ( &L3, &ext->t, &ext->u ); + p448_mul ( &L4, &ext->z, &L3 ); + p448_addw ( &L4, 0 ); + p448_mul ( &L2, &ext->x, &ext->y ); + p448_neg ( &L3, &L2 ); + p448_add ( &L2, &L3, &L4 ); + L0 = p448_is_zero( &L2 ); + return L1 & L0; +} + + diff --git a/src/arch_ref64/p448.c b/src/arch_ref64/p448.c new file mode 100644 index 0000000..0f23613 --- /dev/null +++ 
b/src/arch_ref64/p448.c @@ -0,0 +1,477 @@ +/* Copyright (c) 2014 Cryptography Research, Inc. + * Released under the MIT License. See LICENSE.txt for license information. + */ + +#include "p448.h" +/* widemul: full 128-bit product of two 64-bit words. */ +static __inline__ __uint128_t widemul( + const uint64_t a, + const uint64_t b +) { + return ((__uint128_t)a) * ((__uint128_t)b); +} +/* is_zero: all-ones 64-bit word when a == 0, otherwise 0. The 128-bit value a-1 has its upper 64 bits set only when a is 0. */ +static __inline__ uint64_t is_zero(uint64_t a) { + /* let's hope the compiler isn't clever enough to optimize this. */ + return (((__uint128_t)a)-1)>>64; +} +/* p448_mul: cs = as * bs on 8 limbs, each output limb masked to 56 bits before the final carry fold. cs is declared __restrict__, so it must not alias as or bs. */ +void +p448_mul ( + p448_t *__restrict__ cs, + const p448_t *as, + const p448_t *bs +) { + const uint64_t *a = as->limb, *b = bs->limb; + uint64_t *c = cs->limb; + + __uint128_t accum0 = 0, accum1 = 0, accum2; + uint64_t mask = (1ull<<56) - 1; + /* Limbwise sums: aa = a_lo + a_hi, bb = b_lo + b_hi, bbb = b_lo + 2*b_hi. */ + uint64_t aa[4], bb[4], bbb[4]; + + unsigned int i; + for (i=0; i<4; i++) { + aa[i] = a[i] + a[i+4]; + bb[i] = b[i] + b[i+4]; + bbb[i] = bb[i] + b[i+4]; + } + /* Always 0 here, so the rolled-loop branch is never taken and only the unrolled branch runs. */ + int I_HATE_UNROLLED_LOOPS = 0; + + if (I_HATE_UNROLLED_LOOPS) { + /* The compiler probably won't unroll this, + * so it's like 80% slower. 
+ */ + for (i=0; i<4; i++) { + accum2 = 0; + + unsigned int j; + for (j=0; j<=i; j++) { + accum2 += widemul(a[j], b[i-j]); + accum1 += widemul(aa[j], bb[i-j]); + accum0 += widemul(a[j+4], b[i-j+4]); + } + for (; j<4; j++) { + accum2 += widemul(a[j], b[i-j+8]); + accum1 += widemul(aa[j], bbb[i-j+4]); + accum0 += widemul(a[j+4], bb[i-j+4]); + } + + accum1 -= accum2; + accum0 += accum2; + + c[i] = ((uint64_t)(accum0)) & mask; + c[i+4] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 56; + accum1 >>= 56; + } + } else { + accum2 = widemul(a[0], b[0]); + accum1 += widemul(aa[0], bb[0]); + accum0 += widemul(a[4], b[4]); + + accum2 += widemul(a[1], b[7]); + accum1 += widemul(aa[1], bbb[3]); + accum0 += widemul(a[5], bb[3]); + + accum2 += widemul(a[2], b[6]); + accum1 += widemul(aa[2], bbb[2]); + accum0 += widemul(a[6], bb[2]); + + accum2 += widemul(a[3], b[5]); + accum1 += widemul(aa[3], bbb[1]); + accum0 += widemul(a[7], bb[1]); + + accum1 -= accum2; + accum0 += accum2; + + c[0] = ((uint64_t)(accum0)) & mask; + c[4] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 56; + accum1 >>= 56; + + accum2 = widemul(a[0], b[1]); + accum1 += widemul(aa[0], bb[1]); + accum0 += widemul(a[4], b[5]); + + accum2 += widemul(a[1], b[0]); + accum1 += widemul(aa[1], bb[0]); + accum0 += widemul(a[5], b[4]); + + accum2 += widemul(a[2], b[7]); + accum1 += widemul(aa[2], bbb[3]); + accum0 += widemul(a[6], bb[3]); + + accum2 += widemul(a[3], b[6]); + accum1 += widemul(aa[3], bbb[2]); + accum0 += widemul(a[7], bb[2]); + + accum1 -= accum2; + accum0 += accum2; + + c[1] = ((uint64_t)(accum0)) & mask; + c[5] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 56; + accum1 >>= 56; + + accum2 = widemul(a[0], b[2]); + accum1 += widemul(aa[0], bb[2]); + accum0 += widemul(a[4], b[6]); + + accum2 += widemul(a[1], b[1]); + accum1 += widemul(aa[1], bb[1]); + accum0 += widemul(a[5], b[5]); + + accum2 += widemul(a[2], b[0]); + accum1 += widemul(aa[2], bb[0]); + accum0 += widemul(a[6], b[4]); + + accum2 += widemul(a[3], 
b[7]); + accum1 += widemul(aa[3], bbb[3]); + accum0 += widemul(a[7], bb[3]); + + accum1 -= accum2; + accum0 += accum2; + + c[2] = ((uint64_t)(accum0)) & mask; + c[6] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 56; + accum1 >>= 56; + + accum2 = widemul(a[0], b[3]); + accum1 += widemul(aa[0], bb[3]); + accum0 += widemul(a[4], b[7]); + + accum2 += widemul(a[1], b[2]); + accum1 += widemul(aa[1], bb[2]); + accum0 += widemul(a[5], b[6]); + + accum2 += widemul(a[2], b[1]); + accum1 += widemul(aa[2], bb[1]); + accum0 += widemul(a[6], b[5]); + + accum2 += widemul(a[3], b[0]); + accum1 += widemul(aa[3], bb[0]); + accum0 += widemul(a[7], b[4]); + + accum1 -= accum2; + accum0 += accum2; + + c[3] = ((uint64_t)(accum0)) & mask; + c[7] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 56; + accum1 >>= 56; + } /* !I_HATE_UNROLLED_LOOPS */ + /* Fold the remaining carries: accum0 (out of limb 3) plus accum1 (out of limb 7) go into limb 4, accum1 also into limb 0; one more carry step reaches limbs 5 and 1. */ + accum0 += accum1; + accum0 += c[4]; + accum1 += c[0]; + c[4] = ((uint64_t)(accum0)) & mask; + c[0] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 56; + accum1 >>= 56; + + c[5] += ((uint64_t)(accum0)); + c[1] += ((uint64_t)(accum1)); +} +/* p448_mulw: cs = as * b for a single 64-bit word b. Limbs 0..3 and 4..7 are accumulated as two separate carry chains; the carry out of limb 7 is folded into limbs 0 and 4, the carry out of limb 3 into limb 4. */ +void +p448_mulw ( + p448_t *__restrict__ cs, + const p448_t *as, + uint64_t b +) { + const uint64_t *a = as->limb; + uint64_t *c = cs->limb; + + __uint128_t accum0 = 0, accum4 = 0; + uint64_t mask = (1ull<<56) - 1; + + int i; + for (i=0; i<4; i++) { + accum0 += widemul(b, a[i]); + accum4 += widemul(b, a[i+4]); + c[i] = accum0 & mask; accum0 >>= 56; + c[i+4] = accum4 & mask; accum4 >>= 56; + } + + accum0 += accum4 + c[4]; + c[4] = accum0 & mask; + c[5] += accum0 >> 56; + + accum4 += c[0]; + c[0] = accum4 & mask; + c[1] += accum4 >> 56; +} +/* p448_sqr: cs = as squared, using the same 8-limb layout and 56-bit masking as p448_mul; symmetric cross terms are doubled rather than computed twice. cs must not alias as. */ +void +p448_sqr ( + p448_t *__restrict__ cs, + const p448_t *as +) { + const uint64_t *a = as->limb; + uint64_t *c = cs->limb; + + __uint128_t accum0 = 0, accum1 = 0, accum2; + uint64_t mask = (1ull<<56) - 1; + + uint64_t aa[4]; + + /* For some reason clang doesn't vectorize this without prompting? 
*/ + unsigned int i; + for (i=0; i<4; i++) { + aa[i] = a[i] + a[i+4]; + } + + accum2 = widemul(a[0],a[3]); + accum0 = widemul(aa[0],aa[3]); + accum1 = widemul(a[4],a[7]); + + accum2 += widemul(a[1], a[2]); + accum0 += widemul(aa[1], aa[2]); + accum1 += widemul(a[5], a[6]); + + accum0 -= accum2; + accum1 += accum2; + + c[3] = ((uint64_t)(accum1))<<1 & mask; + c[7] = ((uint64_t)(accum0))<<1 & mask; + + accum0 >>= 55; + accum1 >>= 55; + + accum0 += widemul(2*aa[1],aa[3]); + accum1 += widemul(2*a[5], a[7]); + accum0 += widemul(aa[2], aa[2]); + accum1 += accum0; + + accum0 -= widemul(2*a[1], a[3]); + accum1 += widemul(a[6], a[6]); + + accum2 = widemul(a[0],a[0]); + accum1 -= accum2; + accum0 += accum2; + + accum0 -= widemul(a[2], a[2]); + accum1 += widemul(aa[0], aa[0]); + accum0 += widemul(a[4], a[4]); + + c[0] = ((uint64_t)(accum0)) & mask; + c[4] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 56; + accum1 >>= 56; + + accum2 = widemul(2*aa[2],aa[3]); + accum0 -= widemul(2*a[2], a[3]); + accum1 += widemul(2*a[6], a[7]); + + accum1 += accum2; + accum0 += accum2; + + accum2 = widemul(2*a[0],a[1]); + accum1 += widemul(2*aa[0], aa[1]); + accum0 += widemul(2*a[4], a[5]); + + accum1 -= accum2; + accum0 += accum2; + + c[1] = ((uint64_t)(accum0)) & mask; + c[5] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 56; + accum1 >>= 56; + + accum2 = widemul(aa[3],aa[3]); + accum0 -= widemul(a[3], a[3]); + accum1 += widemul(a[7], a[7]); + + accum1 += accum2; + accum0 += accum2; + + accum2 = widemul(2*a[0],a[2]); + accum1 += widemul(2*aa[0], aa[2]); + accum0 += widemul(2*a[4], a[6]); + + accum2 += widemul(a[1], a[1]); + accum1 += widemul(aa[1], aa[1]); + accum0 += widemul(a[5], a[5]); + + accum1 -= accum2; + accum0 += accum2; + + c[2] = ((uint64_t)(accum0)) & mask; + c[6] = ((uint64_t)(accum1)) & mask; + + accum0 >>= 56; + accum1 >>= 56; + + accum0 += c[3]; + accum1 += c[7]; + c[3] = ((uint64_t)(accum0)) & mask; + c[7] = ((uint64_t)(accum1)) & mask; + + /* we could almost stop here, but it 
wouldn't be stable, so... */ + + accum0 >>= 56; + accum1 >>= 56; + c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); + c[0] += ((uint64_t)(accum1)); +} +/* p448_strong_reduce: in-place reduction of a so that every limb ends up masked to 56 bits; per the comments below, the value is first brought under 2p and then p is conditionally subtracted. */ +void +p448_strong_reduce ( + p448_t *a +) { + uint64_t mask = (1ull<<56)-1; + + /* first, clear high */ + a->limb[4] += a->limb[7]>>56; + a->limb[0] += a->limb[7]>>56; + a->limb[7] &= mask; + + /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */ + + /* compute total_value - p. No need to reduce mod p. */ + + __int128_t scarry = 0; + int i; + for (i=0; i<8; i++) { + scarry = scarry + a->limb[i] - ((i==4)?mask-1:mask); + a->limb[i] = scarry & mask; + scarry >>= 56; + } + + /* uncommon case: it was >= p, so now scarry = 0 and this = x + * common case: it was < p, so now scarry = -1 and this = x - p + 2^448 + * so let's add back in p. will carry back off the top for 2^448. + */ + + assert(is_zero(scarry) | is_zero(scarry+1)); + + uint64_t scarry_mask = scarry & mask; + __uint128_t carry = 0; + + /* add it back */ + for (i=0; i<8; i++) { + carry = carry + a->limb[i] + ((i==4)?(scarry_mask&~1):scarry_mask); + a->limb[i] = carry & mask; + carry >>= 56; + } + + assert(is_zero(carry + scarry)); +} +/* p448_is_zero: returns is_zero() of the OR of all limbs after strong reduction, i.e. all-ones when a is zero and 0 otherwise. Works on a copy, so a is not modified. */ +mask_t +p448_is_zero ( + const struct p448_t *a +) { + struct p448_t b; + p448_copy(&b,a); + p448_strong_reduce(&b); + + uint64_t any = 0; + int i; + for (i=0; i<8; i++) { + any |= b.limb[i]; + } + return is_zero(any); +} +/* p448_serialize: writes the strongly reduced value of x as 56 bytes, little-endian, 7 bytes per limb. x itself is not modified. */ +void +p448_serialize ( + uint8_t *serial, + const struct p448_t *x +) { + int i,j; + p448_t red; + p448_copy(&red, x); + p448_strong_reduce(&red); + for (i=0; i<8; i++) { + for (j=0; j<7; j++) { + serial[7*i+j] = red.limb[i]; + red.limb[i] >>= 8; + } + assert(red.limb[i] == 0); + } +} +/* p448_deserialize: reads 56 little-endian bytes into 8 limbs of 7 bytes each. Returns 0 when the encoded value is >= p (ge ends up equal to mask) and all-ones otherwise. x is filled in either way. */ +mask_t +p448_deserialize ( + p448_t *x, + const uint8_t serial[56] +) { + int i,j; + for (i=0; i<8; i++) { + uint64_t out = 0; + for (j=0; j<7; j++) { + out |= ((uint64_t)serial[7*i+j])<<(8*j); + } + x->limb[i] = out; + } + + /* Check for reduction. 
+ * + * The idea is to create a variable ge which is all ones (rather, 56 ones) + * if and only if the low $i$ words of $x$ are >= those of p. + * + * Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111) + */ + uint64_t ge = -1, mask = (1ull<<56)-1; + for (i=0; i<4; i++) { + ge &= x->limb[i]; + } + + /* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */ + ge = (ge & (x->limb[4] + 1)) | is_zero(x->limb[4] ^ mask); + + /* Propagate the rest */ + for (i=5; i<8; i++) { + ge &= x->limb[i]; + } + + return ~is_zero(ge ^ mask); +} +/* simultaneous_invert_p448: inverts n field elements with a single p448_inverse call. out[] first receives running prefix products of in[], then the inverse of the full product is unwound from the back. n == 0 is a no-op; n == 1 is a plain inversion. out is __restrict__ and must not overlap in. */ +void +simultaneous_invert_p448( + struct p448_t *__restrict__ out, + const struct p448_t *in, + unsigned int n +) { + if (n==0) { + return; + } else if (n==1) { + p448_inverse(out,in); + return; + } + + p448_copy(&out[1], &in[0]); + int i; + for (i=1; i<(int) (n-1); i++) { + p448_mul(&out[i+1], &out[i], &in[i]); + } + p448_mul(&out[0], &out[n-1], &in[n-1]); + + struct p448_t tmp; + p448_inverse(&tmp, &out[0]); + p448_copy(&out[0], &tmp); + + /* at this point, out[0] = product(in[i]) ^ -1 + * out[i] = product(in[0]..in[i-1]) if i != 0 + */ + for (i=n-1; i>0; i--) { + p448_mul(&tmp, &out[i], &out[0]); + p448_copy(&out[i], &tmp); + + p448_mul(&tmp, &out[0], &in[i]); + p448_copy(&out[0], &tmp); + } +} diff --git a/src/arch_ref64/p448.h b/src/arch_ref64/p448.h new file mode 100644 index 0000000..431c04f --- /dev/null +++ b/src/arch_ref64/p448.h @@ -0,0 +1,373 @@ +/* Copyright (c) 2014 Cryptography Research, Inc. + * Released under the MIT License. See LICENSE.txt for license information. 
+ */
+#ifndef __P448_H__
+#define __P448_H__ 1
+
+/* Standard headers for the names this header uses directly:
+ * uint64_t/uint8_t, assert() and memcpy(). */
+#include <stdint.h>
+#include <assert.h>
+#include <string.h>
+
+#include "word.h"
+
+/**
+ * Field element: eight 64-bit limbs, each carrying 56 bits of the value
+ * (the reduction routines mask with 2^56 - 1), lowest limb first.
+ * The type is declared with 32-byte alignment.
+ */
+typedef struct p448_t {
+    uint64_t limb[8];
+} __attribute__((aligned(32))) p448_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Prototypes.  Functions marked static __inline__ are defined further down
+ * in this header; the others are defined in a separate source file. */
+
+/* Set out to the small integer x (limb 0 = x, other limbs 0). */
+static __inline__ void
+p448_set_ui (
+    p448_t *out,
+    uint64_t x
+) __attribute__((unused));
+
+/* Exchange a and b limb-wise under the bit mask do_swap. */
+static __inline__ void
+p448_cond_swap (
+    p448_t *a,
+    p448_t *b,
+    mask_t do_swap
+) __attribute__((unused));
+
+/* out = a + b, followed by a weak reduction. */
+static __inline__ void
+p448_add (
+    p448_t *out,
+    const p448_t *a,
+    const p448_t *b
+) __attribute__((unused));
+
+/* out = a - b (plus a multiple of p to avoid underflow), weakly reduced. */
+static __inline__ void
+p448_sub (
+    p448_t *out,
+    const p448_t *a,
+    const p448_t *b
+) __attribute__((unused));
+
+/* out = -a (computed as a multiple of p minus a), weakly reduced. */
+static __inline__ void
+p448_neg (
+    p448_t *out,
+    const p448_t *a
+) __attribute__((unused));
+
+/* Replace a by its negation in the bit positions selected by doNegate. */
+static __inline__ void
+p448_cond_neg (
+    p448_t *a,
+    mask_t doNegate
+) __attribute__((unused));
+
+/* Add the word x into limb 0 of a, carrying into limb 1. */
+static __inline__ void
+p448_addw (
+    p448_t *a,
+    uint64_t x
+) __attribute__((unused));
+
+/* Subtract the word x from limb 0 of a, then bias by p and weakly reduce. */
+static __inline__ void
+p448_subw (
+    p448_t *a,
+    uint64_t x
+) __attribute__((unused));
+
+/* Copy a into out. */
+static __inline__ void
+p448_copy (
+    p448_t *out,
+    const p448_t *a
+) __attribute__((unused));
+
+/* One carry-propagation pass; limbs may still exceed 56 bits slightly. */
+static __inline__ void
+p448_weak_reduce (
+    p448_t *inout
+) __attribute__((unused));
+
+/* Full reduction: every limb ends below 2^56. */
+void
+p448_strong_reduce (
+    p448_t *inout
+);
+
+/* Zero test on a strongly reduced copy of in; returns a mask_t. */
+mask_t
+p448_is_zero (
+    const p448_t *in
+);
+
+/* No-op in this architecture (see the inline definition). */
+static __inline__ void
+p448_bias (
+    p448_t *inout,
+    int amount
+) __attribute__((unused));
+
+/* Add amount * p limb-wise, without reducing. */
+static __inline__ void
+p448_really_bias (
+    p448_t *inout,
+    int amount
+) __attribute__((unused));
+
+/* Multiplication and squaring primitives; their definitions are not in
+ * this header.  out is __restrict__, so it must not alias an input. */
+void
+p448_mul (
+    p448_t *__restrict__ out,
+    const p448_t *a,
+    const p448_t *b
+);
+
+void
+p448_mulw (
+    p448_t *__restrict__ out,
+    const p448_t *a,
+    uint64_t b
+);
+
+void
+p448_sqr (
+    p448_t *__restrict__ out,
+    const p448_t *a
+);
+
+/* y = x squared n times; n must be positive. */
+static __inline__ void
+p448_sqrn (
+    p448_t *__restrict__ y,
+    const p448_t *x,
+    int n
+) __attribute__((unused));
+
+/* Write the 56-byte encoding of x into serial. */
+void
+p448_serialize (
+    uint8_t *serial,
+    const struct p448_t *x
+);
+
+mask_t
+p448_deserialize (
+    p448_t *x,
+    const uint8_t serial[56]
+);
+/* a = b AND mask, limb by limb; see the inline definition later in this
+ * header. */
+static __inline__ void
+p448_mask(
+    struct p448_t *a,
+    const struct p448_t *b,
+    mask_t mask
+) __attribute__((unused));
+
+/**
+* Returns 1/x.
+*
+* If x=0, returns 0.
+*
+* @param a  Receives the inverse.
+* @param x  Element to invert.
+*/
+void
+p448_inverse (
+    struct p448_t* a,
+    const struct p448_t* x
+);
+/* Invert n elements of in[] into out[]; out is __restrict__, so the two
+ * arrays must not overlap. */
+void
+simultaneous_invert_p448 (
+    struct p448_t *__restrict__ out,
+    const struct p448_t *in,
+    unsigned int n
+);
+/* Equality test: p448_is_zero applied to a - b (see the inline definition
+ * later in this header). */
+static inline mask_t
+p448_eq (
+    const struct p448_t *a,
+    const struct p448_t *b
+) __attribute__((always_inline,unused));
+
+/* -------------- Inline functions begin here -------------- */
+/**
+ * Set out to the small integer x.
+ *
+ * x is stored in limb 0 as given and limbs 1..7 are zeroed; no carry or
+ * reduction is performed, so a value of 2^56 or more stays in limb 0.
+ */
+void
+p448_set_ui (
+    p448_t *out,
+    uint64_t x
+) {
+    int i;
+    out->limb[0] = x;
+    for (i=1; i<8; i++) {
+        out->limb[i] = 0;
+    }
+}
+/**
+ * Exchange a and b in the bit positions selected by doswap, without
+ * branching on the mask.
+ *
+ * For each limb, the XOR difference of the two operands is ANDed with
+ * doswap and XORed back into both.  Presumably doswap is either all ones
+ * (swap) or zero (leave as is) -- TODO confirm with callers.
+ */
+void
+p448_cond_swap (
+    p448_t *a,
+    p448_t *b,
+    mask_t doswap
+) {
+    unsigned int i;
+    for (i=0; i<8; i++) {
+        uint64_t x = doswap & (a->limb[i]^b->limb[i]);
+        a->limb[i] ^= x;
+        b->limb[i] ^= x;
+    }
+}
+/**
+ * out = a + b: limbs are added pairwise with no carry between them, then
+ * p448_weak_reduce propagates the carries.
+ */
+void
+p448_add (
+    p448_t *out,
+    const p448_t *a,
+    const p448_t *b
+) {
+    unsigned int i;
+    for (i=0; i<8; i++) {
+        out->limb[i] = a->limb[i] + b->limb[i];
+    }
+    p448_weak_reduce(out);
+}
+/**
+ * out = a - b.
+ *
+ * Each limb difference is offset by the matching limb of 2p
+ * (co1 = 2*(2^56-1) for every limb, co2 = co1-2 for limb 4) so that the
+ * value subtracted from is not smaller than b, then the result is weakly
+ * reduced.
+ */
+void
+p448_sub (
+    p448_t *out,
+    const p448_t *a,
+    const p448_t *b
+) {
+    unsigned int i;
+    uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2;
+    for (i=0; i<8; i++) {
+        out->limb[i] = a->limb[i] - b->limb[i] + ((i==4) ? co2 : co1);
+    }
+    p448_weak_reduce(out);
+}
+/**
+ * out = -a, computed limb-wise as 2p - a (same co1/co2 constants as
+ * p448_sub) and then weakly reduced.
+ */
+void
+p448_neg (
+    struct p448_t *out,
+    const p448_t *a
+) {
+    unsigned int i;
+    uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2;
+    for (i=0; i<8; i++) {
+        out->limb[i] = ((i==4) ?
co2 : co1) - a->limb[i];
+    }
+    p448_weak_reduce(out);
+}
+/**
+ * Conditionally negate a in place.
+ *
+ * The negation is always computed; each limb of a then takes its bits from
+ * the negated value where doNegate has a 1 and keeps its own bits where
+ * doNegate has a 0.  The p448_bias call is a no-op in this architecture.
+ */
+void
+p448_cond_neg(
+    struct p448_t *a,
+    mask_t doNegate
+) {
+    unsigned int i;
+    struct p448_t negated;
+
+    p448_neg(&negated, a);
+    p448_bias(&negated, 2);
+
+    for (i=0; i<8; i++) {
+        a->limb[i] = ( a->limb[i] & ~doNegate )
+            | ( negated.limb[i] & doNegate );
+    }
+}
+/**
+ * Add the word x to a.
+ *
+ * x is added into limb 0, the part of limb 0 above bit 56 is carried into
+ * limb 1, and limb 0 is masked to 56 bits.  The carry is not propagated
+ * past limb 1.
+ */
+void
+p448_addw (
+    p448_t *a,
+    uint64_t x
+) {
+    a->limb[0] += x;
+    a->limb[1] += a->limb[0]>>56;
+    a->limb[0] &= (1ull<<56)-1;
+}
+/**
+ * Subtract the word x from a.
+ *
+ * limb 0 is decremented by x (unsigned arithmetic, so it may wrap), one
+ * copy of p is added by p448_really_bias to compensate, and the result is
+ * weakly reduced.
+ */
+void
+p448_subw (
+    p448_t *a,
+    uint64_t x
+) {
+    a->limb[0] -= x;
+    p448_really_bias(a, 1);
+    p448_weak_reduce(a);
+}
+/* Copy all limbs of a into out with a single memcpy. */
+void
+p448_copy (
+    p448_t *out,
+    const p448_t *a
+) {
+    memcpy(out,a,sizeof(*a));
+}
+/**
+ * Add amt copies of p to a, limb by limb, without reducing.
+ *
+ * co1 = amt*(2^56-1) is added to every limb except limb 4, which gets
+ * co2 = co1 - amt = amt*(2^56-2).  amt is presumably small and
+ * non-negative -- TODO confirm with callers.
+ */
+void
+p448_really_bias (
+    p448_t *a,
+    int amt
+) {
+    uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt;
+    int i;
+    for (i=0; i<8; i++) {
+        a->limb[i] += (i==4) ? co2 : co1;
+    }
+}
+/* Intentionally does nothing in this architecture; both arguments are
+ * ignored. */
+void
+p448_bias (
+    p448_t *a,
+    int amt
+) {
+    (void) a;
+    (void) amt;
+}
+/**
+ * One pass of carry propagation.
+ *
+ * The part of limb 7 above bit 56 is added to limb 4 and to limb 0.  Every
+ * other limb keeps its low 56 bits and receives the part above bit 56 of
+ * the limb below it.  The loop runs from the top limb downwards so that
+ * each limb[i-1] is read before it is rewritten.  A limb can end up
+ * slightly above 2^56 - 1 (masked value plus incoming carry).
+ */
+void
+p448_weak_reduce (
+    p448_t *a
+) {
+    uint64_t mask = (1ull<<56) - 1;
+    uint64_t tmp = a->limb[7] >> 56; /* overflow out of the top limb */
+    int i;
+    a->limb[4] += tmp;
+    for (i=7; i>0; i--) {
+        a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56);
+    }
+    a->limb[0] = (a->limb[0] & mask) + tmp;
+}
+/**
+ * y = x squared n times.
+ *
+ * n must be positive (asserted).  An odd n starts with one squaring, an
+ * even n with two through tmp; the remaining count is then even and is
+ * consumed two squarings at a time, alternating between tmp and y.
+ * y is __restrict__, so it must not alias x.
+ */
+void
+p448_sqrn (
+    p448_t *__restrict__ y,
+    const p448_t *x,
+    int n
+) {
+    p448_t tmp;
+    assert(n>0);
+    if (n&1) {
+        p448_sqr(y,x);
+        n--;
+    } else {
+        p448_sqr(&tmp,x);
+        p448_sqr(y,&tmp);
+        n-=2;
+    }
+    for (; n; n-=2) {
+        p448_sqr(&tmp,y);
+        p448_sqr(y,&tmp);
+    }
+}
+/**
+ * Compare two field elements.
+ *
+ * Both arguments are copied, the copies are subtracted, and the result of
+ * p448_is_zero on the difference is returned.
+ */
+mask_t
+p448_eq (
+    const struct p448_t *a,
+    const struct p448_t *b
+) {
+    struct p448_t ra, rb;
+    p448_copy(&ra, a);
+    p448_copy(&rb, b);
+    p448_sub(&ra, &ra, &rb);
+    return p448_is_zero(&ra);
+}
+/* a = b AND mask, limb by limb. */
+void
+p448_mask (
+    struct p448_t *a,
+    const struct p448_t *b,
+    mask_t mask
+) {
+    unsigned int i;
+    for (i=0; i<8; i++) {
+        a->limb[i] = b->limb[i] & mask;
+    }
+}
+
+#ifdef __cplusplus
+}; /* extern "C" */
+#endif
+
+#endif /* __P448_H__ */
diff --git a/src/goldilocks.c b/src/goldilocks.c
index
94ba665..11ccdfb 100644 --- a/src/goldilocks.c +++ b/src/goldilocks.c @@ -442,11 +442,8 @@ goldilocks_verify ( goldilocks_global.wnafs, WNAF_PRECMP_BITS ); untwist_and_double_and_serialize( &pk, &pk_text ); - field_sub(&eph, &eph, &pk); - field_bias(&eph, 2); - - succ = field_is_zero(&eph); - + + succ = field_eq(&eph, &pk); return succ ? 0 : GOLDI_EINVAL; } #endif @@ -533,11 +530,8 @@ goldilocks_verify_precomputed ( if (!succ) return GOLDI_EINVAL; untwist_and_double_and_serialize( &pk, &pk_text ); - field_sub(&eph, &eph, &pk); - field_bias(&eph, 2); - - succ = field_is_zero(&eph); - + + succ = field_eq(&eph, &pk); return succ ? 0 : GOLDI_EINVAL; } diff --git a/src/include/field.h b/src/include/field.h index 6231aba..6e1eb96 100644 --- a/src/include/field.h +++ b/src/include/field.h @@ -24,6 +24,7 @@ #define field_cond_neg p448_cond_neg #define field_serialize p448_serialize #define field_deserialize p448_deserialize +#define field_eq p448_eq #define field_is_zero p448_is_zero #define simultaneous_invert simultaneous_invert_p448 /* FUTURE: consistency */