From 942066a16dcb76ae01c5d1187f033022c5e28ae8 Mon Sep 17 00:00:00 2001
From: Michael Hamburg <mike@shiftleft.org>
Date: Thu, 22 Jan 2015 14:37:45 -0800
Subject: [PATCH] remove probably-unnecessary optimizations (still needs
 benching to make sure)

---
 src/arithmetic.c                       |   5 +-
 src/ec_point.c                         | 398 +++++++++----------------
 src/include/field.h                    | 123 +++++++-
 src/p448/arch_32/p448.h                |  12 +-
 src/p448/arch_arm_32/p448.h            |  12 +-
 src/p448/arch_neon/p448.h              |  12 +-
 src/p448/arch_neon_experimental/p448.h |  12 +-
 src/p448/arch_ref64/p448.h             |  12 +-
 src/p448/arch_x86_64/p448.h            |  12 +-
 src/p448/f_field.h                     |  11 +-
 src/p480/arch_x86_64/p480.h            |  12 +-
 src/p480/f_field.h                     |  11 +-
 src/p521/arch_ref64/p521.h             |  12 +-
 src/p521/arch_x86_64_r12/p521.h        |  12 +-
 src/p521/f_field.h                     |   9 +-
 test/bench.c                           |   5 -
 test/test_arithmetic.c                 |  10 +-
 17 files changed, 321 insertions(+), 359 deletions(-)

diff --git a/src/arithmetic.c b/src/arithmetic.c
index add3b49..4530aa3 100644
--- a/src/arithmetic.c
+++ b/src/arithmetic.c
@@ -21,7 +21,7 @@ field_eq (
     field_copy(&rb, b);
     field_weak_reduce(&ra);
     field_weak_reduce(&rb);
-    field_sub(&ra, &ra, &rb);
+    field_sub_RAW(&ra, &ra, &rb);
     field_bias(&ra, 2);
     return field_is_zero(&ra);
 }
@@ -47,8 +47,7 @@ field_is_square (
     field_isr  (   &L0,     x );
     field_sqr  (   &L1,   &L0 );
     field_mul  (   &L0,     x,   &L1 );
-    field_subw (   &L0,     1 );
-    field_bias (   &L0,     1 );
+    field_subw(   &L0,     1 );
        L3 = field_is_zero(   &L0 );
        L2 = field_is_zero(     x );
     return    L3 |    L2;
diff --git a/src/ec_point.c b/src/ec_point.c
index a625641..a486df1 100644
--- a/src/ec_point.c
+++ b/src/ec_point.c
@@ -12,84 +12,23 @@
 #include "ec_point.h"
 #include "magic.h"
 
-#define is32 (GOLDI_BITS == 32 || FIELD_BITS != 448)
-/* TODO XXX PERF FIXME: better detection of overflow conditions */
-
-/* I wanted to just use if (is32)
- * But clang's -Wunreachable-code flags it.
- * I wanted to keep that warning on.
- */
-#if (is32)
-#define IF32(s) (s)
-#else
-#define IF32(s)
-#endif
-
-/* Multiply by signed curve constant */
-static __inline__ void
-field_mulw_scc (
-    struct field_t* __restrict__ out,
-    const struct field_t *a,
-    int64_t scc
-) {
-    if (scc >= 0) {
-        field_mulw(out, a, scc);
-    } else {
-        field_mulw(out, a, -scc);
-        field_neg(out,out);
-        field_bias(out,2);
-    }
-}
-
-/* Multiply by signed curve constant and weak reduce if biased */
-static __inline__ void
-field_mulw_scc_wr (
-    struct field_t* __restrict__ out,
-    const struct field_t *a,
-    int64_t scc
-) {
-    field_mulw_scc(out, a, scc);
-    if (scc < 0)
-        field_weak_reduce(out);
-}
-
-static __inline__ void
-field_subx (
-    struct field_t *d,
-    const struct field_t *a,
-    const struct field_t *b
-) {
-    field_sub ( d, a, b );
-    field_bias( d, 2 );
-    IF32( field_weak_reduce ( d ) );
-}
-
-static __inline__ void
-field_negx (
-    struct field_t *d,
-    const struct field_t *a
-) {
-    field_neg ( d, a );
-    field_bias( d, 2 );
-    IF32( field_weak_reduce ( d ) );
-}
-
 void
 add_tw_niels_to_tw_extensible (
     struct tw_extensible_t*  d,
     const struct tw_niels_t* e
 ) {
+    ANALYZE_THIS_ROUTINE_CAREFULLY;
     struct field_t L0, L1;
-    field_subx (   &L1, &d->y, &d->x );
+    field_sub (   &L1, &d->y, &d->x );
     field_mul  (   &L0, &e->a,   &L1 );
-    field_add  (   &L1, &d->x, &d->y );
+    field_add_nr  (   &L1, &d->x, &d->y );
     field_mul  ( &d->y, &e->b,   &L1 );
     field_mul  (   &L1, &d->u, &d->t );
     field_mul  ( &d->x, &e->c,   &L1 );
-    field_add  ( &d->u,   &L0, &d->y );
-    field_subx ( &d->t, &d->y,   &L0 );
-    field_subx ( &d->y, &d->z, &d->x );
-    field_add  (   &L0, &d->x, &d->z );
+    field_add_nr  ( &d->u,   &L0, &d->y );
+    field_subx_nr ( &d->t, &d->y,   &L0 );
+    field_subx_nr ( &d->y, &d->z, &d->x );
+    field_add_nr  (   &L0, &d->x, &d->z );
     field_mul  ( &d->z,   &L0, &d->y );
     field_mul  ( &d->x, &d->y, &d->t );
     field_mul  ( &d->y,   &L0, &d->u );
@@ -100,17 +39,18 @@ sub_tw_niels_from_tw_extensible (
     struct tw_extensible_t*  d,
     const struct tw_niels_t* e
 ) {
+    ANALYZE_THIS_ROUTINE_CAREFULLY;
     struct field_t L0, L1;
-    field_subx (   &L1, &d->y, &d->x );
+    field_subx_nr (   &L1, &d->y, &d->x );
     field_mul  (   &L0, &e->b,   &L1 );
-    field_add  (   &L1, &d->x, &d->y );
+    field_add_nr  (   &L1, &d->x, &d->y );
     field_mul  ( &d->y, &e->a,   &L1 );
     field_mul  (   &L1, &d->u, &d->t );
     field_mul  ( &d->x, &e->c,   &L1 );
-    field_add  ( &d->u,   &L0, &d->y );
-    field_subx ( &d->t, &d->y,   &L0 );
-    field_add  ( &d->y, &d->x, &d->z );
-    field_subx (   &L0, &d->z, &d->x );
+    field_add_nr  ( &d->u,   &L0, &d->y );
+    field_subx_nr ( &d->t, &d->y,   &L0 );
+    field_add_nr  ( &d->y, &d->x, &d->z );
+    field_subx_nr (   &L0, &d->z, &d->x );
     field_mul  ( &d->z,   &L0, &d->y );
     field_mul  ( &d->x, &d->y, &d->t );
     field_mul  ( &d->y,   &L0, &d->u );
@@ -142,20 +82,21 @@ void
 double_tw_extensible (
     struct tw_extensible_t* a
 ) {
+    ANALYZE_THIS_ROUTINE_CAREFULLY;
     struct field_t L0, L1, L2;
     field_sqr  (   &L2, &a->x );
     field_sqr  (   &L0, &a->y );
-    field_add  ( &a->u,   &L2,   &L0 );
-    field_add  ( &a->t, &a->y, &a->x );
+    field_add_nr  ( &a->u,   &L2,   &L0 );
+    field_add_nr  ( &a->t, &a->y, &a->x );
     field_sqr  (   &L1, &a->t );
-    field_sub  ( &a->t,   &L1, &a->u );
+    field_sub_nr  ( &a->t,   &L1, &a->u );
     field_bias ( &a->t,     3 );
     IF32( field_weak_reduce( &a->t ) );
-    field_subx (   &L1,   &L0,   &L2 );
+    field_subx_nr (   &L1,   &L0,   &L2 );
     field_sqr  ( &a->x, &a->z );
     field_bias ( &a->x,     2-is32 /*is32 ? 1 : 2*/ );
-    field_add  ( &a->z, &a->x, &a->x );
-    field_sub  (   &L0, &a->z,   &L1 );
+    field_add_nr  ( &a->z, &a->x, &a->x );
+    field_sub_nr  (   &L0, &a->z,   &L1 );
     IF32( field_weak_reduce(   &L0 ) );
     field_mul  ( &a->z,   &L1,   &L0 );
     field_mul  ( &a->x,   &L0, &a->t );
@@ -166,20 +107,21 @@ void
 double_extensible (
     struct extensible_t* a
 ) {
+    ANALYZE_THIS_ROUTINE_CAREFULLY;
     struct field_t L0, L1, L2;
     field_sqr  (   &L2, &a->x );
     field_sqr  (   &L0, &a->y );
-    field_add  (   &L1,   &L2,   &L0 );
-    field_add  ( &a->t, &a->y, &a->x );
+    field_add_nr  (   &L1,   &L2,   &L0 );
+    field_add_nr  ( &a->t, &a->y, &a->x );
     field_sqr  ( &a->u, &a->t );
-    field_sub  ( &a->t, &a->u,   &L1 );
+    field_sub_nr  ( &a->t, &a->u,   &L1 );
     field_bias ( &a->t,     3 );
     IF32( field_weak_reduce( &a->t ) );
-    field_subx ( &a->u,   &L0,   &L2 );
+    field_subx_nr ( &a->u,   &L0,   &L2 );
     field_sqr  ( &a->x, &a->z );
     field_bias ( &a->x,     2 );
-    field_add  ( &a->z, &a->x, &a->x );
-    field_sub  (   &L0, &a->z,   &L1 );
+    field_add_nr  ( &a->z, &a->x, &a->x );
+    field_sub_nr  (   &L0, &a->z,   &L1 );
     IF32( field_weak_reduce(   &L0 ) );
     field_mul  ( &a->z,   &L1,   &L0 );
     field_mul  ( &a->x,   &L0, &a->t );
@@ -194,18 +136,14 @@ twist_and_double (
     struct field_t L0;
     field_sqr  ( &b->x, &a->x );
     field_sqr  ( &b->z, &a->y );
-    field_add  ( &b->u, &b->x, &b->z );
-    field_add  ( &b->t, &a->y, &a->x );
+    field_add ( &b->u, &b->x, &b->z );
+    field_add ( &b->t, &a->y, &a->x );
     field_sqr  (   &L0, &b->t );
-    field_sub  ( &b->t,   &L0, &b->u );
-    field_bias ( &b->t,     3 );
-    IF32( field_weak_reduce( &b->t ) );
-    field_subx (   &L0, &b->z, &b->x );
+    field_sub ( &b->t,   &L0, &b->u );
+    field_sub (   &L0, &b->z, &b->x );
     field_sqr  ( &b->x, &a->z );
-    field_bias ( &b->x,     2 );
-    field_add  ( &b->z, &b->x, &b->x );
-    field_sub  ( &b->y, &b->z, &b->u );
-    IF32( field_weak_reduce( &b->y ) );
+    field_add ( &b->z, &b->x, &b->x );
+    field_sub ( &b->y, &b->z, &b->u );
     field_mul  ( &b->z,   &L0, &b->y );
     field_mul  ( &b->x, &b->y, &b->t );
     field_mul  ( &b->y,   &L0, &b->u );
@@ -219,18 +157,14 @@ untwist_and_double (
     struct field_t L0;
     field_sqr  ( &b->x, &a->x );
     field_sqr  ( &b->z, &a->y );
-    field_add  (   &L0, &b->x, &b->z );
-    field_add  ( &b->t, &a->y, &a->x );
+    field_add (   &L0, &b->x, &b->z );
+    field_add ( &b->t, &a->y, &a->x );
     field_sqr  ( &b->u, &b->t );
-    field_sub  ( &b->t, &b->u,   &L0 );
-    field_bias ( &b->t,     3 );
-    IF32( field_weak_reduce( &b->t ) );
-    field_subx ( &b->u, &b->z, &b->x );
+    field_sub ( &b->t, &b->u,   &L0 );
+    field_sub ( &b->u, &b->z, &b->x );
     field_sqr  ( &b->x, &a->z );
-    field_bias ( &b->x,     2-is32 /*is32 ? 1 : 2*/ );
-    field_add  ( &b->z, &b->x, &b->x );
-    field_sub  ( &b->y, &b->z, &b->u );
-    IF32( field_weak_reduce( &b->y ) );
+    field_add ( &b->z, &b->x, &b->x );
+    field_sub ( &b->y, &b->z, &b->u );
     field_mul  ( &b->z,   &L0, &b->y );
     field_mul  ( &b->x, &b->y, &b->t );
     field_mul  ( &b->y,   &L0, &b->u );
@@ -241,11 +175,8 @@ convert_tw_affine_to_tw_pniels (
     struct tw_pniels_t*       b,
     const struct tw_affine_t* a
 ) {
-    field_sub  ( &b->n.a, &a->y, &a->x );
-    field_bias ( &b->n.a,     2 );
-    field_weak_reduce( &b->n.a );
-    field_add  ( &b->n.b, &a->x, &a->y );
-    field_weak_reduce( &b->n.b );
+    field_sub ( &b->n.a, &a->y, &a->x );
+    field_add ( &b->n.b, &a->x, &a->y );
     field_mul  ( &b->z, &a->y, &a->x );
     field_mulw_scc_wr ( &b->n.c, &b->z, 2*EDWARDS_D-2 );
     field_set_ui( &b->z,     2 );
@@ -280,15 +211,11 @@ convert_tw_extensible_to_tw_pniels (
     struct tw_pniels_t*           b,
     const struct tw_extensible_t* a
 ) {
-    field_sub  ( &b->n.a, &a->y, &a->x );
-    field_bias ( &b->n.a,     2 );
-    field_weak_reduce( &b->n.a );
-    field_add  ( &b->n.b, &a->x, &a->y );
-    field_weak_reduce( &b->n.b );
+    field_sub ( &b->n.a, &a->y, &a->x );
+    field_add ( &b->n.b, &a->x, &a->y );
     field_mul  ( &b->z, &a->u, &a->t );
     field_mulw_scc_wr ( &b->n.c, &b->z, 2*EDWARDS_D-2 );
-    field_add  ( &b->z, &a->z, &a->z );
-    field_weak_reduce( &b->z );
+    field_add ( &b->z, &a->z, &a->z );
 }
 
 void
@@ -296,8 +223,8 @@ convert_tw_pniels_to_tw_extensible (
     struct tw_extensible_t*   e,
     const struct tw_pniels_t* d
 ) {
-    field_add  ( &e->u, &d->n.b, &d->n.a );
-    field_subx ( &e->t, &d->n.b, &d->n.a );
+    field_add ( &e->u, &d->n.b, &d->n.a );
+    field_sub ( &e->t, &d->n.b, &d->n.a );
     field_mul  ( &e->x, &d->z, &e->t );
     field_mul  ( &e->y, &d->z, &e->u );
     field_sqr  ( &e->z, &d->z );
@@ -308,11 +235,8 @@ convert_tw_niels_to_tw_extensible (
     struct tw_extensible_t*  e,
     const struct tw_niels_t* d
 ) {
-    field_add  ( &e->y, &d->b, &d->a );
-    field_weak_reduce( &e->y );
-    field_sub  ( &e->x, &d->b, &d->a );
-    field_bias ( &e->x,     2 );
-    field_weak_reduce( &e->x );
+    field_add ( &e->y, &d->b, &d->a );
+    field_sub ( &e->x, &d->b, &d->a );
     field_set_ui( &e->z,     1 );
     field_copy ( &e->t, &e->x );
     field_copy ( &e->u, &e->y );
@@ -322,24 +246,25 @@ void
 montgomery_step (
     struct montgomery_t* a
 ) {
+    ANALYZE_THIS_ROUTINE_CAREFULLY;
     struct field_t L0, L1;
-    field_add  (   &L0, &a->zd, &a->xd );
-    field_subx (   &L1, &a->xd, &a->zd );
-    field_subx ( &a->zd, &a->xa, &a->za );
+    field_add_nr  (   &L0, &a->zd, &a->xd );
+    field_sub (   &L1, &a->xd, &a->zd );
+    field_sub ( &a->zd, &a->xa, &a->za );
     field_mul  ( &a->xd,   &L0, &a->zd );
-    field_add  ( &a->zd, &a->za, &a->xa );
+    field_add_nr  ( &a->zd, &a->za, &a->xa );
     field_mul  ( &a->za,   &L1, &a->zd );
-    field_add  ( &a->xa, &a->za, &a->xd );
+    field_add_nr  ( &a->xa, &a->za, &a->xd );
     field_sqr  ( &a->zd, &a->xa );
     field_mul  ( &a->xa, &a->z0, &a->zd );
-    field_subx ( &a->zd, &a->xd, &a->za );
+    field_sub ( &a->zd, &a->xd, &a->za );
     field_sqr  ( &a->za, &a->zd );
     field_sqr  ( &a->xd,   &L0 );
     field_sqr  (   &L0,   &L1 );
     field_mulw_scc ( &a->zd, &a->xd, 1-EDWARDS_D ); /* FIXME PERF MULW */
-    field_subx (   &L1, &a->xd,   &L0 );
+    field_sub (   &L1, &a->xd,   &L0 );
     field_mul  ( &a->xd,   &L0, &a->zd );
-    field_sub  (   &L0, &a->zd,   &L1 );
+    field_sub_nr  (   &L0, &a->zd,   &L1 );
     field_bias (   &L0,     4 - 2*is32 /*is32 ? 2 : 4*/ );
     IF32( field_weak_reduce(   &L0 ) );
     field_mul  ( &a->zd,   &L0,   &L1 );
@@ -366,27 +291,26 @@ serialize_montgomery (
     mask_t L4, L5, L6;
     struct field_t L0, L1, L2, L3;
     field_mul  (   &L3, &a->z0, &a->zd );
-    field_subx (   &L1,   &L3, &a->xd );
+    field_sub (   &L1,   &L3, &a->xd );
     field_mul  (   &L3, &a->za,   &L1 );
     field_mul  (   &L2, &a->z0, &a->xd );
-    field_subx (   &L1,   &L2, &a->zd );
+    field_sub (   &L1,   &L2, &a->zd );
     field_mul  (   &L0, &a->xa,   &L1 );
-    field_add  (   &L2,   &L0,   &L3 );
-    field_subx (   &L1,   &L3,   &L0 );
+    field_add (   &L2,   &L0,   &L3 );
+    field_sub (   &L1,   &L3,   &L0 );
     field_mul  (   &L3,   &L1,   &L2 );
     field_copy (   &L2, &a->z0 );
     field_addw (   &L2,     1 );
     field_sqr  (   &L0,   &L2 );
     field_mulw_scc_wr (   &L1,   &L0, EDWARDS_D-1 );
-    field_add  (   &L2, &a->z0, &a->z0 );
-    field_add  (   &L0,   &L2,   &L2 );
-    field_add  (   &L2,   &L0,   &L1 );
-    IF32( field_weak_reduce(   &L2 ) );
+    field_add (   &L2, &a->z0, &a->z0 );
+    field_add (   &L0,   &L2,   &L2 );
+    field_add (   &L2,   &L0,   &L1 );
     field_mul  (   &L0, &a->xd,   &L2 );
        L5 = field_is_zero( &a->zd );
        L6 = -   L5;
     constant_time_mask (   &L1,   &L0, sizeof(L1), L5 );
-    field_add  (   &L2,   &L1, &a->zd );
+    field_add (   &L2,   &L1, &a->zd );
        L4 = ~   L5;
     field_mul  (   &L1,   sbz,   &L3 );
     field_addw (   &L1,    L6 );
@@ -399,8 +323,7 @@ serialize_montgomery (
     field_sqr  (   &L1,   &L0 );
     field_mul  (   &L0,   &L3,   &L1 );
     constant_time_mask (     b,   &L2, sizeof(L1), L4 );
-    field_subw (   &L0,     1 );
-    field_bias (   &L0,     1 );
+    field_subw(   &L0,     1 );
        L5 = field_is_zero(   &L0 );
        L4 = field_is_zero(   sbz );
     return    L5 |    L4;
@@ -412,8 +335,8 @@ serialize_extensible (
     const struct extensible_t* a
 ) {
     struct field_t L0, L1, L2;
-    field_subx (   &L0, &a->y, &a->z );
-    field_add  (     b, &a->z, &a->y );
+    field_sub (   &L0, &a->y, &a->z );
+    field_add (     b, &a->z, &a->y );
     field_mul  (   &L1, &a->z, &a->x );
     field_mul  (   &L2,   &L0,   &L1 );
     field_mul  (   &L1,   &L2,   &L0 );
@@ -432,15 +355,13 @@ untwist_and_double_and_serialize (
 ) {
     struct field_t L0, L1, L2, L3;
     field_mul  (   &L3, &a->y, &a->x );
-    field_add  (     b, &a->y, &a->x );
+    field_add (     b, &a->y, &a->x );
     field_sqr  (   &L1,     b );
-    field_add  (   &L2,   &L3,   &L3 );
-    field_sub  (     b,   &L1,   &L2 );
-    field_bias (     b,     3 );
-    IF32( field_weak_reduce(     b ) );
+    field_add (   &L2,   &L3,   &L3 );
+    field_sub (     b,   &L1,   &L2 );
     field_sqr  (   &L2, &a->z );
     field_sqr  (   &L1,   &L2 );
-    field_add  (   b,     b,     b );
+    field_add (   b,     b,     b );
     field_mulw_scc (     &L2,   b, EDWARDS_D-1 );
     field_mulw_scc (   b,   &L2, EDWARDS_D-1 );
     field_mul  (   &L0,   &L2,   &L1 );
@@ -457,13 +378,12 @@ twist_even (
     struct tw_extensible_t*    b,
     const struct extensible_t* a
 ) {
-    mask_t L0, L1;
     field_sqr  ( &b->y, &a->z );
     field_sqr  ( &b->z, &a->x );
-    field_subx ( &b->u, &b->y, &b->z );
-    field_subx ( &b->z, &a->z, &a->x );
+    field_sub ( &b->u, &b->y, &b->z );
+    field_sub ( &b->z, &a->z, &a->x );
     field_mul  ( &b->y, &b->z, &a->y );
-    field_subx ( &b->z, &a->z, &a->y );
+    field_sub ( &b->z, &a->z, &a->y );
     field_mul  ( &b->x, &b->z, &b->y );
     field_mul  ( &b->t, &b->x, &b->u );
     field_mul  ( &b->y, &b->x, &b->t );
@@ -473,10 +393,7 @@ twist_even (
     field_mul  ( &b->t, &b->y, &b->x );
     field_mul  ( &b->x, &a->x, &b->u );
     field_mul  ( &b->y, &a->y, &b->u );
-       L1 = field_is_zero( &b->z );
-       L0 = -   L1;
-    field_addw ( &b->y,    L0 );
-    field_weak_reduce( &b->y );
+    field_addw ( &b->y,    -field_is_zero( &b->z ) );
     field_set_ui( &b->z,     1 );
     field_copy ( &b->t, &b->x );
     field_copy ( &b->u, &b->y );
@@ -487,18 +404,15 @@ test_only_twist (
     struct tw_extensible_t*    b,
     const struct extensible_t* a
 ) {
-    mask_t L2, L3;
     struct field_t L0, L1;
     field_sqr  ( &b->u, &a->z );
     field_sqr  ( &b->y, &a->x );
-    field_sub  ( &b->z, &b->u, &b->y );
-    field_bias ( &b->z,     2 );
-    field_add  ( &b->y, &b->z, &b->z );
-    field_add  ( &b->u, &b->y, &b->y );
-    IF32( field_weak_reduce( &b->u ) );
-    field_subx ( &b->y, &a->z, &a->x );
+    field_sub ( &b->z, &b->u, &b->y );
+    field_add ( &b->y, &b->z, &b->z );
+    field_add ( &b->u, &b->y, &b->y );
+    field_sub ( &b->y, &a->z, &a->x );
     field_mul  ( &b->x, &b->y, &a->y );
-    field_subx ( &b->z, &a->z, &a->y );
+    field_sub ( &b->z, &a->z, &a->y );
     field_mul  ( &b->t, &b->z, &b->x );
     field_mul  (   &L1, &b->t, &b->u );
     field_mul  ( &b->x, &b->t,   &L1 );
@@ -506,25 +420,16 @@ test_only_twist (
     field_mul  ( &b->u, &b->t,   &L0 );
     field_sqr  (   &L1,   &L0 );
     field_mul  ( &b->t, &b->x,   &L1 );
-    field_add  (   &L1, &a->y, &a->x );
-    IF32( field_weak_reduce(   &L1 ) );
-    field_subx (   &L0, &a->x, &a->y );
+    field_add (   &L1, &a->y, &a->x );
+    field_sub (   &L0, &a->x, &a->y );
     field_mul  ( &b->x, &b->t,   &L0 );
-    field_add  (   &L0, &b->x,   &L1 );
-    field_subx ( &b->t,   &L1, &b->x );
+    field_add (   &L0, &b->x,   &L1 );
+    field_sub ( &b->t,   &L1, &b->x );
     field_mul  ( &b->x,   &L0, &b->u );
-       L2 = field_is_zero( &b->y );
-       L3 = -   L2;
-    field_addw ( &b->x,    L3 );
-    field_weak_reduce( &b->x );
+    field_addw ( &b->x, -field_is_zero( &b->y ) );
     field_mul  ( &b->y, &b->t, &b->u );
-       L2 = field_is_zero( &b->z );
-       L3 = -   L2;
-    field_addw ( &b->y,    L3 );
-    field_weak_reduce( &b->y );
-       L3 = field_is_zero( &a->y );
-       L2 =    L3 +     1;
-    field_set_ui( &b->z,    L2 );
+    field_addw ( &b->y, -field_is_zero( &b->z ) );
+    field_set_ui( &b->z, 1+field_is_zero( &a->y ) );
     field_copy ( &b->t, &b->x );
     field_copy ( &b->u, &b->y );
 }
@@ -536,7 +441,7 @@ is_even_pt (
     struct field_t L0, L1, L2;
     field_sqr  (   &L2, &a->z );
     field_sqr  (   &L1, &a->x );
-    field_subx (   &L0,   &L2,   &L1 );
+    field_sub (   &L0,   &L2,   &L1 );
     return field_is_square (   &L0 );
 }
 
@@ -547,8 +452,7 @@ is_even_tw (
     struct field_t L0, L1, L2;
     field_sqr  (   &L2, &a->z );
     field_sqr  (   &L1, &a->x );
-    field_add  (   &L0,   &L1,   &L2 );
-    field_weak_reduce(   &L0 );
+    field_add (   &L0,   &L1,   &L2 );
     return field_is_square (   &L0 );
 }
 
@@ -563,13 +467,12 @@ deserialize_affine (
     field_addw (   &L3,     1 );
     field_sqr  (   &L2,   &L3 );
     field_mulw_scc (   &a->x,   &L2, EDWARDS_D-1 ); /* PERF MULW */
-    field_add  (   &L3,   &L1,   &L1 ); /* FIXME: i adjusted the bias here, was it right? */
-    field_add  ( &a->y,   &L3,   &L3 );
-    field_add  (   &L3, &a->y, &a->x );
-    IF32( field_weak_reduce(   &L3 ) );
+    field_add (   &L3,   &L1,   &L1 ); /* FIXME: i adjusted the bias here, was it right? */
+    field_add ( &a->y,   &L3,   &L3 );
+    field_add (   &L3, &a->y, &a->x );
     field_copy ( &a->y,   &L1 );
-    field_subw ( &a->y,     1 );
     field_negx ( &a->x, &a->y );
+    field_addw ( &a->x,     1 );
     field_mul  ( &a->y, &a->x,   &L3 );
     field_sqr  (   &L2, &a->x );
     field_mul  (   &L0,   &L2, &a->y );
@@ -579,12 +482,11 @@ deserialize_affine (
     field_sqr  (   &L2,   &L3 );
     field_mul  (   &L3,   &L0,   &L2 );
     field_mul  (   &L0, &a->x,   &L3 );
-    field_add  (   &L2, &a->y, &a->y );
+    field_add (   &L2, &a->y, &a->y );
     field_mul  ( &a->x,    sz,   &L2 );
     field_addw (   &L1,     1 );
     field_mul  ( &a->y,   &L1,   &L3 );
-    field_subw (   &L0,     1 );
-    field_bias (   &L0,     1 );
+    field_subw(   &L0,     1 );
     return field_is_zero(   &L0 );
 }
 
@@ -600,13 +502,12 @@ deserialize_and_twist_approx (
     field_addw ( &a->y,     1 );
     field_sqr  ( &L0, &a->y );
     field_mulw_scc ( &a->x, &L0, EDWARDS_D-1 );
-    field_add  ( &a->y, &a->z, &a->z );
-    field_add  ( &a->u, &a->y, &a->y );
-    field_add  ( &a->y, &a->u, &a->x );
-    IF32( field_weak_reduce( &a->y ) );
+    field_add ( &a->y, &a->z, &a->z );
+    field_add ( &a->u, &a->y, &a->y );
+    field_add ( &a->y, &a->u, &a->x );
     field_sqr  ( &a->x, &a->z );
-    field_subw ( &a->x,     1 );
     field_negx ( &a->u, &a->x );
+    field_addw ( &a->u,     1 );
     field_mul  ( &a->x,  sdm1, &a->u );
     field_mul  (   &L0, &a->x, &a->y );
     field_mul  ( &a->t,   &L0, &a->y );
@@ -618,17 +519,16 @@ deserialize_and_twist_approx (
     field_sqr  (   &L1,   &L0 );
     field_mul  ( &a->u, &a->t,   &L1 );
     field_mul  ( &a->t, &a->x, &a->u );
-    field_add  ( &a->x,    sz,    sz );
+    field_add ( &a->x,    sz,    sz );
     field_mul  (   &L0, &a->u, &a->x );
     field_copy ( &a->x, &a->z );
-    field_subw ( &a->x,     1 );
     field_negx (   &L1, &a->x );
+    field_addw (   &L1,     1 );
     field_mul  ( &a->x,   &L1,   &L0 );
     field_mul  (   &L0, &a->u, &a->y );
     field_addw ( &a->z,     1 );
     field_mul  ( &a->y, &a->z,   &L0 );
-    field_subw ( &a->t,     1 );
-    field_bias ( &a->t,     1 );
+    field_subw( &a->t,     1 );
     mask_t ret = field_is_zero( &a->t );
     field_set_ui( &a->z,     1 );
     field_copy ( &a->t, &a->x );
@@ -673,11 +573,9 @@ eq_affine (
 ) {
     mask_t L1, L2;
     struct field_t L0;
-    field_sub  (   &L0, &a->x, &b->x );
-    field_bias (   &L0,     2 );
+    field_sub (   &L0, &a->x, &b->x );
        L2 = field_is_zero(   &L0 );
-    field_sub  (   &L0, &a->y, &b->y );
-    field_bias (   &L0,     2 );
+    field_sub (   &L0, &a->y, &b->y );
        L1 = field_is_zero(   &L0 );
     return    L2 &    L1;
 }
@@ -691,13 +589,11 @@ eq_extensible (
     struct field_t L0, L1, L2;
     field_mul  (   &L2, &b->z, &a->x );
     field_mul  (   &L1, &a->z, &b->x );
-    field_sub  (   &L0,   &L2,   &L1 );
-    field_bias (   &L0,     2 );
+    field_sub (   &L0,   &L2,   &L1 );
        L4 = field_is_zero(   &L0 );
     field_mul  (   &L2, &b->z, &a->y );
     field_mul  (   &L1, &a->z, &b->y );
-    field_sub  (   &L0,   &L2,   &L1 );
-    field_bias (   &L0,     2 );
+    field_sub (   &L0,   &L2,   &L1 );
        L3 = field_is_zero(   &L0 );
     return    L4 &    L3;
 }
@@ -711,13 +607,11 @@ eq_tw_extensible (
     struct field_t L0, L1, L2;
     field_mul  (   &L2, &b->z, &a->x );
     field_mul  (   &L1, &a->z, &b->x );
-    field_sub  (   &L0,   &L2,   &L1 );
-    field_bias (   &L0,     2 );
+    field_sub (   &L0,   &L2,   &L1 );
        L4 = field_is_zero(   &L0 );
     field_mul  (   &L2, &b->z, &a->y );
     field_mul  (   &L1, &a->z, &b->y );
-    field_sub  (   &L0,   &L2,   &L1 );
-    field_bias (   &L0,     2 );
+    field_sub (   &L0,   &L2,   &L1 );
        L3 = field_is_zero(   &L0 );
     return    L4 &    L3;
 }
@@ -727,22 +621,18 @@ elligator_2s_inject (
     struct affine_t*     a,
     const struct field_t* r
 ) {
-    mask_t L0, L1;
     struct field_t L2, L3, L4, L5, L6, L7, L8;
     field_sqr  ( &a->x,     r );
     field_sqr  (   &L3, &a->x );
     field_copy ( &a->y,   &L3 );
-    field_subw ( &a->y,     1 );
     field_negx (   &L4, &a->y );
+    field_addw (   &L4,     1 );
     field_sqr  (   &L2,   &L4 );
     field_mulw (   &L7,   &L2, (EDWARDS_D-1)*(EDWARDS_D-1) );
     field_mulw (   &L8,   &L3, 4*(EDWARDS_D+1)*(EDWARDS_D+1) );
-    field_add  ( &a->y,   &L8,   &L7 );
-    IF32( field_weak_reduce( &a->y ) );
+    field_add ( &a->y,   &L8,   &L7 );
     field_mulw (   &L8,   &L2, 4*(EDWARDS_D)*(EDWARDS_D-1) );
-    field_sub  (   &L7, &a->y,   &L8 );
-    field_bias (   &L7,     2 );
-    IF32( field_weak_reduce(   &L7 ) );
+    field_sub (   &L7, &a->y,   &L8 );
     field_mulw_scc (   &L6, &a->y, -2-2*EDWARDS_D );
     field_mul  (   &L5,   &L7,   &L6 );
         /* FIXME Stability problem (API stability, not crash) / possible bug.
@@ -769,27 +659,20 @@ elligator_2s_inject (
     field_mul  (   &L8,   &L7,   &L6 );
     field_mul  (   &L7,   &L8,   &L6 );
     field_copy (   &L6, &a->x );
-    field_subw (   &L6,     1 );
     field_addw ( &a->x,     1 );
     field_mul  (   &L5, &a->x,   &L8 );
-    field_sub  ( &a->x,   &L6,   &L5 );
-    field_bias ( &a->x,     3 );
-    IF32( field_weak_reduce( &a->x ) );
+    field_addw (   &L5,     1 );
+    field_sub ( &a->x,   &L6,   &L5 );
     field_mul  (   &L5,   &L4, &a->x );
     field_mulw_scc_wr (   &a->x,   &L5, -2-2*EDWARDS_D );
-    field_add  (   &L4,   &L3,   &L3 );
-    field_add  (   &L3,   &L4,   &L2 );
-    field_subw (   &L3,     2 );
-    field_bias (   &L3,     1 );
-    IF32( field_weak_reduce(   &L3 ) );
+    field_add (   &L4,   &L3,   &L3 );
+    field_add (   &L3,   &L4,   &L2 );
+    field_subw(   &L3,     2 );
     field_mul  (   &L2,   &L3,   &L8 );
     field_mulw (   &L3,   &L2, 2*(EDWARDS_D+1)*(EDWARDS_D-1) );
-    field_add  (   &L2,   &L3, &a->y );
+    field_add (   &L2,   &L3, &a->y );
     field_mul  ( &a->y,   &L7,   &L2 );
-       L1 = field_is_zero(   &L8 );
-       L0 = -   L1;
-    field_addw ( &a->y,    L0 );
-    field_weak_reduce( &a->y );
+    field_addw ( &a->y,    -field_is_zero( &L8 ) );
 }
 
 mask_t
@@ -799,12 +682,11 @@ validate_affine (
     struct field_t L0, L1, L2, L3;
     field_sqr  (   &L0, &a->y );
     field_sqr  (   &L1, &a->x );
-    field_add  (   &L3,   &L1,   &L0 );
-    field_subw (   &L3,     1 );
+    field_add (   &L3,   &L1,   &L0 );
     field_mulw_scc (   &L2,   &L1, EDWARDS_D );
     field_mul  (   &L1,   &L0,   &L2 );
-    field_sub  (   &L0,   &L3,   &L1 );
-    field_bias (   &L0,     3 );
+    field_addw (   &L1,     1 );
+    field_sub (   &L0,   &L3,   &L1 );
     return field_is_zero(   &L0 );
 }
 
@@ -821,28 +703,26 @@ validate_tw_extensible (
     field_mul  (   &L1, &ext->t, &ext->u );
     field_mul  (   &L2, &ext->z,   &L1 );
     field_mul  (   &L0, &ext->x, &ext->y );
-    field_neg  (   &L1,   &L0 );
-    field_add  (   &L0,   &L1,   &L2 );
-    field_bias (   &L0,     2 );
+    field_negx (   &L1,   &L0 );
+    field_add (   &L0,   &L1,   &L2 );
        L5 = field_is_zero(   &L0 );
     /*
      * Check invariant:
      * 0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2
      */
     field_sqr  (   &L2, &ext->y );
-    field_neg  (   &L1,   &L2 );
+    field_negx (   &L1,   &L2 );
     field_sqr  (   &L0, &ext->x );
-    field_add  (   &L2,   &L0,   &L1 );
+    field_add (   &L2,   &L0,   &L1 );
     field_sqr  (   &L3, &ext->u );
     field_sqr  (   &L0, &ext->t );
     field_mul  (   &L1,   &L0,   &L3 );
     field_mulw_scc (   &L3,   &L1, EDWARDS_D );
-    field_add  (   &L0,   &L3,   &L2 );
-    field_neg  (   &L3,   &L1 );
-    field_add  (   &L2,   &L3,   &L0 );
+    field_add (   &L0,   &L3,   &L2 );
+    field_negx (   &L3,   &L1 );
+    field_add (   &L2,   &L3,   &L0 );
     field_sqr  (   &L1, &ext->z );
-    field_add  (   &L0,   &L1,   &L2 );
-    field_bias (   &L0,     2 );
+    field_add (   &L0,   &L1,   &L2 );
        L4 = field_is_zero(   &L0 );
     return    L5 & L4 &~ field_is_zero(&ext->z);
 }
@@ -858,18 +738,17 @@ validate_extensible (
      * 0 = d*t^2*u^2 - x^2 - y^2 + z^2
      */
     field_sqr  (   &L2, &ext->y );
-    field_neg  (   &L1,   &L2 );
+    field_negx (   &L1,   &L2 );
     field_sqr  (   &L0, &ext->z );
-    field_add  (   &L2,   &L0,   &L1 );
+    field_add (   &L2,   &L0,   &L1 );
     field_sqr  (   &L3, &ext->u );
     field_sqr  (   &L0, &ext->t );
     field_mul  (   &L1,   &L0,   &L3 );
     field_mulw_scc (   &L0,   &L1, EDWARDS_D );
-    field_add  (   &L1,   &L0,   &L2 );
+    field_add (   &L1,   &L0,   &L2 );
     field_sqr  (   &L0, &ext->x );
-    field_neg  (   &L2,   &L0 );
-    field_add  (   &L0,   &L2,   &L1 );
-    field_bias (   &L0,     2 );
+    field_negx (   &L2,   &L0 );
+    field_add (   &L0,   &L2,   &L1 );
        L5 = field_is_zero(   &L0 );
     /*
      * Check invariant:
@@ -878,9 +757,8 @@ validate_extensible (
     field_mul  (   &L1, &ext->t, &ext->u );
     field_mul  (   &L2, &ext->z,   &L1 );
     field_mul  (   &L0, &ext->x, &ext->y );
-    field_neg  (   &L1,   &L0 );
-    field_add  (   &L0,   &L1,   &L2 );
-    field_bias (   &L0,     2 );
+    field_negx (   &L1,   &L0 );
+    field_add (   &L0,   &L1,   &L2 );
        L4 = field_is_zero(   &L0 );
     return L5 & L4 &~ field_is_zero(&ext->z);
 }
diff --git a/src/include/field.h b/src/include/field.h
index 6a9b0e7..d375c09 100644
--- a/src/include/field.h
+++ b/src/include/field.h
@@ -14,6 +14,13 @@
 #include "f_field.h"
 #include <string.h>
 
+#define is32 (GOLDI_BITS == 32 || FIELD_BITS != 448)
+#if (is32)
+#define IF32(s) (s)
+#else
+#define IF32(s)
+#endif
+
 /** @brief Bytes in a field element */
 #define FIELD_BYTES          (1+(FIELD_BITS-1)/8)
 
@@ -53,21 +60,6 @@ field_copy (
     memcpy(a,b,sizeof(*a));
 }
 
-/**
- * Negate a in place if doNegate.
- */
-static inline void
-__attribute__((unused,always_inline)) 
-field_cond_neg(
-    field_t *a,
-    mask_t doNegate
-) {
-	struct field_t negated;
-    field_neg(&negated, a);
-    field_bias(&negated, 2);
-	constant_time_select(a, &negated, a, sizeof(negated), doNegate);
-}
-
 /**
  * Returns 1/sqrt(+- x).
  * 
@@ -140,4 +132,105 @@ field_sqrn (
     }
 }
 
+/* Multiply by signed curve constant: field_mulw by |scc|, then field_neg_RAW and bias 2 when scc < 0 */
+static __inline__ void
+field_mulw_scc (
+    struct field_t* __restrict__ out,
+    const struct field_t *a,
+    int64_t scc
+) {
+    if (scc >= 0) {
+        field_mulw(out, a, scc);
+    } else {
+        field_mulw(out, a, -scc);
+        field_neg_RAW(out,out);
+        field_bias(out,2);
+    }
+}
+
+/* field_mulw_scc, then field_weak_reduce when scc < 0 (the only case in which field_mulw_scc biases) */
+static __inline__ void
+field_mulw_scc_wr (
+    struct field_t* __restrict__ out,
+    const struct field_t *a,
+    int64_t scc
+) {
+    field_mulw_scc(out, a, scc);
+    if (scc < 0)
+        field_weak_reduce(out);
+}
+/* Subtraction wrapper: field_sub_RAW, bias by 2, and weak-reduce only when is32 holds (via IF32). */
+static __inline__ void
+field_subx_RAW (
+    struct field_t *d,
+    const struct field_t *a,
+    const struct field_t *b
+) {
+    field_sub_RAW ( d, a, b );
+    field_bias( d, 2 );
+    IF32( field_weak_reduce ( d ) );
+}
+/* Subtraction wrapper: field_sub_RAW, bias by 2, then always weak-reduce (cf. field_subx_RAW). */
+static __inline__ void
+field_sub (
+    struct field_t *d,
+    const struct field_t *a,
+    const struct field_t *b
+) {
+    field_sub_RAW ( d, a, b );
+    field_bias( d, 2 );
+    field_weak_reduce ( d );
+}
+/* Addition wrapper: field_add_RAW followed by an unconditional field_weak_reduce. */
+static __inline__ void
+field_add (
+    struct field_t *d,
+    const struct field_t *a,
+    const struct field_t *b
+) {
+    field_add_RAW ( d, a, b );
+    field_weak_reduce ( d );
+}
+/* Word-subtraction wrapper: field_subw_RAW(d, c), bias by 1, then always weak-reduce. */
+static __inline__ void
+field_subw (
+    struct field_t *d,
+    word_t c
+) {
+    field_subw_RAW ( d, c );
+    field_bias( d, 1 );
+    field_weak_reduce ( d );
+}
+/* Negation wrapper: field_neg_RAW, bias by 2, then always weak-reduce. */
+static __inline__ void
+field_negx (
+    struct field_t *d,
+    const struct field_t *a
+) {
+    field_neg_RAW ( d, a );
+    field_bias( d, 2 );
+    field_weak_reduce ( d );
+}
+
+/**
+ * Negate a in place if doNegate: field_negx into a temporary, then constant_time_select on the mask.
+ */
+static inline void
+__attribute__((unused,always_inline)) 
+field_cond_neg (
+    field_t *a,
+    mask_t doNegate
+) {
+	struct field_t negated;
+    field_negx(&negated, a);
+	constant_time_select(a, &negated, a, sizeof(negated), doNegate);
+}
+
+/** *_nr ops compile only after ANALYZE_THIS_ROUTINE_CAREFULLY; do/while(0) keeps each one a single statement */
+#define ANALYZE_THIS_ROUTINE_CAREFULLY const int ANNOTATE___ANALYZE_THIS_ROUTINE_CAREFULLY = 0;
+#define MUST_BE_CAREFUL (void) ANNOTATE___ANALYZE_THIS_ROUTINE_CAREFULLY
+#define field_add_nr(a,b,c) do { MUST_BE_CAREFUL; field_add_RAW(a,b,c); } while (0)
+#define field_sub_nr(a,b,c) do { MUST_BE_CAREFUL; field_sub_RAW(a,b,c); } while (0)
+#define field_subx_nr(a,b,c) do { MUST_BE_CAREFUL; field_subx_RAW(a,b,c); } while (0)
+
 #endif // __FIELD_H__
diff --git a/src/p448/arch_32/p448.h b/src/p448/arch_32/p448.h
index cf90611..f0406cd 100644
--- a/src/p448/arch_32/p448.h
+++ b/src/p448/arch_32/p448.h
@@ -24,21 +24,21 @@ p448_set_ui (
 ) __attribute__((unused,always_inline));
 
 static __inline__ void
-p448_add (
+p448_add_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
 ) __attribute__((unused,always_inline));
              
 static __inline__ void
-p448_sub (
+p448_sub_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
 ) __attribute__((unused,always_inline));
              
 static __inline__ void
-p448_neg (
+p448_neg_RAW (
     p448_t *out,
     const p448_t *a
 ) __attribute__((unused,always_inline));
@@ -130,7 +130,7 @@ p448_set_ui (
 }
 
 void
-p448_add (
+p448_add_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
@@ -148,7 +148,7 @@ p448_add (
 }
 
 void
-p448_sub (
+p448_sub_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
@@ -166,7 +166,7 @@ p448_sub (
 }
 
 void
-p448_neg (
+p448_neg_RAW (
     p448_t *out,
     const p448_t *a
 ) {
diff --git a/src/p448/arch_arm_32/p448.h b/src/p448/arch_arm_32/p448.h
index cf90611..f0406cd 100644
--- a/src/p448/arch_arm_32/p448.h
+++ b/src/p448/arch_arm_32/p448.h
@@ -24,21 +24,21 @@ p448_set_ui (
 ) __attribute__((unused,always_inline));
 
 static __inline__ void
-p448_add (
+p448_add_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
 ) __attribute__((unused,always_inline));
              
 static __inline__ void
-p448_sub (
+p448_sub_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
 ) __attribute__((unused,always_inline));
              
 static __inline__ void
-p448_neg (
+p448_neg_RAW (
     p448_t *out,
     const p448_t *a
 ) __attribute__((unused,always_inline));
@@ -130,7 +130,7 @@ p448_set_ui (
 }
 
 void
-p448_add (
+p448_add_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
@@ -148,7 +148,7 @@ p448_add (
 }
 
 void
-p448_sub (
+p448_sub_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
@@ -166,7 +166,7 @@ p448_sub (
 }
 
 void
-p448_neg (
+p448_neg_RAW (
     p448_t *out,
     const p448_t *a
 ) {
diff --git a/src/p448/arch_neon/p448.h b/src/p448/arch_neon/p448.h
index cf90611..f0406cd 100644
--- a/src/p448/arch_neon/p448.h
+++ b/src/p448/arch_neon/p448.h
@@ -24,21 +24,21 @@ p448_set_ui (
 ) __attribute__((unused,always_inline));
 
 static __inline__ void
-p448_add (
+p448_add_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
 ) __attribute__((unused,always_inline));
              
 static __inline__ void
-p448_sub (
+p448_sub_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
 ) __attribute__((unused,always_inline));
              
 static __inline__ void
-p448_neg (
+p448_neg_RAW (
     p448_t *out,
     const p448_t *a
 ) __attribute__((unused,always_inline));
@@ -130,7 +130,7 @@ p448_set_ui (
 }
 
 void
-p448_add (
+p448_add_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
@@ -148,7 +148,7 @@ p448_add (
 }
 
 void
-p448_sub (
+p448_sub_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
@@ -166,7 +166,7 @@ p448_sub (
 }
 
 void
-p448_neg (
+p448_neg_RAW (
     p448_t *out,
     const p448_t *a
 ) {
diff --git a/src/p448/arch_neon_experimental/p448.h b/src/p448/arch_neon_experimental/p448.h
index 144d86c..f7d338a 100644
--- a/src/p448/arch_neon_experimental/p448.h
+++ b/src/p448/arch_neon_experimental/p448.h
@@ -27,21 +27,21 @@ p448_set_ui (
 ) __attribute__((unused,always_inline));
 
 static __inline__ void
-p448_add (
+p448_add_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
 ) __attribute__((unused,always_inline));
              
 static __inline__ void
-p448_sub (
+p448_sub_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
 ) __attribute__((unused,always_inline));
              
 static __inline__ void
-p448_neg (
+p448_neg_RAW (
     p448_t *out,
     const p448_t *a
 ) __attribute__((unused,always_inline));
@@ -133,7 +133,7 @@ p448_set_ui (
 }
 
 void
-p448_add (
+p448_add_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
@@ -145,7 +145,7 @@ p448_add (
 }
 
 void
-p448_sub (
+p448_sub_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
@@ -163,7 +163,7 @@ p448_sub (
 }
 
 void
-p448_neg (
+p448_neg_RAW (
     p448_t *out,
     const p448_t *a
 ) {
diff --git a/src/p448/arch_ref64/p448.h b/src/p448/arch_ref64/p448.h
index bf43b79..d6670c3 100644
--- a/src/p448/arch_ref64/p448.h
+++ b/src/p448/arch_ref64/p448.h
@@ -25,21 +25,21 @@ p448_set_ui (
 ) __attribute__((unused));
 
 static __inline__ void
-p448_add (
+p448_add_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
 ) __attribute__((unused));
              
 static __inline__ void
-p448_sub (
+p448_sub_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
 ) __attribute__((unused));
              
 static __inline__ void
-p448_neg (
+p448_neg_RAW (
     p448_t *out,
     const p448_t *a
 ) __attribute__((unused));
@@ -136,7 +136,7 @@ p448_set_ui (
 }
 
 void
-p448_add (
+p448_add_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
@@ -149,7 +149,7 @@ p448_add (
 }
 
 void
-p448_sub (
+p448_sub_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
@@ -163,7 +163,7 @@ p448_sub (
 }
 
 void
-p448_neg (
+p448_neg_RAW (
     struct p448_t *out,
     const p448_t *a
 ) {
diff --git a/src/p448/arch_x86_64/p448.h b/src/p448/arch_x86_64/p448.h
index 0772d23..20b7597 100644
--- a/src/p448/arch_x86_64/p448.h
+++ b/src/p448/arch_x86_64/p448.h
@@ -24,21 +24,21 @@ p448_set_ui (
 ) __attribute__((unused,always_inline));
 
 static __inline__ void
-p448_add (
+p448_add_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
 ) __attribute__((unused,always_inline));
              
 static __inline__ void
-p448_sub (
+p448_sub_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
 ) __attribute__((unused,always_inline));
              
 static __inline__ void
-p448_neg (
+p448_neg_RAW (
     p448_t *out,
     const p448_t *a
 ) __attribute__((unused,always_inline));
@@ -129,7 +129,7 @@ p448_set_ui (
 }
 
 void
-p448_add (
+p448_add_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
@@ -147,7 +147,7 @@ p448_add (
 }
 
 void
-p448_sub (
+p448_sub_RAW (
     p448_t *out,
     const p448_t *a,
     const p448_t *b
@@ -165,7 +165,7 @@ p448_sub (
 }
 
 void
-p448_neg (
+p448_neg_RAW (
     struct p448_t *out,
     const p448_t *a
 ) {
diff --git a/src/p448/f_field.h b/src/p448/f_field.h
index c743c8d..7284194 100644
--- a/src/p448/f_field.h
+++ b/src/p448/f_field.h
@@ -9,23 +9,22 @@
 #ifndef __F_FIELD_H__
 #define __F_FIELD_H__ 1
 
-#include <string.h>
 #include "constant_time.h"
+#include <string.h>
 
 #include "p448.h"
 #define FIELD_BITS           448
 #define field_t              p448_t
 #define field_mul            p448_mul
 #define field_sqr            p448_sqr
-#define field_add            p448_add
-#define field_sub            p448_sub
+#define field_add_RAW        p448_add_RAW
+#define field_sub_RAW        p448_sub_RAW
 #define field_mulw           p448_mulw
 #define field_addw           p448_addw
-#define field_subw           p448_subw
-#define field_neg            p448_neg
+#define field_subw_RAW       p448_subw
+#define field_neg_RAW        p448_neg_RAW
 #define field_set_ui         p448_set_ui
 #define field_bias           p448_bias
-#define field_cond_neg       p448_cond_neg
 #define field_inverse        p448_inverse
 #define field_eq             p448_eq
 #define field_isr            p448_isr
diff --git a/src/p480/arch_x86_64/p480.h b/src/p480/arch_x86_64/p480.h
index a49c6d0..ea841aa 100644
--- a/src/p480/arch_x86_64/p480.h
+++ b/src/p480/arch_x86_64/p480.h
@@ -24,21 +24,21 @@ p480_set_ui (
 ) __attribute__((unused,always_inline));
 
 static __inline__ void
-p480_add (
+p480_add_RAW (
     p480_t *out,
     const p480_t *a,
     const p480_t *b
 ) __attribute__((unused,always_inline));
              
 static __inline__ void
-p480_sub (
+p480_sub_RAW (
     p480_t *out,
     const p480_t *a,
     const p480_t *b
 ) __attribute__((unused,always_inline));
              
 static __inline__ void
-p480_neg (
+p480_neg_RAW (
     p480_t *out,
     const p480_t *a
 ) __attribute__((unused,always_inline));
@@ -129,7 +129,7 @@ p480_set_ui (
 }
 
 void
-p480_add (
+p480_add_RAW (
     p480_t *out,
     const p480_t *a,
     const p480_t *b
@@ -147,7 +147,7 @@ p480_add (
 }
 
 void
-p480_sub (
+p480_sub_RAW (
     p480_t *out,
     const p480_t *a,
     const p480_t *b
@@ -165,7 +165,7 @@ p480_sub (
 }
 
 void
-p480_neg (
+p480_neg_RAW (
     struct p480_t *out,
     const p480_t *a
 ) {
diff --git a/src/p480/f_field.h b/src/p480/f_field.h
index 397f83d..c681bd3 100644
--- a/src/p480/f_field.h
+++ b/src/p480/f_field.h
@@ -9,23 +9,22 @@
 #ifndef __F_FIELD_H__
 #define __F_FIELD_H__ 1
 
-#include <string.h>
 #include "constant_time.h"
+#include <string.h>
 
 #include "p480.h"
 #define FIELD_BITS           480
 #define field_t              p480_t
 #define field_mul            p480_mul
 #define field_sqr            p480_sqr
-#define field_add            p480_add
-#define field_sub            p480_sub
+#define field_add_RAW        p480_add_RAW
+#define field_sub_RAW        p480_sub_RAW
 #define field_mulw           p480_mulw
 #define field_addw           p480_addw
-#define field_subw           p480_subw
-#define field_neg            p480_neg
+#define field_subw_RAW       p480_subw
+#define field_neg_RAW        p480_neg_RAW
 #define field_set_ui         p480_set_ui
 #define field_bias           p480_bias
-#define field_cond_neg       p480_cond_neg
 #define field_inverse        p480_inverse
 #define field_eq             p480_eq
 #define field_isr            p480_isr
diff --git a/src/p521/arch_ref64/p521.h b/src/p521/arch_ref64/p521.h
index c4dbf69..ff458a6 100644
--- a/src/p521/arch_ref64/p521.h
+++ b/src/p521/arch_ref64/p521.h
@@ -25,21 +25,21 @@ p521_set_ui (
 ) __attribute__((unused));
 
 static __inline__ void
-p521_add (
+p521_add_RAW (
     p521_t *out,
     const p521_t *a,
     const p521_t *b
 ) __attribute__((unused));
              
 static __inline__ void
-p521_sub (
+p521_sub_RAW (
     p521_t *out,
     const p521_t *a,
     const p521_t *b
 ) __attribute__((unused));
              
 static __inline__ void
-p521_neg (
+p521_neg_RAW (
     p521_t *out,
     const p521_t *a
 ) __attribute__((unused));
@@ -136,7 +136,7 @@ p521_set_ui (
 }
 
 void
-p521_add (
+p521_add_RAW (
     p521_t *out,
     const p521_t *a,
     const p521_t *b
@@ -149,7 +149,7 @@ p521_add (
 }
 
 void
-p521_sub (
+p521_sub_RAW (
     p521_t *out,
     const p521_t *a,
     const p521_t *b
@@ -163,7 +163,7 @@ p521_sub (
 }
 
 void
-p521_neg (
+p521_neg_RAW (
     struct p521_t *out,
     const p521_t *a
 ) {
diff --git a/src/p521/arch_x86_64_r12/p521.h b/src/p521/arch_x86_64_r12/p521.h
index f51e91b..568784b 100644
--- a/src/p521/arch_x86_64_r12/p521.h
+++ b/src/p521/arch_x86_64_r12/p521.h
@@ -29,21 +29,21 @@ p521_set_ui (
 ) __attribute__((unused));
 
 static __inline__ void
-p521_add (
+p521_add_RAW (
     p521_t *out,
     const p521_t *a,
     const p521_t *b
 ) __attribute__((unused));
              
 static __inline__ void
-p521_sub (
+p521_sub_RAW (
     p521_t *out,
     const p521_t *a,
     const p521_t *b
 ) __attribute__((unused));
              
 static __inline__ void
-p521_neg (
+p521_neg_RAW (
     p521_t *out,
     const p521_t *a
 ) __attribute__((unused));
@@ -147,7 +147,7 @@ p521_set_ui (
 }
 
 void
-p521_add (
+p521_add_RAW (
     p521_t *out,
     const p521_t *a,
     const p521_t *b
@@ -159,7 +159,7 @@ p521_add (
 }
 
 void
-p521_sub (
+p521_sub_RAW (
     p521_t *out,
     const p521_t *a,
     const p521_t *b
@@ -171,7 +171,7 @@ p521_sub (
 }
 
 void
-p521_neg (
+p521_neg_RAW (
     struct p521_t *out,
     const p521_t *a
 ) {
diff --git a/src/p521/f_field.h b/src/p521/f_field.h
index f17fe3d..6331072 100644
--- a/src/p521/f_field.h
+++ b/src/p521/f_field.h
@@ -17,15 +17,14 @@
 #define field_t              p521_t
 #define field_mul            p521_mul
 #define field_sqr            p521_sqr
-#define field_add            p521_add
-#define field_sub            p521_sub
+#define field_add_RAW        p521_add_RAW
+#define field_sub_RAW        p521_sub_RAW
 #define field_mulw           p521_mulw
 #define field_addw           p521_addw
-#define field_subw           p521_subw
-#define field_neg            p521_neg
+#define field_subw_RAW       p521_subw
+#define field_neg_RAW        p521_neg_RAW
 #define field_set_ui         p521_set_ui
 #define field_bias           p521_bias
-#define field_cond_neg       p521_cond_neg
 #define field_inverse        p521_inverse
 #define field_eq             p521_eq
 #define field_isr            p521_isr
diff --git a/test/bench.c b/test/bench.c
index ddf8097..31fd9eb 100644
--- a/test/bench.c
+++ b/test/bench.c
@@ -177,7 +177,6 @@ int main(int argc, char **argv) {
         field_mul(&c,&b,&a);
         field_sqr(&b,&c);
         field_subw(&b,1);
-        field_bias(&b,1);
         if (!field_is_zero(&b)) {
             printf("ISR validation failure!\n");
             field_print("a", &a);
@@ -232,7 +231,6 @@ int main(int argc, char **argv) {
             convert_affine_to_extensible(&exta,&affine);
             serialize_extensible(&b, &exta);
             field_sub(&c,&b,&a);
-            field_bias(&c,2);
             if (!field_is_zero(&c)) {
                 printf("Reserialize validation failure!\n");
                 field_print("a", &a);
@@ -635,7 +633,6 @@ int main(int argc, char **argv) {
         ignore_result(montgomery_ladder(&b,&a,&z,WORD_BITS,0));
         
         field_sub(&d,&b,&c);
-        field_bias(&d,2);
 		if (!field_is_zero(&d)) {
             printf("Odd ladder validation failure %d!\n", ++failures);
             field_print("a", &a);
@@ -661,7 +658,6 @@ int main(int argc, char **argv) {
         untwist_and_double_and_serialize(&c, &ext);
         
         field_sub(&d,&b,&c);
-        field_bias(&d,2);
         
         if (good && !field_is_zero(&d)){
             printf("Iso+serial validation failure %d!\n", ++failures);
@@ -717,7 +713,6 @@ int main(int argc, char **argv) {
         serialize_extensible(&c, &exta);
         
         field_sub(&d,&b,&c);
-        field_bias(&d,2);
         
         if (!field_is_zero(&d)){
             printf("PreWNAF combo validation failure %d!\n", ++failures);
diff --git a/test/test_arithmetic.c b/test/test_arithmetic.c
index bbdbf43..7c45407 100644
--- a/test/test_arithmetic.c
+++ b/test/test_arithmetic.c
@@ -83,7 +83,7 @@ static mask_t field_assert_eq_gmp(
     return MASK_SUCCESS;
 }
 
-static mask_t test_add_sub (
+static mask_t test_add_sub_RAW (
     const mpz_t x,
     const mpz_t y,
     word_t word
@@ -95,11 +95,11 @@ static mask_t test_add_sub (
     succ &= mpz_to_field(&yy,y);
     mpz_init(t);
     
-    field_add(&tt,&xx,&yy);
+    field_add_RAW(&tt,&xx,&yy);
     mpz_add(t,x,y);
     succ &= field_assert_eq_gmp("add",&xx,&yy,&tt,t,0,2.1);
     
-    field_sub(&tt,&xx,&yy);
+    field_sub_RAW(&tt,&xx,&yy);
     field_bias(&tt,2);
     mpz_sub(t,x,y);
     succ &= field_assert_eq_gmp("sub",&xx,&yy,&tt,t,0,3.1);
@@ -232,13 +232,13 @@ int test_arithmetic (void) {
         
         word_t word = gmp_urandomm_ui (state, 1ull<<radix_bits);
         
-        succ &= test_add_sub(x,y,word);
+        succ &= test_add_sub_RAW(x,y,word);
         succ &= test_mul_sqr(x,y,word);
         
         if (j < 1000)
             succ &= test_isr(x);
         
-        // TODO: test neg, cond_neg, set_ui, wrd, srd, inv, ...?
+        // TODO: test neg_RAW/negx, cond_neg, set_ui, wrd, srd, inv, ...?
     }
     
     mpz_clear(x);