From 06be9ef25303b9c939e7036c5fef57e266dfeeeb Mon Sep 17 00:00:00 2001 From: Michael Hamburg Date: Thu, 29 Jan 2015 15:02:00 -0800 Subject: [PATCH] decaf double and double-scalarmul, but needs to be tested --- include/decaf.h | 32 +++++++++++++++++++++++++++++++ src/decaf.c | 51 ++++++++++++++++++++++++++++++++++++++++++++----- test/bench.c | 7 +++++++ 3 files changed, 85 insertions(+), 5 deletions(-) diff --git a/include/decaf.h b/include/decaf.h index 4349819..4632913 100644 --- a/include/decaf.h +++ b/include/decaf.h @@ -28,6 +28,7 @@ #define NONNULL1 __attribute__((nonnull(1))) #define NONNULL2 __attribute__((nonnull(1,2))) #define NONNULL3 __attribute__((nonnull(1,2,3))) +#define NONNULL5 __attribute__((nonnull(1,2,3,4,5))) /** Types of internal words. TODO: ARCH: make 32-bit clean */ typedef uint64_t decaf_word_t, decaf_bool_t; @@ -236,6 +237,18 @@ void decaf_point_add ( const decaf_point_t b ) API_VIS NONNULL3; +/** + * @brief Double a point. Equivalent to + * decaf_point_add(two_a,a,a), but potentially faster. + * + * @param [out] two_a The sum a+a. + * @param [in] a A point. + */ +void decaf_point_double ( + decaf_point_t two_a, + const decaf_point_t a +) API_VIS NONNULL2; + /** * @brief Subtract two points to produce a third point. The * input points and output point can be pointers to the same @@ -264,6 +277,24 @@ void decaf_point_scalarmul ( const decaf_scalar_t scalar ) API_VIS NONNULL3; +/** + * @brief Multiply two base points by two scalars and add the results. + * + * @param [out] combo The combination base1*scalar1 + base2*scalar2. + * @param [in] base1 A first point to be scaled. + * @param [in] scalar1 A first scalar to multiply by. + * @param [in] base2 A second point to be scaled. + * @param [in] scalar2 A second scalar to multiply by. 
+ * @TODO: test + */ +void decaf_point_double_scalarmul ( + decaf_point_t combo, + const decaf_point_t base1, + const decaf_scalar_t scalar1, + const decaf_point_t base2, + const decaf_scalar_t scalar2 +) API_VIS NONNULL5; + /** * @brief Test that a point is valid, for debugging purposes. * @@ -327,6 +358,7 @@ void decaf_point_from_hash_uniform ( #undef NONNULL1 #undef NONNULL2 #undef NONNULL3 +#undef NONNULL5 #ifdef __cplusplus }; /* extern "C" */ diff --git a/src/decaf.c b/src/decaf.c index ed2e847..b3f1f48 100644 --- a/src/decaf.c +++ b/src/decaf.c @@ -463,8 +463,10 @@ void decaf_point_add(decaf_point_t a, const decaf_point_t b, const decaf_point_t decaf_point_add_sub(a,b,c,0); } -/* No dedicated point double (PERF) */ -#define decaf_dbl(a,b) decaf_point_add(a,b,b) +/* No dedicated point double yet (PERF) */ +void decaf_point_double(decaf_point_t a, const decaf_point_t b) { + decaf_point_add(a,b,b); +} void decaf_copy ( decaf_point_t a, @@ -522,16 +524,16 @@ void decaf_point_scalarmul ( * possibly-odd number of unmasked bits, may need to mask. */ decaf_point_t w,b3,tmp; - decaf_dbl(w,b); + decaf_point_double(w,b); /* b3 = b*3 */ decaf_point_add(b3,w,b); int i; for (i=DECAF_SCALAR_LIMBS*WBITS-2; i>0; i-=2) { decaf_word_t bits = scalar->limb[i/WBITS]>>(i%WBITS); decaf_cond_sel(tmp,b,b3,((bits^(bits>>1))&1)-1); - decaf_dbl(w,w); + decaf_point_double(w,w); decaf_point_add_sub(w,w,tmp,((bits>>1)&1)-1); - decaf_dbl(w,w); + decaf_point_double(w,w); } decaf_point_add_sub(w,w,b,((scalar->limb[0]>>1)&1)-1); /* low bit is special because fo signed window */ @@ -539,6 +541,45 @@ void decaf_point_scalarmul ( decaf_point_sub(a,w,tmp); } +void decaf_point_double_scalarmul ( + decaf_point_t a, + const decaf_point_t b, + const decaf_scalar_t scalarb, + const decaf_point_t c, + const decaf_scalar_t scalarc +) { + /* w=2 signed window uses about 1.5 adds per bit. + * I figured a few extra lines was worth the 25% speedup. 
+ * NB: if adapting this function to scalarmul by a + * possibly-odd number of unmasked bits, may need to mask. + */ + decaf_point_t w,b3,c3,tmp; + decaf_point_double(w,b); + decaf_point_double(tmp,c); + /* b3 = b*3 */ + decaf_point_add(b3,w,b); + decaf_point_add(c3,tmp,c); + decaf_point_add(w,w,tmp); + int i; + for (i=DECAF_SCALAR_LIMBS*WBITS-2; i>0; i-=2) { + decaf_point_double(w,w); + decaf_word_t bits = scalarb->limb[i/WBITS]>>(i%WBITS); + decaf_cond_sel(tmp,b,b3,((bits^(bits>>1))&1)-1); + decaf_point_add_sub(w,w,tmp,((bits>>1)&1)-1); + bits = scalarc->limb[i/WBITS]>>(i%WBITS); + decaf_cond_sel(tmp,c,c3,((bits^(bits>>1))&1)-1); + decaf_point_add_sub(w,w,tmp,((bits>>1)&1)-1); + decaf_point_double(w,w); + } + decaf_point_add_sub(w,w,b,((scalarb->limb[0]>>1)&1)-1); + decaf_point_add_sub(w,w,c,((scalarc->limb[0]>>1)&1)-1); + /* low bit is special because of signed window */ + decaf_cond_sel(tmp,b,decaf_point_identity,-(scalarb->limb[0]&1)); + decaf_point_sub(w,w,tmp); + decaf_cond_sel(tmp,c,decaf_point_identity,-(scalarc->limb[0]&1)); + decaf_point_sub(a,w,tmp); +} + decaf_bool_t decaf_point_eq ( const decaf_point_t p, const decaf_point_t q ) { /* equality mod 2-torsion compares x/y */ gf a, b; diff --git a/test/bench.c b/test/bench.c index 6152e68..1b4c273 100644 --- a/test/bench.c +++ b/test/bench.c @@ -372,6 +372,13 @@ int main(int argc, char **argv) { } when = now() - when; printf("decaf slow: %5.1fµs\n", when * 1e6 / i); + + when = now(); + for (i=0; i