|
|
@@ -15,7 +15,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { |
|
|
|
|
|
|
|
/* For some reason clang doesn't vectorize this without prompting? */ |
|
|
|
unsigned int i; |
|
|
|
for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) { |
|
|
|
for (i=0; i<sizeof(aa)/(sizeof(uint64xn_t)); i++) { |
|
|
|
((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i]; |
|
|
|
((uint64xn_t*)bb)[i] = ((const uint64xn_t*)b)[i] + ((const uint64xn_t*)(&b[4]))[i]; |
|
|
|
((uint64xn_t*)bbb)[i] = ((const uint64xn_t*)bb)[i] + ((const uint64xn_t*)(&b[4]))[i]; |
|
|
@@ -190,7 +190,7 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { |
|
|
|
|
|
|
|
/* For some reason clang doesn't vectorize this without prompting? */ |
|
|
|
unsigned int i; |
|
|
|
for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) { |
|
|
|
for (i=0; i<sizeof(aa)/(sizeof(uint64xn_t)); i++) { |
|
|
|
((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i]; |
|
|
|
} |
|
|
|
|
|
|
|