Browse Source

gcc-clean, though the code is slow when compiled by gcc

master
Michael Hamburg 10 years ago
parent
commit
8ebdfaee0b
4 changed files with 21 additions and 19 deletions
  1. +2
    -1
      Makefile
  2. +2
    -2
      src/arch_x86_64/p448.h
  3. +14
    -13
      src/include/word.h
  4. +3
    -3
      src/sha512.c

+ 2
- 1
Makefile View File

@@ -11,6 +11,7 @@ else
CC = gcc CC = gcc
endif endif
LD = $(CC) LD = $(CC)
ASM ?= $(CC)


ifneq (,$(findstring x86_64,$(MACHINE))) ifneq (,$(findstring x86_64,$(MACHINE)))
ARCH ?= arch_x86_64 ARCH ?= arch_x86_64
@@ -99,7 +100,7 @@ build/timestamp:
touch $@ touch $@


build/%.o: build/%.s build/%.o: build/%.s
$(CC) $(ASFLAGS) -c -o $@ $<
$(ASM) $(ASFLAGS) -c -o $@ $<


build/%.s: src/%.c $(HEADERS) build/%.s: src/%.c $(HEADERS)
$(CC) $(CFLAGS) -S -c -o $@ $< $(CC) $(CFLAGS) -S -c -o $@ $<


+ 2
- 2
src/arch_x86_64/p448.h View File

@@ -187,7 +187,7 @@ p448_cond_swap (
) { ) {
big_register_t *aa = (big_register_t*)a; big_register_t *aa = (big_register_t*)a;
big_register_t *bb = (big_register_t*)b; big_register_t *bb = (big_register_t*)b;
big_register_t m = doswap;
big_register_t m = br_set_to_mask(doswap);


unsigned int i; unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
@@ -259,7 +259,7 @@ p448_cond_neg(
struct p448_t negated; struct p448_t negated;
big_register_t *aa = (big_register_t *)a; big_register_t *aa = (big_register_t *)a;
big_register_t *nn = (big_register_t*)&negated; big_register_t *nn = (big_register_t*)&negated;
big_register_t m = doNegate;
big_register_t m = br_set_to_mask(doNegate);
p448_neg(&negated, a); p448_neg(&negated, a);
p448_bias(&negated, 2); p448_bias(&negated, 2);


+ 14
- 13
src/include/word.h View File

@@ -71,18 +71,18 @@ static const mask_t MASK_FAILURE = 0, MASK_SUCCESS = -1;
typedef uint32x4_t vecmask_t; typedef uint32x4_t vecmask_t;
#else #else
/* FIXME this only works on clang */ /* FIXME this only works on clang */
typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2)));
typedef int64_t int64x2_t __attribute__((ext_vector_type(2)));
typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4)));
typedef int64_t int64x4_t __attribute__((ext_vector_type(4)));
typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4)));
typedef int32_t int32x4_t __attribute__((ext_vector_type(4)));
typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2)));
typedef int32_t int32x2_t __attribute__((ext_vector_type(2)));
typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8)));
typedef int32_t int32x8_t __attribute__((ext_vector_type(8)));
typedef uint64_t uint64x2_t __attribute__((vector_size(16)));
typedef int64_t int64x2_t __attribute__((vector_size(16)));
typedef uint64_t uint64x4_t __attribute__((vector_size(32)));
typedef int64_t int64x4_t __attribute__((vector_size(32)));
typedef uint32_t uint32x2_t __attribute__((vector_size(8)));
typedef int32_t int32x2_t __attribute__((vector_size(8)));
typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
typedef int32_t int32x4_t __attribute__((vector_size(16)));
typedef uint32_t uint32x8_t __attribute__((vector_size(32)));
typedef int32_t int32x8_t __attribute__((vector_size(32)));
/* TODO: vector width for procs like ARM; gcc support */ /* TODO: vector width for procs like ARM; gcc support */
typedef word_t vecmask_t __attribute__((ext_vector_type(4)));
typedef word_t vecmask_t __attribute__((vector_size(32)));
#endif #endif


#if __AVX2__ #if __AVX2__
@@ -111,14 +111,15 @@ br_set_to_mask(mask_t x) {
#else #else
static __inline__ big_register_t static __inline__ big_register_t
br_set_to_mask(mask_t x) { br_set_to_mask(mask_t x) {
return (big_register_t)x;
big_register_t out = {x,x,x,x,x,x,x,x};
return out;
} }
#endif #endif


#if __AVX2__ || __SSE2__ #if __AVX2__ || __SSE2__
static __inline__ big_register_t static __inline__ big_register_t
br_is_zero(big_register_t x) { br_is_zero(big_register_t x) {
return (big_register_t)(x == (big_register_t)0);
return (big_register_t)(x == br_set_to_mask(0));
} }
#elif __ARM_NEON__ #elif __ARM_NEON__
static __inline__ big_register_t static __inline__ big_register_t


+ 3
- 3
src/sha512.c View File

@@ -163,9 +163,9 @@ sha512_final (
sha512_process_block(ctx); sha512_process_block(ctx);
fill = 0; fill = 0;
} }
memset(ctx->block + fill, 0, 112-fill);
*((uint64_t *)&ctx->block[112]) = 0;
*((uint64_t *)&ctx->block[120]) = htobe64((ctx->nbytes * 8));
memset(ctx->block + fill, 0, 120-fill);
uint64_t size = htobe64((ctx->nbytes * 8));
memcpy(&ctx->block[120], &size, sizeof(size));
sha512_process_block(ctx); sha512_process_block(ctx);
for (i=0; i<8; i++) { for (i=0; i<8; i++) {
ctx->chain[i] = htobe64(ctx->chain[i]); ctx->chain[i] = htobe64(ctx->chain[i]);


Loading…
Cancel
Save