From 4f98023aa324958113bd274ddd3a80fc082da6d7 Mon Sep 17 00:00:00 2001 From: Yang Hau Date: Sat, 20 Jul 2024 16:54:49 +0800 Subject: [PATCH 1/3] Build tests and sse2neon.h header check with -O2 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 999a3a7b..160e8917 100644 --- a/Makefile +++ b/Makefile @@ -57,7 +57,7 @@ ARCH_CFLAGS := $(ARCH_CFLAGS)+$(subst $(COMMA),+,$(FEATURE)) endif endif -CXXFLAGS += -Wall -Wcast-qual -I. $(ARCH_CFLAGS) -std=gnu++14 +CXXFLAGS += -Wall -Wcast-qual -I. $(ARCH_CFLAGS) -O2 -std=gnu++14 LDFLAGS += -lm OBJS = \ tests/binding.o \ @@ -77,7 +77,7 @@ $(EXEC): $(OBJS) check: tests/main ifeq ($(processor),$(filter $(processor),aarch64 arm64 arm armv7l)) - $(CC) $(ARCH_CFLAGS) -c sse2neon.h + $(CC) $(ARCH_CFLAGS) -O2 -c sse2neon.h endif $(EXEC_WRAPPER) $^ From c0039916d087eca5b82eab5054460bde89219834 Mon Sep 17 00:00:00 2001 From: Yang Hau Date: Sat, 20 Jul 2024 17:43:59 +0800 Subject: [PATCH 2/3] tests: build _mm_dp_pd inputs as double arrays instead of casting float pointers --- tests/impl.cpp | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/tests/impl.cpp b/tests/impl.cpp index ecd4635f..7b52ba52 100644 --- a/tests/impl.cpp +++ b/tests/impl.cpp @@ -8384,22 +8384,24 @@ result_t test_mm_cvtepu8_epi64(const SSE2NEONTestImpl &impl, uint32_t iter) return validateInt64(ret, i0, i1); } -#define MM_DP_PD_TEST_CASE_WITH(imm8) \ do { \ const double *_a = (const double *) impl.mTestFloatPointer1; \ const double *_b = (const double *) impl.mTestFloatPointer2; \ const int imm = imm8; \ double d[2]; \ double sum = 0; \ for (size_t i = 0; i < 2; i++) \ sum += ((imm) & (1 << (i + 4))) ? _a[i] * _b[i] : 0; \ for (size_t i = 0; i < 2; i++) \ d[i] = (imm & (1 << i)) ? 
sum : 0; \ - __m128d a = load_m128d(_a); \ - __m128d b = load_m128d(_b); \ - __m128d ret = _mm_dp_pd(a, b, imm); \ - if (validateDouble(ret, d[0], d[1]) != TEST_SUCCESS) \ - return TEST_FAIL; \ +#define MM_DP_PD_TEST_CASE_WITH(imm8) \ + do { \ + const double _a[] = {impl.mTestFloatPointer1[0], \ + impl.mTestFloatPointer1[1]}; \ + const double _b[] = {impl.mTestFloatPointer2[0], \ + impl.mTestFloatPointer2[1]}; \ + const int imm = imm8; \ + double d[2] = {0}; \ + double sum = 0; \ + for (size_t i = 0; i < 2; i++) \ + sum += ((imm) & (1 << (i + 4))) ? _a[i] * _b[i] : 0; \ + for (size_t i = 0; i < 2; i++) \ + d[i] = (imm & (1 << i)) ? sum : 0; \ + __m128d a = load_m128d(_a); \ + __m128d b = load_m128d(_b); \ + __m128d ret = _mm_dp_pd(a, b, imm); \ + if (validateDouble(ret, d[0], d[1]) != TEST_SUCCESS) \ + return TEST_FAIL; \ } while (0) #define GENERATE_MM_DP_PD_TEST_CASES \ From 46c8b3913d886ef25201c8ca4824e3cf3758adc3 Mon Sep 17 00:00:00 2001 From: alexorlov124 <45552995+alexorlov124@users.noreply.github.com> Date: Sat, 20 Jul 2024 15:31:25 +0200 Subject: [PATCH 3/3] Update main.yml enable O2 optimization --- .github/workflows/main.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 01fdd764..5c26cfb3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -56,10 +56,10 @@ jobs: strategy: matrix: arch_with_features: [ - {arch: armv7, feature: none, arch_cflags: none}, - {arch: aarch64, feature: none, arch_cflags: none}, - {arch: aarch64, feature: crypto+crc, arch_cflags: none}, - {arch: armv7, feature: none, arch_cflags: '-mcpu=cortex-a32 -mfpu=neon-fp-armv8'} + {arch: armv7, feature: none, arch_cflags: '-O2'}, + {arch: aarch64, feature: none, arch_cflags: '-O2'}, + {arch: aarch64, feature: crypto+crc, arch_cflags: '-O2'}, + {arch: armv7, feature: none, arch_cflags: '-mcpu=cortex-a32 -mfpu=neon-fp-armv8 -O2'} ] cxx_compiler: [g++-10, clang++-11] steps: