Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DONT MERGE] Try enable O2 optimization #644

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ jobs:
strategy:
matrix:
arch_with_features: [
{arch: armv7, feature: none, arch_cflags: none},
{arch: aarch64, feature: none, arch_cflags: none},
{arch: aarch64, feature: crypto+crc, arch_cflags: none},
{arch: armv7, feature: none, arch_cflags: '-mcpu=cortex-a32 -mfpu=neon-fp-armv8'}
{arch: armv7, feature: none, arch_cflags: '-O2'},
{arch: aarch64, feature: none, arch_cflags: '-O2'},
{arch: aarch64, feature: crypto+crc, arch_cflags: '-O2'},
{arch: armv7, feature: none, arch_cflags: '-mcpu=cortex-a32 -mfpu=neon-fp-armv8 -O2'}
]
cxx_compiler: [g++-10, clang++-11]
steps:
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ ARCH_CFLAGS := $(ARCH_CFLAGS)+$(subst $(COMMA),+,$(FEATURE))
endif
endif

CXXFLAGS += -Wall -Wcast-qual -I. $(ARCH_CFLAGS) -std=gnu++14
CXXFLAGS += -Wall -Wcast-qual -I. $(ARCH_CFLAGS) -O2 -std=gnu++14
LDFLAGS += -lm
OBJS = \
tests/binding.o \
Expand All @@ -77,7 +77,7 @@ $(EXEC): $(OBJS)

check: tests/main
ifeq ($(processor),$(filter $(processor),aarch64 arm64 arm armv7l))
$(CC) $(ARCH_CFLAGS) -c sse2neon.h
$(CC) $(ARCH_CFLAGS) -O2 -c sse2neon.h
endif
$(EXEC_WRAPPER) $^

Expand Down
34 changes: 18 additions & 16 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8384,22 +8384,24 @@ result_t test_mm_cvtepu8_epi64(const SSE2NEONTestImpl &impl, uint32_t iter)
return validateInt64(ret, i0, i1);
}

#define MM_DP_PD_TEST_CASE_WITH(imm8) \
do { \
const double *_a = (const double *) impl.mTestFloatPointer1; \
const double *_b = (const double *) impl.mTestFloatPointer2; \
const int imm = imm8; \
double d[2]; \
double sum = 0; \
for (size_t i = 0; i < 2; i++) \
sum += ((imm) & (1 << (i + 4))) ? _a[i] * _b[i] : 0; \
for (size_t i = 0; i < 2; i++) \
d[i] = (imm & (1 << i)) ? sum : 0; \
__m128d a = load_m128d(_a); \
__m128d b = load_m128d(_b); \
__m128d ret = _mm_dp_pd(a, b, imm); \
if (validateDouble(ret, d[0], d[1]) != TEST_SUCCESS) \
return TEST_FAIL; \
/* Runs one _mm_dp_pd test case for the immediate `imm8`.
 *
 * The first two elements of impl.mTestFloatPointer1 and
 * impl.mTestFloatPointer2 are copied element by element into local double
 * arrays (presumably the source elements are float, so each value is
 * converted rather than reinterpreted -- TODO confirm against the
 * declaration of SSE2NEONTestImpl).
 *
 * The reference result is built in two steps: bits 4 and 5 of the immediate
 * choose which lane products _a[i] * _b[i] are added to `sum`, and bits 0
 * and 1 choose which output lanes receive `sum` (unselected lanes are 0).
 *
 * If validateDouble does not report TEST_SUCCESS, the macro executes
 * `return TEST_FAIL;` in the enclosing function, so it can only be expanded
 * inside a function whose return type accepts TEST_FAIL and that has `impl`
 * in scope.
 */
#define MM_DP_PD_TEST_CASE_WITH(imm8) \
do { \
const double _a[] = {impl.mTestFloatPointer1[0], \
impl.mTestFloatPointer1[1]}; \
const double _b[] = {impl.mTestFloatPointer2[0], \
impl.mTestFloatPointer2[1]}; \
const int imm = imm8; \
double d[2] = {0}; \
double sum = 0; \
for (size_t i = 0; i < 2; i++) \
sum += ((imm) & (1 << (i + 4))) ? _a[i] * _b[i] : 0; \
for (size_t i = 0; i < 2; i++) \
d[i] = (imm & (1 << i)) ? sum : 0; \
__m128d a = load_m128d(_a); \
__m128d b = load_m128d(_b); \
__m128d ret = _mm_dp_pd(a, b, imm); \
if (validateDouble(ret, d[0], d[1]) != TEST_SUCCESS) \
return TEST_FAIL; \
} while (0)

#define GENERATE_MM_DP_PD_TEST_CASES \
Expand Down
Loading