diff --git a/BUILD b/BUILD index 5ba4621981..edf541ddc6 100644 --- a/BUILD +++ b/BUILD @@ -487,12 +487,14 @@ HWY_TESTS = [ ("hwy/", "targets_test"), ("hwy/tests/", "arithmetic_test"), ("hwy/tests/", "bit_permute_test"), + ("hwy/tests/", "blockwise_combine_test"), ("hwy/tests/", "blockwise_shift_test"), ("hwy/tests/", "blockwise_test"), ("hwy/tests/", "cast_test"), ("hwy/tests/", "combine_test"), ("hwy/tests/", "compare_test"), ("hwy/tests/", "compress_test"), + ("hwy/tests/", "concat_test"), ("hwy/tests/", "convert_test"), ("hwy/tests/", "count_test"), ("hwy/tests/", "crypto_test"), diff --git a/CMakeLists.txt b/CMakeLists.txt index 09c9ec5358..9cf044cbc9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,6 +59,33 @@ if(CHECK_PIE_SUPPORTED) endif() endif() +if (CMAKE_CXX_COMPILER_ARCHITECTURE_ID MATCHES "RISCV32|RISCV64|RISCV128" OR CMAKE_SYSTEM_PROCESSOR MATCHES "riscv32|riscv64|riscv128") + include(CheckCSourceCompiles) + check_c_source_compiles(" + #if __riscv_xlen == 64 + int main() { return 0; } + #else + #error Not RISCV-64 + #endif + " IS_RISCV_XLEN_64) + + check_c_source_compiles(" + #if __riscv_xlen == 32 + int main() { return 0; } + #else + #error Not RISCV-32 + #endif + " IS_RISCV_XLEN_32) + + if(IS_RISCV_XLEN_32) + set(RISCV_XLEN 32) + elseif(IS_RISCV_XLEN_64) + set(RISCV_XLEN 64) + else() + message(WARNING "Unable to determine RISC-V XLEN") + endif() +endif() + include(GNUInstallDirs) if (NOT CMAKE_BUILD_TYPE) @@ -72,7 +99,7 @@ set(HWY_CMAKE_ARM7 OFF CACHE BOOL "Set copts for Armv7 with NEON (requires vfpv4 # skipped. For GCC 13.1+, you can also build with -fexcess-precision=standard. set(HWY_CMAKE_SSE2 OFF CACHE BOOL "Set SSE2 as baseline for 32-bit x86?") -# Currently this will compile the entire codebase with `-march=rv64gcv1p0`: +# Currently this will compile the entire codebase with `-march=rvgcv1p0`: set(HWY_CMAKE_RVV ON CACHE BOOL "Set copts for RISCV with RVV?") # Unconditionally adding -Werror risks breaking the build when new warnings @@ -87,6 +114,12 @@ set(HWY_ENABLE_EXAMPLES ON CACHE BOOL "Build examples") set(HWY_ENABLE_INSTALL ON CACHE BOOL "Install library") set(HWY_ENABLE_TESTS ON CACHE BOOL "Enable HWY tests") +if (MSVC) +set(HWY_TEST_STANDALONE ON CACHE BOOL "Disable use of googletest") +else() +set(HWY_TEST_STANDALONE OFF CACHE BOOL "Disable use of googletest") +endif() + if (NOT DEFINED CMAKE_CXX_STANDARD) if ("cxx_std_17" IN_LIST CMAKE_CXX_COMPILE_FEATURES) set(HWY_CXX_STD_TGT_COMPILE_FEATURE cxx_std_17) @@ -378,8 +411,13 @@ else() # we add the gcv compiler flag, which then requires the CPU (now when using # either compiler) to support V. if(HWY_CMAKE_RVV) - list(APPEND HWY_FLAGS -march=rv64gcv1p0) - add_link_options(-march=rv64gcv1p0) + if(RISCV_XLEN EQUAL 64) + list(APPEND HWY_FLAGS -march=rv64gcv1p0) + add_link_options(-march=rv64gcv1p0) + elseif(RISCV_XLEN EQUAL 32) + list(APPEND HWY_FLAGS -march=rv32gcv1p0) + add_link_options(-march=rv32gcv1p0) + endif() if(${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") list(APPEND HWY_FLAGS -menable-experimental-extensions) endif() @@ -579,6 +617,15 @@ if (HWY_ENABLE_CONTRIB) list(APPEND HWY_PC_FILES libhwy-contrib.pc) endif() # HWY_ENABLE_CONTRIB if (HWY_ENABLE_TESTS) + +if (HWY_TEST_STANDALONE) + set(HWY_PC_HWY_TEST_REQUIRES "") + set(HWY_PC_HWY_TEST_CFLAGS "-DHWY_TEST_STANDALONE=1") +else() + set(HWY_PC_HWY_TEST_REQUIRES "gtest") + set(HWY_PC_HWY_TEST_CFLAGS "") +endif() + list(APPEND HWY_PC_FILES libhwy-test.pc) endif() # HWY_ENABLE_TESTS foreach (pc ${HWY_PC_FILES}) @@ -628,6 +675,8 @@ enable_testing() include(GoogleTest) set(HWY_SYSTEM_GTEST OFF CACHE BOOL "Use pre-installed googletest?") + +if(NOT HWY_TEST_STANDALONE) if(HWY_SYSTEM_GTEST) find_package(GTest REQUIRED) else() @@ -656,6 +705,7 @@ add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/googletest-src ${CMAKE_CURRENT_BINARY_DIR}/googletest-build EXCLUDE_FROM_ALL) endif() # HWY_SYSTEM_GTEST +endif() # HWY_TEST_STANDALONE set(HWY_TEST_FILES hwy/contrib/algo/copy_test.cc @@ -671,12 +721,14 @@ set(HWY_TEST_FILES hwy/examples/skeleton_test.cc hwy/tests/arithmetic_test.cc hwy/tests/bit_permute_test.cc + hwy/tests/blockwise_combine_test.cc hwy/tests/blockwise_shift_test.cc hwy/tests/blockwise_test.cc hwy/tests/cast_test.cc hwy/tests/combine_test.cc hwy/tests/compare_test.cc hwy/tests/compress_test.cc + hwy/tests/concat_test.cc hwy/tests/convert_test.cc hwy/tests/count_test.cc hwy/tests/crypto_test.cc @@ -745,15 +797,19 @@ list(APPEND HWY_TEST_FILES ) endif() # HWY_ENABLE_CONTRIB -if(HWY_SYSTEM_GTEST) - if (CMAKE_VERSION VERSION_LESS 3.20) - set(HWY_GTEST_LIBS GTest::GTest GTest::Main) +if(HWY_TEST_STANDALONE) + set(HWY_GTEST_LIBS "") +else() + if(HWY_SYSTEM_GTEST) + if (CMAKE_VERSION VERSION_LESS 3.20) + set(HWY_GTEST_LIBS GTest::GTest GTest::Main) + else() + set(HWY_GTEST_LIBS GTest::gtest GTest::gtest_main) + endif() else() - set(HWY_GTEST_LIBS GTest::gtest GTest::gtest_main) + set(HWY_GTEST_LIBS gtest gtest_main) endif() -else() - set(HWY_GTEST_LIBS gtest gtest_main) -endif() +endif() # HWY_TEST_STANDALONE file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tests) foreach (TESTFILE IN LISTS HWY_TEST_FILES) @@ -766,6 +822,9 @@ foreach (TESTFILE IN LISTS HWY_TEST_FILES) # cause compile errors because only one may be set, and other CMakeLists.txt # that include us may set them. target_compile_options(${TESTNAME} PRIVATE -DHWY_IS_TEST=1) + if(HWY_TEST_STANDALONE) + target_compile_options(${TESTNAME} PRIVATE -DHWY_TEST_STANDALONE=1) + endif() target_compile_features(${TESTNAME} PRIVATE ${HWY_CXX_STD_TGT_COMPILE_FEATURE}) target_link_libraries(${TESTNAME} PRIVATE ${HWY_TEST_LIBS} ${HWY_GTEST_LIBS}) diff --git a/MODULE.bazel b/MODULE.bazel index bd798803af..1cc76ad269 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -1,6 +1,10 @@ -module(name = "highway", version = "1.2.0") +module( + name = "highway", + version = "1.2.0", +) -bazel_dep(name = "bazel_skylib", version = "1.3.0") -bazel_dep(name = "googletest", version = "1.12.1") -bazel_dep(name = "rules_cc", version = "0.0.4") -bazel_dep(name = "rules_license", version = "0.0.4") +bazel_dep(name = "bazel_skylib", version = "1.6.1") +bazel_dep(name = "googletest", version = "1.15.2") +bazel_dep(name = "rules_cc", version = "0.0.9") +bazel_dep(name = "rules_license", version = "0.0.7") +bazel_dep(name = "platforms", version = "0.0.10") diff --git a/g3doc/highway_intro.pdf b/g3doc/highway_intro.pdf index e051a2c631..dda64922b4 100644 Binary files a/g3doc/highway_intro.pdf and b/g3doc/highway_intro.pdf differ diff --git a/hwy/abort.cc b/hwy/abort.cc index 8273737335..a40ee59e6e 100644 --- a/hwy/abort.cc +++ b/hwy/abort.cc @@ -62,9 +62,10 @@ HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4) // Now terminate the program: #if HWY_ARCH_RISCV exit(1); // trap/abort just freeze Spike. -#elif HWY_IS_DEBUG_BUILD && !HWY_COMPILER_MSVC +#elif HWY_IS_DEBUG_BUILD && !HWY_COMPILER_MSVC && !HWY_ARCH_ARM // Facilitates breaking into a debugger, but don't use this in non-debug // builds because it looks like "illegal instruction", which is misleading. + // Also does not work on Arm. __builtin_trap(); #else abort(); // Compile error without this due to HWY_NORETURN. diff --git a/hwy/abort_test.cc b/hwy/abort_test.cc index e75e060ee2..804ae06ace 100644 --- a/hwy/abort_test.cc +++ b/hwy/abort_test.cc @@ -13,14 +13,13 @@ #include "hwy/tests/test_util-inl.h" // HWY_ASSERT_EQ namespace hwy { +namespace { #ifdef GTEST_HAS_DEATH_TEST -namespace { std::string GetBaseName(std::string const& file_name) { auto last_slash = file_name.find_last_of("/\\"); return file_name.substr(last_slash + 1); } -} // namespace TEST(AbortDeathTest, AbortDefault) { std::string expected = std::string("Abort at ") + GetBaseName(__FILE__) + @@ -68,6 +67,7 @@ TEST(AbortTest, AbortOverrideChain) { HWY_ASSERT(GetAbortFunc() == nullptr); } +} // namespace } // namespace hwy HWY_TEST_MAIN(); diff --git a/hwy/aligned_allocator.cc b/hwy/aligned_allocator.cc index b88a64e24e..e857b2288f 100644 --- a/hwy/aligned_allocator.cc +++ b/hwy/aligned_allocator.cc @@ -27,7 +27,8 @@ namespace hwy { namespace { -#if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000 +#if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \ + __riscv_v_intrinsic >= 11000 // Not actually an upper bound on the size, but this value prevents crossing a // 4K boundary (relevant on Andes). constexpr size_t kAlignment = HWY_MAX(HWY_ALIGNMENT, 4096); diff --git a/hwy/aligned_allocator.h b/hwy/aligned_allocator.h index 6274c5d192..e738c8be65 100644 --- a/hwy/aligned_allocator.h +++ b/hwy/aligned_allocator.h @@ -181,14 +181,14 @@ static inline constexpr size_t ShiftCount(size_t n) { template T* AllocateAlignedItems(size_t items, AllocPtr alloc_ptr, void* opaque_ptr) { - constexpr size_t size = sizeof(T); + constexpr size_t kSize = sizeof(T); - constexpr bool is_pow2 = (size & (size - 1)) == 0; - constexpr size_t bits = ShiftCount(size); - static_assert(!is_pow2 || (1ull << bits) == size, "ShiftCount is incorrect"); + constexpr bool kIsPow2 = (kSize & (kSize - 1)) == 0; + constexpr size_t kBits = ShiftCount(kSize); + static_assert(!kIsPow2 || (1ull << kBits) == kSize, "ShiftCount has a bug"); - const size_t bytes = is_pow2 ? items << bits : items * size; - const size_t check = is_pow2 ? bytes >> bits : bytes / size; + const size_t bytes = kIsPow2 ? items << kBits : items * kSize; + const size_t check = kIsPow2 ? bytes >> kBits : bytes / kSize; if (check != items) { return nullptr; // overflowed } diff --git a/hwy/aligned_allocator_test.cc b/hwy/aligned_allocator_test.cc index 83e77534a3..e35a8dc2ad 100644 --- a/hwy/aligned_allocator_test.cc +++ b/hwy/aligned_allocator_test.cc @@ -84,6 +84,7 @@ class FakeAllocator { } // namespace namespace hwy { +namespace { #if !HWY_TEST_STANDALONE class AlignedAllocatorTest : public testing::Test {}; @@ -283,8 +284,6 @@ TEST(AlignedAllocatorTest, TestDefaultInit) { (addr2 >> (kBits - 1)) >> (kBits - 1)); } -namespace { - using std::array; using std::vector; diff --git a/hwy/base.h b/hwy/base.h index ac7f061ca3..c993833dad 100644 --- a/hwy/base.h +++ b/hwy/base.h @@ -165,7 +165,8 @@ namespace hwy { // Returns a pointer whose type is `type` (T*), while allowing the compiler to // assume that the untyped pointer `ptr` is aligned to a multiple of sizeof(T). #define HWY_RCAST_ALIGNED(type, ptr) \ - reinterpret_cast(HWY_ASSUME_ALIGNED((ptr), alignof(RemovePtr))) + reinterpret_cast( \ + HWY_ASSUME_ALIGNED((ptr), alignof(hwy::RemovePtr))) // Clang and GCC require attributes on each function into which SIMD intrinsics // are inlined. Support both per-function annotation (HWY_ATTR) for lambdas and diff --git a/hwy/base_test.cc b/hwy/base_test.cc index f9f1bc9f98..a22da3aa64 100644 --- a/hwy/base_test.cc +++ b/hwy/base_test.cc @@ -26,6 +26,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { HWY_NOINLINE void TestAllLimits() { HWY_ASSERT_EQ(uint8_t{0}, LimitsMin()); @@ -837,14 +838,15 @@ HWY_NOINLINE void TestAllSpecialFloat() { test(bfloat16_t()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(BaseTest); HWY_EXPORT_AND_TEST_P(BaseTest, TestAllLimits); HWY_EXPORT_AND_TEST_P(BaseTest, TestAllLowestHighest); @@ -858,6 +860,7 @@ HWY_EXPORT_AND_TEST_P(BaseTest, TestAllMul128); HWY_EXPORT_AND_TEST_P(BaseTest, TestAllEndian); HWY_EXPORT_AND_TEST_P(BaseTest, TestAllSpecialFloat); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/contrib/algo/copy_test.cc b/hwy/contrib/algo/copy_test.cc index 828a475844..054a01ce07 100644 --- a/hwy/contrib/algo/copy_test.cc +++ b/hwy/contrib/algo/copy_test.cc @@ -37,6 +37,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { // Returns random integer in [0, 128), which fits in any lane type. template @@ -189,19 +190,21 @@ void TestAllCopyIf() { ForUI163264(ForPartialVectors>()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(CopyTest); HWY_EXPORT_AND_TEST_P(CopyTest, TestAllFill); HWY_EXPORT_AND_TEST_P(CopyTest, TestAllCopy); HWY_EXPORT_AND_TEST_P(CopyTest, TestAllCopyIf); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/contrib/algo/find_test.cc b/hwy/contrib/algo/find_test.cc index c6ebf4cb6d..8593b60e94 100644 --- a/hwy/contrib/algo/find_test.cc +++ b/hwy/contrib/algo/find_test.cc @@ -42,6 +42,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { // Returns random number in [-8, 8] - we use knowledge of the range to Find() // values we know are not present. @@ -210,18 +211,20 @@ void TestAllFindIf() { ForAllTypes(ForPartialVectors>()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(FindTest); HWY_EXPORT_AND_TEST_P(FindTest, TestAllFind); HWY_EXPORT_AND_TEST_P(FindTest, TestAllFindIf); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/contrib/algo/transform_test.cc b/hwy/contrib/algo/transform_test.cc index aad8165f3c..fc4fd16a12 100644 --- a/hwy/contrib/algo/transform_test.cc +++ b/hwy/contrib/algo/transform_test.cc @@ -40,6 +40,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { constexpr double kAlpha = 1.5; // arbitrary scalar @@ -440,14 +441,15 @@ void TestAllReplace() { ForFloatTypes(ForPartialVectors>()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(TransformTest); HWY_EXPORT_AND_TEST_P(TransformTest, TestAllGenerate); HWY_EXPORT_AND_TEST_P(TransformTest, TestAllForeach); @@ -456,6 +458,7 @@ HWY_EXPORT_AND_TEST_P(TransformTest, TestAllTransform1); HWY_EXPORT_AND_TEST_P(TransformTest, TestAllTransform2); HWY_EXPORT_AND_TEST_P(TransformTest, TestAllReplace); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/contrib/bit_pack/bit_pack_test.cc b/hwy/contrib/bit_pack/bit_pack_test.cc index dd30b2b797..2a660b4750 100644 --- a/hwy/contrib/bit_pack/bit_pack_test.cc +++ b/hwy/contrib/bit_pack/bit_pack_test.cc @@ -48,6 +48,7 @@ size_t last_bits = 0; uint64_t best_target = ~0ull; #endif namespace HWY_NAMESPACE { +namespace { template T Random(RandomState& rng) { @@ -222,20 +223,22 @@ void TestAllPack64() { #endif } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(BitPackTest); HWY_EXPORT_AND_TEST_P(BitPackTest, TestAllPack8); HWY_EXPORT_AND_TEST_P(BitPackTest, TestAllPack16); HWY_EXPORT_AND_TEST_P(BitPackTest, TestAllPack32); HWY_EXPORT_AND_TEST_P(BitPackTest, TestAllPack64); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/contrib/dot/dot_test.cc b/hwy/contrib/dot/dot_test.cc index 1493a7c304..b57eab7ce4 100644 --- a/hwy/contrib/dot/dot_test.cc +++ b/hwy/contrib/dot/dot_test.cc @@ -32,6 +32,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template HWY_NOINLINE T1 SimpleDot(const T1* pa, const T2* pb, size_t num) { @@ -269,19 +270,21 @@ void TestAllDotF32BF16() { // Both bf16. void TestAllDotBF16() { ForShrinkableVectors()(bfloat16_t()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(DotTest); HWY_EXPORT_AND_TEST_P(DotTest, TestAllDot); HWY_EXPORT_AND_TEST_P(DotTest, TestAllDotF32BF16); HWY_EXPORT_AND_TEST_P(DotTest, TestAllDotBF16); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/contrib/image/image_test.cc b/hwy/contrib/image/image_test.cc index 1e526471bc..aebbdcff87 100644 --- a/hwy/contrib/image/image_test.cc +++ b/hwy/contrib/image/image_test.cc @@ -30,6 +30,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { // Ensure we can always write full aligned vectors. struct TestAlignedT { @@ -133,18 +134,20 @@ struct TestUnalignedT { void TestUnaligned() { ForUnsignedTypes(TestUnalignedT()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(ImageTest); HWY_EXPORT_AND_TEST_P(ImageTest, TestAligned); HWY_EXPORT_AND_TEST_P(ImageTest, TestUnaligned); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/contrib/math/math_test.cc b/hwy/contrib/math/math_test.cc index 1e04f2ee8f..6ac36a6724 100644 --- a/hwy/contrib/math/math_test.cc +++ b/hwy/contrib/math/math_test.cc @@ -34,6 +34,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { // We have had test failures caused by excess precision due to keeping // intermediate results in 80-bit x87 registers. One such failure mode is that @@ -625,14 +626,15 @@ HWY_NOINLINE void TestAllHypot() { ForFloat3264Types(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMathTest); HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAcos); HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAcosh); @@ -656,6 +658,7 @@ HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllSinCosSin); HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllSinCosCos); HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllHypot); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/contrib/matvec/matvec_test.cc b/hwy/contrib/matvec/matvec_test.cc index 9984aab4c7..968fe24541 100644 --- a/hwy/contrib/matvec/matvec_test.cc +++ b/hwy/contrib/matvec/matvec_test.cc @@ -42,6 +42,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template HWY_NOINLINE void SimpleMatVecAdd(const MatT* HWY_RESTRICT mat, @@ -272,19 +273,21 @@ void TestAllMatVecBF16Both() { ForGEVectors<32, TestMatVecAdd>()(float()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(MatVecTest); HWY_EXPORT_AND_TEST_P(MatVecTest, TestAllMatVecAdd); HWY_EXPORT_AND_TEST_P(MatVecTest, TestAllMatVecBF16); HWY_EXPORT_AND_TEST_P(MatVecTest, TestAllMatVecBF16Both); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - +HWY_TEST_MAIN(); #endif // HWY_ONCE diff --git a/hwy/contrib/random/random_test.cc b/hwy/contrib/random/random_test.cc index 44cdd30000..320c124b55 100644 --- a/hwy/contrib/random/random_test.cc +++ b/hwy/contrib/random/random_test.cc @@ -22,6 +22,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { // required: unique per target +namespace { constexpr std::uint64_t tests = 1UL << 10; @@ -287,15 +288,15 @@ void TestUniformCachedXorshiro() { } #endif +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy - HWY_AFTER_NAMESPACE(); // required if not using HWY_ATTR #if HWY_ONCE - -// This macro declares a static array used for dynamic dispatch. namespace hwy { +namespace { HWY_BEFORE_TEST(HwyRandomTest); HWY_EXPORT_AND_TEST_P(HwyRandomTest, TestSeeding); HWY_EXPORT_AND_TEST_P(HwyRandomTest, TestMultiThreadSeeding); @@ -310,6 +311,7 @@ HWY_EXPORT_AND_TEST_P(HwyRandomTest, TestNextFixedNUniformDist); HWY_EXPORT_AND_TEST_P(HwyRandomTest, TestUniformCachedXorshiro); #endif HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/contrib/sort/BUILD b/hwy/contrib/sort/BUILD index c5935f2cb0..7be34a480d 100644 --- a/hwy/contrib/sort/BUILD +++ b/hwy/contrib/sort/BUILD @@ -226,6 +226,8 @@ cc_test( ":vqsort_for_test", "//:hwy", "//:hwy_test_util", + "//:thread_pool", + "//:topology", ] + TEST_MAIN, ) diff --git a/hwy/contrib/sort/sort_test.cc b/hwy/contrib/sort/sort_test.cc index 66099d0b54..2d9f825a99 100644 --- a/hwy/contrib/sort/sort_test.cc +++ b/hwy/contrib/sort/sort_test.cc @@ -13,21 +13,28 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include +#include // std::iota #include #include +#include "hwy/aligned_allocator.h" // IsAligned +#include "hwy/base.h" +#include "hwy/contrib/sort/vqsort.h" +#include "hwy/contrib/thread_pool/thread_pool.h" +#include "hwy/contrib/thread_pool/topology.h" +#include "hwy/per_target.h" + #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "hwy/contrib/sort/sort_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep -// After foreach_target -#include "hwy/aligned_allocator.h" // IsAligned +#include "hwy/highway.h" +// After highway.h #include "hwy/contrib/sort/algo-inl.h" #include "hwy/contrib/sort/result-inl.h" #include "hwy/contrib/sort/vqsort-inl.h" // BaseCase -#include "hwy/contrib/sort/vqsort.h" -#include "hwy/highway.h" #include "hwy/print-inl.h" #include "hwy/tests/test_util-inl.h" @@ -59,6 +66,39 @@ using detail::OrderDescendingKV128; using detail::Traits128; #endif // !HAVE_INTEL && HWY_TARGET != HWY_SCALAR +template +void TestSortIota(hwy::ThreadPool& pool) { + pool.Run(128, 300, [](uint64_t task, size_t /*thread*/) { + const size_t num = static_cast(task); + Key keys[300]; + std::iota(keys, keys + num, Key{0}); + VQSort(keys, num, hwy::SortAscending()); + for (size_t i = 0; i < num; ++i) { + if (keys[i] != static_cast(i)) { + HWY_ABORT("num %zu i %zu: not iota, got %.0f\n", num, i, + static_cast(keys[i])); + } + } + }); +} + +void TestAllSortIota() { +#if VQSORT_ENABLED + hwy::ThreadPool pool(hwy::HaveThreadingSupport() ? 4 : 0); + TestSortIota(pool); + TestSortIota(pool); + if (hwy::HaveInteger64()) { + TestSortIota(pool); + TestSortIota(pool); + } + TestSortIota(pool); + if (hwy::HaveFloat64()) { + TestSortIota(pool); + } + fprintf(stderr, "Iota OK\n"); +#endif +} + // Supports full/partial sort and select. template void TestAnySort(const std::vector& algos, size_t num_lanes) { @@ -229,13 +269,15 @@ void TestAllSelect() { HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(SortTest); +HWY_EXPORT_AND_TEST_P(SortTest, TestAllSortIota); HWY_EXPORT_AND_TEST_P(SortTest, TestAllSort); HWY_EXPORT_AND_TEST_P(SortTest, TestAllSelect); HWY_EXPORT_AND_TEST_P(SortTest, TestAllPartialSort); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - +HWY_TEST_MAIN(); #endif // HWY_ONCE diff --git a/hwy/contrib/sort/sort_unit_test.cc b/hwy/contrib/sort/sort_unit_test.cc index d059cc6171..f7611cf5f8 100644 --- a/hwy/contrib/sort/sort_unit_test.cc +++ b/hwy/contrib/sort/sort_unit_test.cc @@ -18,7 +18,9 @@ #include #include +#include "hwy/aligned_allocator.h" // IsAligned #include "hwy/base.h" +#include "hwy/contrib/sort/vqsort.h" #include "hwy/detect_compiler_arch.h" // clang-format off @@ -26,14 +28,12 @@ #define HWY_TARGET_INCLUDE "hwy/contrib/sort/sort_unit_test.cc" // NOLINT // clang-format on #include "hwy/foreach_target.h" // IWYU pragma: keep -// After foreach_target -#include "hwy/aligned_allocator.h" // IsAligned +#include "hwy/highway.h" +// After highway.h #include "hwy/contrib/sort/algo-inl.h" #include "hwy/contrib/sort/result-inl.h" #include "hwy/contrib/sort/traits128-inl.h" #include "hwy/contrib/sort/vqsort-inl.h" // BaseCase -#include "hwy/contrib/sort/vqsort.h" -#include "hwy/highway.h" #include "hwy/print-inl.h" #include "hwy/tests/test_util-inl.h" @@ -558,8 +558,8 @@ static void TestAllGenerator() {} HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(SortTest); HWY_EXPORT_AND_TEST_P(SortTest, TestAllFloatLargerSmaller); HWY_EXPORT_AND_TEST_P(SortTest, TestAllFloatInf); @@ -568,6 +568,7 @@ HWY_EXPORT_AND_TEST_P(SortTest, TestAllBaseCase); HWY_EXPORT_AND_TEST_P(SortTest, TestAllPartition); HWY_EXPORT_AND_TEST_P(SortTest, TestAllGenerator); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - +HWY_TEST_MAIN(); #endif // HWY_ONCE diff --git a/hwy/contrib/thread_pool/thread_pool.h b/hwy/contrib/thread_pool/thread_pool.h index 6ae3e42715..b12211b0c5 100644 --- a/hwy/contrib/thread_pool/thread_pool.h +++ b/hwy/contrib/thread_pool/thread_pool.h @@ -26,6 +26,7 @@ #include // snprintf #include +#include #include //NOLINT // IWYU pragma: end_exports @@ -344,8 +345,8 @@ struct alignas(HWY_ALIGNMENT) PoolMem { class PoolMemOwner { public: explicit PoolMemOwner(size_t num_threads) - // There is at least one worker, the main thread. - : num_workers_(HWY_MAX(num_threads, size_t{1})) { + // The main thread also participates. + : num_workers_(num_threads + 1) { const size_t size = sizeof(PoolMem) + num_workers_ * sizeof(PoolWorker); bytes_ = hwy::AllocateAligned(size); HWY_ASSERT(bytes_); diff --git a/hwy/contrib/thread_pool/thread_pool_test.cc b/hwy/contrib/thread_pool/thread_pool_test.cc index ecf7726878..cc846d70f2 100644 --- a/hwy/contrib/thread_pool/thread_pool_test.cc +++ b/hwy/contrib/thread_pool/thread_pool_test.cc @@ -374,7 +374,7 @@ TEST(ThreadPoolTest, TestCounter) { ThreadPool pool(kNumThreads); for (PoolWaitMode mode : {PoolWaitMode::kSpin, PoolWaitMode::kBlock}) { pool.SetWaitMode(mode); - alignas(128) Counter counters[kNumThreads]; + alignas(128) Counter counters[1+kNumThreads]; const uint64_t kNumTasks = kNumThreads * 19; pool.Run(0, kNumTasks, @@ -387,7 +387,7 @@ TEST(ThreadPoolTest, TestCounter) { expected += i; } - for (size_t i = 1; i < kNumThreads; ++i) { + for (size_t i = 1; i < pool.NumWorkers(); ++i) { counters[0].Assimilate(counters[i]); } HWY_ASSERT_EQ(expected, counters[0].counter.load()); diff --git a/hwy/contrib/thread_pool/topology.cc b/hwy/contrib/thread_pool/topology.cc index 4afd26855d..cb34c2700e 100644 --- a/hwy/contrib/thread_pool/topology.cc +++ b/hwy/contrib/thread_pool/topology.cc @@ -18,9 +18,9 @@ #include #include #include +#include // strchr #include -#include // NOLINT #include #include "hwy/detect_compiler_arch.h" // HWY_OS_WIN @@ -39,10 +39,13 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif +#include +#include #include #include +#include #include -#include +#include // sysconf #endif // HWY_OS_LINUX || HWY_OS_FREEBSD #if HWY_OS_FREEBSD @@ -54,12 +57,6 @@ #include #endif -#if HWY_OS_LINUX -#include -#include -#include -#endif // HWY_OS_LINUX - #include "hwy/base.h" namespace hwy { @@ -77,27 +74,39 @@ HWY_CONTRIB_DLLEXPORT size_t TotalLogicalProcessors() { #if HWY_ARCH_WASM const int num_cores = emscripten_num_logical_cores(); if (num_cores > 0) lp = static_cast(num_cores); -#else - const unsigned concurrency = std::thread::hardware_concurrency(); - if (concurrency != 0) lp = static_cast(concurrency); +#elif HWY_OS_WIN + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); // always succeeds + // WARNING: this is only for the current group, hence limited to 64. + lp = static_cast(sysinfo.dwNumberOfProcessors); +#elif HWY_OS_LINUX + // Use configured, not "online" (_SC_NPROCESSORS_ONLN), because we want an + // upper bound. + const long ret = sysconf(_SC_NPROCESSORS_CONF); // NOLINT(runtime/int) + if (ret < 0) { + fprintf(stderr, "Unexpected value of _SC_NPROCESSORS_CONF: %d\n", + static_cast(ret)); + } else { + lp = static_cast(ret); + } #endif - // WASM or C++ stdlib failed to detect #CPUs. - if (lp == 0) { - if (HWY_IS_DEBUG_BUILD) { - fprintf( - stderr, - "Unknown TotalLogicalProcessors. HWY_OS_: WIN=%d LINUX=%d APPLE=%d;\n" - "HWY_ARCH_: WASM=%d X86=%d PPC=%d ARM=%d RISCV=%d S390X=%d\n", - HWY_OS_WIN, HWY_OS_LINUX, HWY_OS_APPLE, HWY_ARCH_WASM, HWY_ARCH_X86, - HWY_ARCH_PPC, HWY_ARCH_ARM, HWY_ARCH_RISCV, HWY_ARCH_S390X); + if (HWY_UNLIKELY(lp == 0)) { // Failed to detect. + HWY_IF_CONSTEXPR(HWY_IS_DEBUG_BUILD) { + fprintf(stderr, + "Unknown TotalLogicalProcessors, assuming 1. " + "HWY_OS_: WIN=%d LINUX=%d APPLE=%d;\n" + "HWY_ARCH_: WASM=%d X86=%d PPC=%d ARM=%d RISCV=%d S390X=%d\n", + HWY_OS_WIN, HWY_OS_LINUX, HWY_OS_APPLE, HWY_ARCH_WASM, + HWY_ARCH_X86, HWY_ARCH_PPC, HWY_ARCH_ARM, HWY_ARCH_RISCV, + HWY_ARCH_S390X); } return 1; } // Warn that we are clamping. - if (lp > kMaxLogicalProcessors) { - if (HWY_IS_DEBUG_BUILD) { + if (HWY_UNLIKELY(lp > kMaxLogicalProcessors)) { + HWY_IF_CONSTEXPR(HWY_IS_DEBUG_BUILD) { fprintf(stderr, "OS reports %zu processors but clamping to %zu\n", lp, kMaxLogicalProcessors); } @@ -281,39 +290,60 @@ class File { int fd_; }; -// Interprets as base-10 ASCII, handling an K or M suffix if present. -bool ParseSysfs(const char* str, size_t len, size_t* out) { - size_t value = 0; +// Returns bytes read, or 0 on failure. +size_t ReadSysfs(const char* format, size_t lp, char* buf200) { + char path[200]; + const int bytes_written = snprintf(path, sizeof(path), format, lp); + HWY_ASSERT(0 < bytes_written && + bytes_written < static_cast(sizeof(path) - 1)); + + const File file(path); + return file.Read(buf200); +} + +// Interprets [str + pos, str + end) as base-10 ASCII. Stops when any non-digit +// is found, or at end. Returns false if no digits found. +bool ParseDigits(const char* str, const size_t end, size_t& pos, size_t* out) { + HWY_ASSERT(pos <= end); // 9 digits cannot overflow even 32-bit size_t. - size_t pos = 0; - for (; pos < HWY_MIN(len, 9); ++pos) { + const size_t stop = pos + 9; + *out = 0; + for (; pos < HWY_MIN(end, stop); ++pos) { const int c = str[pos]; if (c < '0' || c > '9') break; - value *= 10; - value += static_cast(c - '0'); + *out *= 10; + *out += static_cast(c - '0'); } if (pos == 0) { // No digits found *out = 0; return false; } - if (str[pos] == 'K') value <<= 10; - if (str[pos] == 'M') value <<= 20; - *out = value; return true; } -bool ReadSysfs(const char* format, size_t lp, size_t* out) { - char path[200]; - const int bytes_written = snprintf(path, sizeof(path), format, lp); - HWY_ASSERT(0 < bytes_written && - bytes_written < static_cast(sizeof(path) - 1)); +// Number, plus optional K or M suffix, plus terminator. +bool ParseNumberWithOptionalSuffix(const char* str, size_t len, size_t* out) { + size_t pos = 0; + if (!ParseDigits(str, len, pos, out)) return false; + if (str[pos] == 'K') { + *out <<= 10; + ++pos; + } + if (str[pos] == 'M') { + *out <<= 20; + ++pos; + } + if (str[pos] != '\0' && str[pos] != '\n') { + HWY_ABORT("Expected [suffix] terminator at %zu %s\n", pos, str); + } + return true; +} - const File file(path); +bool ReadNumberWithOptionalSuffix(const char* format, size_t lp, size_t* out) { char buf200[200]; - const size_t pos = file.Read(buf200); + const size_t pos = ReadSysfs(format, lp, buf200); if (pos == 0) return false; - - return ParseSysfs(buf200, pos, out); + return ParseNumberWithOptionalSuffix(buf200, pos, out); } const char* kPackage = @@ -322,6 +352,7 @@ const char* kCluster = "/sys/devices/system/cpu/cpu%zu/cache/index3/id"; const char* kCore = "/sys/devices/system/cpu/cpu%zu/topology/core_id"; const char* kL2Size = "/sys/devices/system/cpu/cpu%zu/cache/index2/size"; const char* kL3Size = "/sys/devices/system/cpu/cpu%zu/cache/index3/size"; +const char* kNode = "/sys/devices/system/node/node%zu/cpulist"; // sysfs values can be arbitrarily large, so store in a map and replace with // indices in order of appearance. @@ -332,7 +363,7 @@ class Remapper { template bool operator()(const char* format, size_t lp, T* HWY_RESTRICT out_index) { size_t opaque; - if (!ReadSysfs(format, lp, &opaque)) return false; + if (!ReadNumberWithOptionalSuffix(format, lp, &opaque)) return false; const auto ib = indices_.insert({opaque, num_}); num_ += ib.second; // increment if inserted @@ -382,6 +413,77 @@ std::vector DetectPackages(std::vector& lps) { return per_package; } +// Sets LP.node for all `lps`. +void SetNodes(std::vector& lps) { + // For each NUMA node found via sysfs: + for (size_t node = 0;; node++) { + // Read its cpulist so we can scatter `node` to all its `lps`. + char buf200[200]; + const size_t bytes_read = ReadSysfs(kNode, node, buf200); + if (bytes_read == 0) break; + + constexpr size_t kNotFound = ~size_t{0}; + size_t pos = 0; + + // Returns first `found_pos >= pos` where `buf200[found_pos] == c`, or + // `kNotFound`. + const auto find = [buf200, &pos](char c) -> size_t { + const char* found_ptr = strchr(buf200 + pos, c); + if (found_ptr == nullptr) return kNotFound; + HWY_ASSERT(found_ptr >= buf200); + const size_t found_pos = static_cast(found_ptr - buf200); + HWY_ASSERT(found_pos >= pos && buf200[found_pos] == c); + return found_pos; + }; + + // Reads LP number and advances `pos`. `end` is for verifying we did not + // read past a known terminator, or the end of string. + const auto parse_lp = [buf200, bytes_read, &pos, + &lps](size_t end) -> size_t { + end = HWY_MIN(end, bytes_read); + size_t lp; + HWY_ASSERT(ParseDigits(buf200, end, pos, &lp)); + HWY_IF_CONSTEXPR(HWY_ARCH_RISCV) { + // On RISC-V, both TotalLogicalProcessors and GetThreadAffinity may + // under-report the count, hence clamp. + lp = HWY_MIN(lp, lps.size() - 1); + } + HWY_ASSERT(lp < lps.size()); + HWY_ASSERT(pos <= end); + return lp; + }; + + // Parse all [first-]last separated by commas. + for (;;) { + // Single number or first of range: ends with dash, comma, or end. + const size_t lp_range_first = parse_lp(HWY_MIN(find('-'), find(','))); + + if (buf200[pos] == '-') { // range + ++pos; // skip dash + // Last of range ends with comma or end. + const size_t lp_range_last = parse_lp(find(',')); + + for (size_t lp = lp_range_first; lp <= lp_range_last; ++lp) { + lps[lp].node = static_cast(node); + } + } else { // single number + lps[lp_range_first].node = static_cast(node); + } + + // Done if reached end of string. + if (pos == bytes_read || buf200[pos] == '\0' || buf200[pos] == '\n') { + break; + } + // Comma means at least one more term is coming. + if (buf200[pos] == ',') { + ++pos; + continue; + } + HWY_ABORT("Unexpected character at %zu in %s\n", pos, buf200); + } // for pos + } // for node +} + } // namespace #endif // HWY_OS_LINUX @@ -390,6 +492,7 @@ HWY_CONTRIB_DLLEXPORT Topology::Topology() { lps.resize(TotalLogicalProcessors()); const std::vector& per_package = DetectPackages(lps); if (per_package.empty()) return; + SetNodes(lps); // Allocate per-package/cluster/core vectors. This indicates to callers that // detection succeeded. @@ -413,10 +516,10 @@ HWY_CONTRIB_DLLEXPORT Topology::Topology() { Cluster& c = p.clusters[ic]; const size_t lp = c.lps.First(); size_t bytes; - if (ReadSysfs(kL2Size, lp, &bytes)) { + if (ReadNumberWithOptionalSuffix(kL2Size, lp, &bytes)) { c.private_kib = bytes >> 10; } - if (ReadSysfs(kL3Size, lp, &bytes)) { + if (ReadNumberWithOptionalSuffix(kL3Size, lp, &bytes)) { c.shared_kib = bytes >> 10; } } diff --git a/hwy/contrib/thread_pool/topology.h b/hwy/contrib/thread_pool/topology.h index f80fc47c06..499b013c92 100644 --- a/hwy/contrib/thread_pool/topology.h +++ b/hwy/contrib/thread_pool/topology.h @@ -95,9 +95,9 @@ struct Topology { uint16_t core = 0; // < packages[package].cores.size() uint8_t package = 0; // < packages.size() uint8_t smt = 0; // < packages[package].cores[core].lps.Count() + uint8_t node = 0; - uint8_t reserved1 = 0; - uint8_t reserved2 = 0; + uint8_t reserved = 0; }; #pragma pack(pop) std::vector lps; // size() == TotalLogicalProcessors(). diff --git a/hwy/contrib/thread_pool/topology_test.cc b/hwy/contrib/thread_pool/topology_test.cc index 89f1b56e90..e1f4409f3a 100644 --- a/hwy/contrib/thread_pool/topology_test.cc +++ b/hwy/contrib/thread_pool/topology_test.cc @@ -49,6 +49,14 @@ TEST(TopologyTest, TestTopology) { if (topology.packages.empty()) return; HWY_ASSERT(!topology.lps.empty()); + LogicalProcessorSet nodes; + for (size_t lp = 0; lp < topology.lps.size(); ++lp) { + const size_t node = static_cast(topology.lps[lp].node); + if (!nodes.Get(node)) { + fprintf(stderr, "Found NUMA node %zu, LP %zu\n", node, lp); + nodes.Set(node); + } + } size_t lps_by_cluster = 0; size_t lps_by_core = 0; diff --git a/hwy/contrib/unroller/unroller_test.cc b/hwy/contrib/unroller/unroller_test.cc index 31a0bbabe6..e3c73e648b 100644 --- a/hwy/contrib/unroller/unroller_test.cc +++ b/hwy/contrib/unroller/unroller_test.cc @@ -30,6 +30,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template T SimpleDot(const T* pa, const T* pb, size_t num) { @@ -472,18 +473,20 @@ struct TestFind { void TestAllFind() { ForFloatTypes(ForPartialVectors()); } +} // namespace } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(UnrollerTest); HWY_EXPORT_AND_TEST_P(UnrollerTest, TestAllDot); HWY_EXPORT_AND_TEST_P(UnrollerTest, TestAllConvert); HWY_EXPORT_AND_TEST_P(UnrollerTest, TestAllFind); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/detect_targets.h b/hwy/detect_targets.h index a8d4a13f7c..175efee2dd 100644 --- a/hwy/detect_targets.h +++ b/hwy/detect_targets.h @@ -223,7 +223,7 @@ #endif // SVE[2] require recent clang or gcc versions. -#if (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \ +#if (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1900) || \ (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000) #define HWY_BROKEN_SVE (HWY_SVE | HWY_SVE2 | HWY_SVE_256 | HWY_SVE2_128) #else @@ -265,6 +265,15 @@ #define HWY_BROKEN_PPC_32BIT 0 #endif +// HWY_RVV fails to compile with GCC < 13 or Clang < 16. +#if HWY_ARCH_RISCV && \ + ((HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1600) || \ + (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1300)) +#define HWY_BROKEN_RVV (HWY_RVV) +#else +#define HWY_BROKEN_RVV 0 +#endif + // Allow the user to override this without any guarantee of success. #ifndef HWY_BROKEN_TARGETS @@ -273,7 +282,7 @@ HWY_BROKEN_AVX3_DL_ZEN4 | HWY_BROKEN_AVX3_SPR | \ HWY_BROKEN_ARM7_BIG_ENDIAN | HWY_BROKEN_ARM7_WITHOUT_VFP4 | \ HWY_BROKEN_NEON_BF16 | HWY_BROKEN_SVE | HWY_BROKEN_PPC10 | \ - HWY_BROKEN_PPC_32BIT) + HWY_BROKEN_PPC_32BIT | HWY_BROKEN_RVV) #endif // HWY_BROKEN_TARGETS @@ -738,7 +747,7 @@ #elif HWY_ARCH_S390X #define HWY_ATTAINABLE_TARGETS \ HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_S390X) -#elif HWY_ARCH_RVV +#elif HWY_ARCH_RISCV #define HWY_ATTAINABLE_TARGETS \ HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_RISCV) #else diff --git a/hwy/examples/skeleton_test.cc b/hwy/examples/skeleton_test.cc index 35fa450ee8..16a197f4ab 100644 --- a/hwy/examples/skeleton_test.cc +++ b/hwy/examples/skeleton_test.cc @@ -36,6 +36,7 @@ HWY_BEFORE_NAMESPACE(); namespace skeleton { namespace HWY_NAMESPACE { +namespace { namespace hn = hwy::HWY_NAMESPACE; @@ -129,6 +130,7 @@ HWY_NOINLINE void TestAllSumMulAdd() { hn::ForFloatTypes(hn::ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace skeleton @@ -136,9 +138,12 @@ HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace skeleton { +namespace { HWY_BEFORE_TEST(SkeletonTest); HWY_EXPORT_AND_TEST_P(SkeletonTest, TestAllFloorLog2); HWY_EXPORT_AND_TEST_P(SkeletonTest, TestAllSumMulAdd); HWY_AFTER_TEST(); +} // namespace } // namespace skeleton -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/highway_test.cc b/hwy/highway_test.cc index 140264d90e..9ec94517f6 100644 --- a/hwy/highway_test.cc +++ b/hwy/highway_test.cc @@ -28,6 +28,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template HWY_NOINLINE void TestCappedLimit(T /* tag */) { @@ -577,14 +578,15 @@ HWY_NOINLINE void TestAllBlockDFromD() { ForAllTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HighwayTest); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllCapped); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllMaxLanes); @@ -602,6 +604,7 @@ HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllDFromV); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllBlocks); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllBlockDFromD); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/ops/arm_neon-inl.h b/hwy/ops/arm_neon-inl.h index 6d80daabf4..1cf0b4a4a4 100644 --- a/hwy/ops/arm_neon-inl.h +++ b/hwy/ops/arm_neon-inl.h @@ -879,10 +879,10 @@ using Vec16 = Vec128; // FF..FF or 0. template class Mask128 { + public: // Arm C Language Extensions return and expect unsigned type. using Raw = typename detail::Raw128, N>::type; - public: using PrivateT = T; // only for DFromM static constexpr size_t kPrivateN = N; // only for DFromM @@ -5335,10 +5335,62 @@ HWY_API Vec128 NearestInt(const Vec128 v) { #endif // ------------------------------ Floating-point classification + +#if !HWY_COMPILER_CLANG || HWY_COMPILER_CLANG > 1801 || HWY_ARCH_ARM_V7 template HWY_API Mask128 IsNaN(const Vec128 v) { return v != v; } +#else +// Clang up to 18.1 generates less efficient code than the expected FCMEQ, see +// https://github.com/numpy/numpy/issues/27313 and +// https://github.com/numpy/numpy/pull/22954/files and +// https://github.com/llvm/llvm-project/issues/59855 + +#if HWY_HAVE_FLOAT16 +template +HWY_API Mask128 IsNaN(const Vec128 v) { + typename Mask128::Raw ret; + __asm__ volatile("fcmeq %0.8h, %1.8h, %1.8h" : "=w"(ret) : "w"(v.raw)); + return Not(Mask128(ret)); +} +template +HWY_API Mask128 IsNaN(const Vec128 v) { + typename Mask128::Raw ret; + __asm__ volatile("fcmeq %0.4h, %1.4h, %1.4h" : "=w"(ret) : "w"(v.raw)); + return Not(Mask128(ret)); +} +#endif // HWY_HAVE_FLOAT16 + +template +HWY_API Mask128 IsNaN(const Vec128 v) { + typename Mask128::Raw ret; + __asm__ volatile("fcmeq %0.4s, %1.4s, %1.4s" : "=w"(ret) : "w"(v.raw)); + return Not(Mask128(ret)); +} +template +HWY_API Mask128 IsNaN(const Vec128 v) { + typename Mask128::Raw ret; + __asm__ volatile("fcmeq %0.2s, %1.2s, %1.2s" : "=w"(ret) : "w"(v.raw)); + return Not(Mask128(ret)); +} + +#if HWY_HAVE_FLOAT64 +template +HWY_API Mask128 IsNaN(const Vec128 v) { + typename Mask128::Raw ret; + __asm__ volatile("fcmeq %0.2d, %1.2d, %1.2d" : "=w"(ret) : "w"(v.raw)); + return Not(Mask128(ret)); +} +template +HWY_API Mask128 IsNaN(const Vec128 v) { + typename Mask128::Raw ret; + __asm__ volatile("fcmeq %d0, %d1, %d1" : "=w"(ret) : "w"(v.raw)); + return Not(Mask128(ret)); +} +#endif // HWY_HAVE_FLOAT64 + +#endif // HWY_COMPILER_CLANG // ================================================== SWIZZLE diff --git a/hwy/ops/ppc_vsx-inl.h b/hwy/ops/ppc_vsx-inl.h index 1ee9c9b57d..d216c54853 100644 --- a/hwy/ops/ppc_vsx-inl.h +++ b/hwy/ops/ppc_vsx-inl.h @@ -4691,7 +4691,7 @@ HWY_API VFromD ConvertTo(D /* tag */, template HWY_API VFromD ConvertTo(D /* tag */, Vec128().MaxLanes()> v) { -#if defined(__OPTIMIZE__) +#if defined(__OPTIMIZE__) && (!HWY_COMPILER_CLANG || !HWY_S390X_HAVE_Z14) if (detail::IsConstantRawAltivecVect(v.raw)) { constexpr int64_t kMinI64 = LimitsMin(); constexpr int64_t kMaxI64 = LimitsMax(); @@ -4790,7 +4790,7 @@ HWY_API VFromD ConvertTo(D /* tag */, HWY_DIAGNOSTICS_OFF(disable : 5219, ignored "-Wdeprecate-lax-vec-conv-all") #endif -#if defined(__OPTIMIZE__) +#if defined(__OPTIMIZE__) && (!HWY_COMPILER_CLANG || !HWY_S390X_HAVE_Z14) if (detail::IsConstantRawAltivecVect(v.raw)) { constexpr uint64_t kMaxU64 = LimitsMax(); return Dup128VecFromValues( @@ -6255,9 +6255,16 @@ HWY_INLINE V Per128BitBlkRevLanesOnBe(V v) { template HWY_INLINE V I128Subtract(V a, V b) { #if HWY_S390X_HAVE_Z14 +#if HWY_COMPILER_CLANG + // Workaround for bug in vec_sub_u128 in Clang vecintrin.h + typedef __uint128_t VU128 __attribute__((__vector_size__(16))); + const V diff_i128{reinterpret_cast>::type>( + reinterpret_cast(a.raw) - reinterpret_cast(b.raw))}; +#else // !HWY_COMPILER_CLANG const V diff_i128{reinterpret_cast>::type>( vec_sub_u128(reinterpret_cast<__vector unsigned char>(a.raw), reinterpret_cast<__vector unsigned char>(b.raw)))}; +#endif // HWY_COMPILER_CLANG #elif defined(__SIZEOF_INT128__) using VU128 = __vector unsigned __int128; const V diff_i128{reinterpret_cast>::type>( @@ -6752,6 +6759,26 @@ HWY_INLINE VFromD>> SumsOf4( #if HWY_S390X_HAVE_Z14 namespace detail { +#if HWY_COMPILER_CLANG && HWY_HAS_BUILTIN(__builtin_s390_vsumqf) && \ + HWY_HAS_BUILTIN(__builtin_s390_vsumqg) +// Workaround for bug in vec_sum_u128 in Clang vecintrin.h +template +HWY_INLINE Vec128 SumOfU32OrU64LanesAsU128(Vec128 v) { + typedef __uint128_t VU128 __attribute__((__vector_size__(16))); + const DFromV d; + const RebindToUnsigned du; + const VU128 sum = {__builtin_s390_vsumqf(BitCast(du, v).raw, Zero(du).raw)}; + return Vec128{reinterpret_cast::type>(sum)}; +} +template +HWY_INLINE Vec128 SumOfU32OrU64LanesAsU128(Vec128 v) { + typedef __uint128_t VU128 __attribute__((__vector_size__(16))); + const DFromV d; + const RebindToUnsigned du; + const VU128 sum = {__builtin_s390_vsumqg(BitCast(du, v).raw, Zero(du).raw)}; + return Vec128{reinterpret_cast::type>(sum)}; +} +#else template HWY_INLINE Vec128 SumOfU32OrU64LanesAsU128(Vec128 v) { @@ -6760,6 +6787,7 @@ HWY_INLINE Vec128 SumOfU32OrU64LanesAsU128(Vec128 v) { return BitCast( d, Vec128{vec_sum_u128(BitCast(du, v).raw, Zero(du).raw)}); } +#endif } // namespace detail diff --git a/hwy/ops/rvv-inl.h b/hwy/ops/rvv-inl.h index fecc8bc1f5..e65602c664 100644 --- a/hwy/ops/rvv-inl.h +++ b/hwy/ops/rvv-inl.h @@ -5278,6 +5278,12 @@ template constexpr unsigned MaxMaskBits() { return ~0u; } + +template +constexpr int SufficientPow2ForMask() { + return HWY_MAX( + D().Pow2() - 3 - static_cast(FloorLog2(sizeof(TFromD))), -3); +} } // namespace detail template @@ -5304,11 +5310,13 @@ HWY_API MFromD Dup128MaskFromMaskBits(D d, unsigned mask_bits) { template HWY_API MFromD Dup128MaskFromMaskBits(D d, unsigned mask_bits) { #if HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400 - const ScalableTag du8; - const ScalableTag du16; + const ScalableTag()> du8; + const ScalableTag()> du16; // There are exactly 16 mask bits for 128 vector bits of 8-bit lanes. return detail::U8MaskBitsVecToMask( - d, BitCast(du8, Set(du16, static_cast(mask_bits)))); + d, detail::ChangeLMUL( + ScalableTag(), + BitCast(du8, Set(du16, static_cast(mask_bits))))); #else // Slow fallback for completeness; the above bits to mask cast is preferred. const RebindToUnsigned du8; @@ -5335,10 +5343,11 @@ HWY_API MFromD Dup128MaskFromMaskBits(D d, unsigned mask_bits) { if (kN < 8) mask_bits &= detail::MaxMaskBits(); #if HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400 - const ScalableTag du8; + const ScalableTag()> du8; // There are exactly 8 mask bits for 128 vector bits of 16-bit lanes. - return detail::U8MaskBitsVecToMask(d, - Set(du8, static_cast(mask_bits))); + return detail::U8MaskBitsVecToMask( + d, detail::ChangeLMUL(ScalableTag(), + Set(du8, static_cast(mask_bits)))); #else // Slow fallback for completeness; the above bits to mask cast is preferred. const RebindToUnsigned du; @@ -5354,9 +5363,10 @@ HWY_API MFromD Dup128MaskFromMaskBits(D d, unsigned mask_bits) { if (kN < 4) mask_bits &= detail::MaxMaskBits(); #if HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400 - const ScalableTag du8; + const ScalableTag()> du8; return detail::U8MaskBitsVecToMask( - d, Set(du8, static_cast(mask_bits * 0x11))); + d, detail::ChangeLMUL(ScalableTag(), + Set(du8, static_cast(mask_bits * 0x11)))); #else // Slow fallback for completeness; the above bits to mask cast is preferred. const RebindToUnsigned du; @@ -5371,9 +5381,10 @@ HWY_API MFromD Dup128MaskFromMaskBits(D d, unsigned mask_bits) { if (kN < 2) mask_bits &= detail::MaxMaskBits(); #if HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400 - const ScalableTag du8; + const ScalableTag()> du8; return detail::U8MaskBitsVecToMask( - d, Set(du8, static_cast(mask_bits * 0x55))); + d, detail::ChangeLMUL(ScalableTag(), + Set(du8, static_cast(mask_bits * 0x55)))); #else // Slow fallback for completeness; the above bits to mask cast is preferred. const RebindToUnsigned du; @@ -6017,6 +6028,23 @@ HWY_INLINE MFromD Lt128(D d, const VFromD a, const VFromD b) { #endif // HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400 // ------------------------------ Lt128Upper +#if HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400 + +template +HWY_INLINE MFromD Lt128Upper(D d, const VFromD a, const VFromD b) { + static_assert(IsSame, uint64_t>(), "D must be u64"); + auto du8mf8 = ScalableTag{}; + const vuint8mf8_t ltHL = + detail::ChangeLMUL(du8mf8, detail::MaskToU8MaskBitsVec(Lt(a, b))); + const vuint8mf8_t ltHx = detail::AndS(ltHL, 0xaa); + const vuint8mf8_t ltxL = ShiftRight<1>(ltHx); + auto du8m1 = ScalableTag{}; + return detail::U8MaskBitsVecToMask(d, + detail::ChangeLMUL(du8m1, Or(ltHx, ltxL))); +} + +#else + template HWY_INLINE MFromD Lt128Upper(D d, const VFromD a, const VFromD b) { static_assert(IsSame, uint64_t>(), "D must be u64"); @@ -6028,7 +6056,27 @@ HWY_INLINE MFromD Lt128Upper(D d, const VFromD a, const VFromD b) { return MaskFromVec(OddEven(ltHL, down)); } +#endif // HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400 + // ------------------------------ Eq128 +#if HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400 + +template +HWY_INLINE MFromD Eq128(D d, const VFromD a, const VFromD b) { + static_assert(IsSame, uint64_t>(), "D must be u64"); + auto du8mf8 = ScalableTag{}; + const vuint8mf8_t eqHL = + detail::ChangeLMUL(du8mf8, detail::MaskToU8MaskBitsVec(Eq(a, b))); + const vuint8mf8_t eqxH = ShiftRight<1>(eqHL); + const vuint8mf8_t result0L = detail::AndS(And(eqHL, eqxH), 0x55); + const vuint8mf8_t resultH0 = Add(result0L, result0L); + auto du8m1 = ScalableTag{}; + return detail::U8MaskBitsVecToMask( + d, detail::ChangeLMUL(du8m1, Or(result0L, resultH0))); +} + +#else + template HWY_INLINE MFromD Eq128(D d, const VFromD a, const VFromD b) { static_assert(IsSame, uint64_t>(), "D must be u64"); @@ -6040,7 +6088,26 @@ HWY_INLINE MFromD Eq128(D d, const VFromD a, const VFromD b) { return MaskFromVec(eq); } +#endif + // ------------------------------ Eq128Upper +#if HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400 + +template +HWY_INLINE MFromD Eq128Upper(D d, const VFromD a, const VFromD b) { + static_assert(IsSame, uint64_t>(), "D must be u64"); + auto du8mf8 = ScalableTag{}; + const vuint8mf8_t eqHL = + detail::ChangeLMUL(du8mf8, detail::MaskToU8MaskBitsVec(Eq(a, b))); + const vuint8mf8_t eqHx = detail::AndS(eqHL, 0xaa); + const vuint8mf8_t eqxL = ShiftRight<1>(eqHx); + auto du8m1 = ScalableTag{}; + return detail::U8MaskBitsVecToMask(d, + detail::ChangeLMUL(du8m1, Or(eqHx, eqxL))); +} + +#else + template HWY_INLINE MFromD Eq128Upper(D d, const VFromD a, const VFromD b) { static_assert(IsSame, uint64_t>(), "D must be u64"); @@ -6049,7 +6116,27 @@ HWY_INLINE MFromD Eq128Upper(D d, const VFromD a, const VFromD b) { return MaskFromVec(OddEven(eqHL, detail::Slide1Down(eqHL))); } +#endif + // ------------------------------ Ne128 +#if HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400 + +template +HWY_INLINE MFromD Ne128(D d, const VFromD a, const VFromD b) { + static_assert(IsSame, uint64_t>(), "D must be u64"); + auto du8mf8 = ScalableTag{}; + const vuint8mf8_t neHL = + detail::ChangeLMUL(du8mf8, detail::MaskToU8MaskBitsVec(Ne(a, b))); + const vuint8mf8_t nexH = ShiftRight<1>(neHL); + const vuint8mf8_t result0L = detail::AndS(Or(neHL, nexH), 0x55); + const vuint8mf8_t resultH0 = Add(result0L, result0L); + auto du8m1 = ScalableTag{}; + return detail::U8MaskBitsVecToMask( + d, detail::ChangeLMUL(du8m1, Or(result0L, resultH0))); +} + +#else + template HWY_INLINE MFromD Ne128(D d, const VFromD a, const VFromD b) { static_assert(IsSame, uint64_t>(), "D must be u64"); @@ -6060,7 +6147,26 @@ HWY_INLINE MFromD Ne128(D d, const VFromD a, const VFromD b) { return MaskFromVec(Or(neHL, neLH)); } +#endif + // ------------------------------ Ne128Upper +#if HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400 + +template +HWY_INLINE MFromD Ne128Upper(D d, const VFromD a, const VFromD b) { + static_assert(IsSame, uint64_t>(), "D must be u64"); + auto du8mf8 = ScalableTag{}; + const vuint8mf8_t neHL = + detail::ChangeLMUL(du8mf8, detail::MaskToU8MaskBitsVec(Ne(a, b))); + const vuint8mf8_t neHx = detail::AndS(neHL, 0xaa); + const vuint8mf8_t nexL = ShiftRight<1>(neHx); + auto du8m1 = ScalableTag{}; + return detail::U8MaskBitsVecToMask(d, + detail::ChangeLMUL(du8m1, Or(neHx, nexL))); +} + +#else + template HWY_INLINE MFromD Ne128Upper(D d, const VFromD a, const VFromD b) { static_assert(IsSame, uint64_t>(), "D must be u64"); @@ -6072,6 +6178,8 @@ HWY_INLINE MFromD Ne128Upper(D d, const VFromD a, const VFromD b) { return MaskFromVec(OddEven(neHL, down)); } +#endif + // ------------------------------ Min128, Max128 (Lt128) template diff --git a/hwy/per_target.cc b/hwy/per_target.cc index f1a83d4964..4f9de2e3a9 100644 --- a/hwy/per_target.cc +++ b/hwy/per_target.cc @@ -32,6 +32,7 @@ namespace hwy { namespace HWY_NAMESPACE { int64_t GetTarget() { return HWY_TARGET; } size_t GetVectorBytes() { return Lanes(ScalableTag()); } +bool GetHaveInteger64() { return HWY_HAVE_INTEGER64 != 0; } bool GetHaveFloat16() { return HWY_HAVE_FLOAT16 != 0; } bool GetHaveFloat64() { return HWY_HAVE_FLOAT64 != 0; } // NOLINTNEXTLINE(google-readability-namespace-comments) @@ -45,6 +46,7 @@ namespace hwy { namespace { HWY_EXPORT(GetTarget); HWY_EXPORT(GetVectorBytes); +HWY_EXPORT(GetHaveInteger64); HWY_EXPORT(GetHaveFloat16); HWY_EXPORT(GetHaveFloat64); } // namespace @@ -57,6 +59,10 @@ HWY_DLLEXPORT size_t VectorBytes() { return HWY_DYNAMIC_DISPATCH(GetVectorBytes)(); } +HWY_DLLEXPORT bool HaveInteger64() { + return HWY_DYNAMIC_DISPATCH(GetHaveInteger64)(); +} + HWY_DLLEXPORT bool HaveFloat16() { return HWY_DYNAMIC_DISPATCH(GetHaveFloat16)(); } diff --git a/hwy/per_target.h b/hwy/per_target.h index c600da14b7..7a86b0ebe6 100644 --- a/hwy/per_target.h +++ b/hwy/per_target.h @@ -39,7 +39,8 @@ HWY_DLLEXPORT int64_t DispatchedTarget(); // unnecessarily. HWY_DLLEXPORT size_t VectorBytes(); -// Returns whether 16/64-bit floats are a supported lane type. +// Returns whether 64-bit integers, 16/64-bit floats are a supported lane type. +HWY_DLLEXPORT bool HaveInteger64(); HWY_DLLEXPORT bool HaveFloat16(); HWY_DLLEXPORT bool HaveFloat64(); diff --git a/hwy/profiler.h b/hwy/profiler.h index 57ddbe623f..467ac0c4bb 100644 --- a/hwy/profiler.h +++ b/hwy/profiler.h @@ -60,7 +60,6 @@ #include "hwy/aligned_allocator.h" #include "hwy/cache_control.h" // FlushStream // #include "hwy/contrib/sort/vqsort.h" -#include "hwy/highway.h" // Stream #include "hwy/robust_statistics.h" #include "hwy/timer-inl.h" #include "hwy/timer.h" @@ -80,17 +79,6 @@ static constexpr size_t kMaxDepth = 64; // Maximum nesting of zones. static constexpr size_t kMaxZones = 256; // Total number of zones. -// Overwrites "to" without loading it into the cache (read-for-ownership). -// Both pointers must be aligned. -HWY_ATTR static void StreamCacheLine(const uint64_t* HWY_RESTRICT from, - uint64_t* HWY_RESTRICT to) { - namespace hn = HWY_NAMESPACE; - const hn::ScalableTag d; - for (size_t i = 0; i < HWY_ALIGNMENT / sizeof(uint64_t); i += Lanes(d)) { - hn::Stream(hn::Load(d, from + i), d, to + i); - } -} - #pragma pack(push, 1) // Represents zone entry/exit events. Stores a full-resolution timestamp plus @@ -109,35 +97,46 @@ class Packet { static constexpr uint64_t kTimestampMask = (1ULL << kTimestampBits) - 1; static Packet Make(const size_t biased_offset, const uint64_t timestamp) { + HWY_DASSERT(biased_offset != 0); HWY_DASSERT(biased_offset < (1ULL << kOffsetBits)); Packet packet; packet.bits_ = (biased_offset << kTimestampBits) + (timestamp & kTimestampMask); + + HWY_DASSERT(packet.BiasedOffset() == biased_offset); + HWY_DASSERT(packet.Timestamp() == (timestamp & kTimestampMask)); return packet; } uint64_t Timestamp() const { return bits_ & kTimestampMask; } - size_t BiasedOffset() const { return (bits_ >> kTimestampBits); } + size_t BiasedOffset() const { + const size_t biased_offset = (bits_ >> kTimestampBits); + HWY_DASSERT(biased_offset != 0); + HWY_DASSERT(biased_offset < (1ULL << kOffsetBits)); + return biased_offset; + } private: uint64_t bits_; }; static_assert(sizeof(Packet) == 8, "Wrong Packet size"); -// Returns the address of a string literal. Assuming zone names are also -// literals and stored nearby, we can represent them as offsets, which are -// faster to compute than hashes or even a static index. -// -// This function must not be static - each call (even from other translation -// units) must return the same value. -inline const char* StringOrigin() { - // Chosen such that no zone name is a prefix nor suffix of this string - // to ensure they aren't merged (offset 0 identifies zone-exit packets). - static const char* string_origin = "__#__"; - return string_origin - Packet::kOffsetBias; -} +// All translation units must use the same string origin. A static member +// function ensures this without requiring a separate .cc file. +struct StringOrigin { + // Returns the address of a string literal. Assuming zone names are also + // literals and stored nearby, we can represent them as offsets from this, + // which is faster to compute than hashes or even a static index. + static const char* Get() { + // Chosen such that no zone name is a prefix nor suffix of this string + // to ensure they aren't merged. Note zone exit packets use + // `biased_offset == kOffsetBias`. + static const char* string_origin = "__#__"; + return string_origin - Packet::kOffsetBias; + } +}; // Representation of an active zone, stored in a stack. Used to deduct // child duration from the parent's self time. POD. @@ -151,18 +150,36 @@ static_assert(sizeof(Node) == 16, "Wrong Node size"); struct Accumulator { static constexpr size_t kNumCallBits = 64 - Packet::kOffsetBits; - uint64_t BiasedOffset() const { return u128.lo >> kNumCallBits; } + uint64_t BiasedOffset() const { + const size_t biased_offset = u128.lo >> kNumCallBits; + HWY_DASSERT(biased_offset != 0); + HWY_DASSERT(biased_offset < (1ULL << Packet::kOffsetBits)); + return biased_offset; + } uint64_t NumCalls() const { return u128.lo & ((1ULL << kNumCallBits) - 1); } uint64_t Duration() const { return u128.hi; } void Set(uint64_t biased_offset, uint64_t num_calls, uint64_t duration) { + HWY_DASSERT(biased_offset != 0); + HWY_DASSERT(biased_offset < (1ULL << Packet::kOffsetBits)); + HWY_DASSERT(num_calls < (1ULL << kNumCallBits)); + u128.hi = duration; u128.lo = (biased_offset << kNumCallBits) + num_calls; + + HWY_DASSERT(BiasedOffset() == biased_offset); + HWY_DASSERT(NumCalls() == num_calls); + HWY_DASSERT(Duration() == duration); } void Add(uint64_t num_calls, uint64_t duration) { + const uint64_t biased_offset = BiasedOffset(); + (void)biased_offset; + u128.lo += num_calls; u128.hi += duration; + + HWY_DASSERT(biased_offset == BiasedOffset()); } // For fast sorting by duration, which must therefore be the hi element. @@ -182,7 +199,10 @@ inline T ClampedSubtract(const T minuend, const T subtrahend) { // Per-thread call graph (stack) and Accumulator for each zone. class Results { public: - Results() { ZeroBytes(zones_, sizeof(zones_)); } + Results() { + ZeroBytes(nodes_, sizeof(nodes_)); + ZeroBytes(zones_, sizeof(zones_)); + } // Used for computing overhead when this thread encounters its first Zone. // This has no observable effect apart from increasing "analyze_elapsed_". @@ -191,7 +211,7 @@ class Results { HWY_DASSERT(num_zones_ == 0); AnalyzePackets(packets, 2); const uint64_t duration = zones_[0].Duration(); - zones_[0].Set(0, 0, 0); + zones_[0].Set(1, 0, 0); // avoids triggering biased_offset = 0 checks HWY_DASSERT(depth_ == 0); num_zones_ = 0; return duration; @@ -217,6 +237,7 @@ class Results { if (p.BiasedOffset() != Packet::kOffsetBias) { HWY_DASSERT(depth_ < kMaxDepth); nodes_[depth_].packet = p; + HWY_DASSERT(p.BiasedOffset() != 0); nodes_[depth_].child_total = 0; ++depth_; continue; @@ -245,7 +266,7 @@ class Results { // Incorporates results from another thread. Call after all threads have // exited any zones. - void Assimilate(const Results& other) { + void Assimilate(Results& other) { namespace hn = HWY_NAMESPACE; const uint64_t t0 = hn::timer::Start(); HWY_DASSERT(depth_ == 0); @@ -255,6 +276,7 @@ class Results { const Accumulator& zone = other.zones_[i]; UpdateOrAdd(zone.BiasedOffset(), zone.NumCalls(), zone.Duration()); } + other.num_zones_ = 0; const uint64_t t1 = hn::timer::Stop(); analyze_elapsed_ += t1 - t0 + other.analyze_elapsed_; } @@ -268,20 +290,22 @@ class Results { // Sort by decreasing total (self) cost. // VQSort(&zones_[0].u128, num_zones_, SortDescending()); std::sort(zones_, zones_ + num_zones_, - [](const Accumulator& r1, const Accumulator& r2) { - return r1.Duration() > r2.Duration(); + [](const Accumulator& z1, const Accumulator& z2) { + return z1.Duration() > z2.Duration(); }); const double inv_freq = 1.0 / platform::InvariantTicksPerSecond(); - const char* string_origin = StringOrigin(); + const char* string_origin = StringOrigin::Get(); for (size_t i = 0; i < num_zones_; ++i) { - const Accumulator& r = zones_[i]; - const uint64_t num_calls = r.NumCalls(); - printf("%-40s: %10zu x %15zu = %9.6f\n", string_origin + r.BiasedOffset(), - num_calls, r.Duration() / num_calls, - static_cast(r.Duration()) * inv_freq); + const Accumulator& z = zones_[i]; + const size_t num_calls = z.NumCalls(); + const double duration = static_cast(z.Duration()); + printf("%-40s: %10zu x %15.0f = %9.6f\n", + string_origin + z.BiasedOffset(), num_calls, duration / num_calls, + duration * inv_freq); } + num_zones_ = 0; const uint64_t t1 = hn::timer::Stop(); analyze_elapsed_ += t1 - t0; @@ -293,16 +317,15 @@ class Results { // Updates an existing Accumulator (uniquely identified by biased_offset) or // adds one if this is the first time this thread analyzed that zone. // Uses a self-organizing list data structure, which avoids dynamic memory - // allocations and is far faster than unordered_map. Loads, updates and - // stores the entire Accumulator with vector instructions. + // allocations and is far faster than unordered_map. void UpdateOrAdd(const size_t biased_offset, const uint64_t num_calls, const uint64_t duration) { + HWY_DASSERT(biased_offset != 0); HWY_DASSERT(biased_offset < (1ULL << Packet::kOffsetBits)); // Special case for first zone: (maybe) update, without swapping. - if (zones_[0].BiasedOffset() == biased_offset) { + if (num_zones_ != 0 && zones_[0].BiasedOffset() == biased_offset) { zones_[0].Add(num_calls, duration); - HWY_DASSERT(zones_[0].BiasedOffset() == biased_offset); return; } @@ -310,7 +333,6 @@ class Results { for (size_t i = 1; i < num_zones_; ++i) { if (zones_[i].BiasedOffset() == biased_offset) { zones_[i].Add(num_calls, duration); - HWY_DASSERT(zones_[i].BiasedOffset() == biased_offset); // Swap with predecessor (more conservative than move to front, // but at least as successful). const Accumulator prev = zones_[i - 1]; @@ -322,9 +344,7 @@ class Results { // Not found; create a new Accumulator. HWY_DASSERT(num_zones_ < kMaxZones); - Accumulator* HWY_RESTRICT zone = zones_ + num_zones_; - zone->Set(biased_offset, num_calls, duration); - HWY_DASSERT(zone->BiasedOffset() == biased_offset); + zones_[num_zones_].Set(biased_offset, num_calls, duration); ++num_zones_; } @@ -332,7 +352,7 @@ class Results { // __func__ and GCC doesn't merge them. An N^2 search for duplicates is // acceptable because we only expect a few dozen zones. void MergeDuplicates() { - const char* string_origin = StringOrigin(); + const char* string_origin = StringOrigin::Get(); for (size_t i = 0; i < num_zones_; ++i) { const size_t biased_offset = zones_[i].BiasedOffset(); const char* name = string_origin + biased_offset; @@ -344,15 +364,15 @@ class Results { if (!strcmp(name, string_origin + zones_[j].BiasedOffset())) { num_calls += zones_[j].NumCalls(); zones_[i].Add(0, zones_[j].Duration()); - // Fill hole with last item. + // j was the last zone, so we are done. + if (j == num_zones_ - 1) break; + // Replace current zone with the last one, and check it next. zones_[j] = zones_[--num_zones_]; } else { // Name differed, try next Accumulator. ++j; } } - HWY_DASSERT(num_calls < (1ULL << Accumulator::kNumCallBits)); - // Re-pack regardless of whether any duplicates were found. zones_[i].Set(biased_offset, num_calls, zones_[i].Duration()); } @@ -379,20 +399,21 @@ class ThreadSpecific { : max_packets_((PROFILER_THREAD_STORAGE << 20) / sizeof(Packet)), packets_(AllocateAligned(max_packets_)), num_packets_(0), - string_origin_(StringOrigin()) { + string_origin_(StringOrigin::Get()) { // Even in optimized builds, verify that this zone's name offset fits // within the allotted space. If not, UpdateOrAdd is likely to overrun // zones_[]. Checking here on the cold path (only reached once per thread) // is cheap, but it only covers one zone. const size_t biased_offset = name - string_origin_; - HWY_ASSERT(biased_offset <= (1ULL << Packet::kOffsetBits)); + HWY_ASSERT(biased_offset < (1ULL << Packet::kOffsetBits)); } // Depends on Zone => defined below. void ComputeOverhead(); void WriteEntry(const char* name, const uint64_t timestamp) { - const size_t biased_offset = name - string_origin_; + HWY_DASSERT(name >= string_origin_); + const size_t biased_offset = static_cast(name - string_origin_); Write(Packet::Make(biased_offset, timestamp)); } @@ -421,6 +442,19 @@ class ThreadSpecific { Results& GetResults() { return results_; } private: + // Overwrites "to" while attempting to bypass the cache (read-for-ownership). + // Both pointers must be aligned. + static void StreamCacheLine(const uint64_t* HWY_RESTRICT from, + uint64_t* HWY_RESTRICT to) { +#if HWY_COMPILER_CLANG + for (size_t i = 0; i < HWY_ALIGNMENT / sizeof(uint64_t); ++i) { + __builtin_nontemporal_store(from[i], to + i); + } +#else + hwy::CopyBytes(from, to, HWY_ALIGNMENT); +#endif + } + // Write packet to buffer/storage, emptying them as needed. void Write(const Packet packet) { // Buffer full => copy to storage. @@ -452,7 +486,7 @@ class ThreadSpecific { AlignedFreeUniquePtr packets_; size_t num_packets_; // Cached here because we already read this cache line on zone entry/exit. - const char* HWY_RESTRICT string_origin_; + const char* string_origin_; Results results_; }; @@ -497,7 +531,7 @@ class ThreadList { // responsible for initializing ThreadSpecific. class Zone { public: - // "name" must be a string literal (see StringOrigin). + // "name" must be a string literal (see StringOrigin::Get). HWY_NOINLINE explicit Zone(const char* name) { HWY_FENCE; ThreadSpecific* HWY_RESTRICT thread_specific = StaticThreadSpecific(); @@ -592,7 +626,7 @@ inline void ThreadSpecific::ComputeOverhead() { robust_statistics::CountingSort(samples, kNumSamples); self_overhead = samples[kNumSamples / 2]; if (PROFILER_PRINT_OVERHEAD) { - printf("Overhead: %zu\n", self_overhead); + printf("Overhead: %.0f\n", static_cast(self_overhead)); } results_.SetSelfOverhead(self_overhead); } @@ -628,7 +662,7 @@ inline void ThreadSpecific::ComputeOverhead() { robust_statistics::CountingSort(samples, kNumSamples); const uint64_t child_overhead = samples[9 * kNumSamples / 10]; if (PROFILER_PRINT_OVERHEAD) { - printf("Child overhead: %zu\n", child_overhead); + printf("Child overhead: %.0f\n", static_cast(child_overhead)); } results_.SetChildOverhead(child_overhead); } diff --git a/hwy/stats.cc b/hwy/stats.cc index a00e3797ac..4c53124b5b 100644 --- a/hwy/stats.cc +++ b/hwy/stats.cc @@ -31,7 +31,7 @@ void Stats::Assimilate(const Stats& other) { min_ = std::min(min_, other.min_); max_ = std::max(max_, other.max_); - product_ *= other.product_; + sum_log_ += other.sum_log_; const double product_n = n_ * other.n_; const double n2 = n_ * n_; @@ -82,10 +82,10 @@ std::string Stats::ToString(int exclude) const { if ((exclude & kNoMeanSD) == 0) { const float sd = StandardDeviation(); if (sd > 100) { - ret = snprintf(buf + pos, sizeof(buf) - pos, "Mean=%8.2E SD=%7.1E ", + ret = snprintf(buf + pos, sizeof(buf) - pos, "Mean=%8.2e SD=%7.1e ", Mean(), sd); } else { - ret = snprintf(buf + pos, sizeof(buf) - pos, "Mean=%8.6f SD=%7.5f ", + ret = snprintf(buf + pos, sizeof(buf) - pos, "Mean=%8.6e SD=%7.5e ", Mean(), sd); } HWY_ASSERT(ret > 0); diff --git a/hwy/stats.h b/hwy/stats.h index 8506c217e6..207ad2bf8e 100644 --- a/hwy/stats.h +++ b/hwy/stats.h @@ -79,7 +79,8 @@ class Stats { min_ = HWY_MIN(min_, x); max_ = HWY_MAX(max_, x); - product_ *= x; + // Logarithmic transform avoids/delays underflow and overflow. + sum_log_ += std::log(static_cast(x)); // Online moments. Reference: https://goo.gl/9ha694 const double d = x - m1_; @@ -100,7 +101,7 @@ class Stats { float Max() const { return max_; } double GeometricMean() const { - return n_ == 0 ? 0.0 : pow(product_, 1.0 / n_); + return n_ == 0 ? 0.0 : std::exp(sum_log_ / n_); } double Mean() const { return m1_; } @@ -165,7 +166,7 @@ class Stats { min_ = hwy::HighestValue(); max_ = hwy::LowestValue(); - product_ = 1.0; + sum_log_ = 0.0; m1_ = 0.0; m2_ = 0.0; @@ -179,7 +180,7 @@ class Stats { float min_; float max_; - double product_; // for geomean + double sum_log_; // for geomean // Moments double m1_; diff --git a/hwy/targets_test.cc b/hwy/targets_test.cc index da295ab52e..c0cb1f85f9 100644 --- a/hwy/targets_test.cc +++ b/hwy/targets_test.cc @@ -21,12 +21,14 @@ #include "hwy/tests/hwy_gtest.h" #include "hwy/tests/test_util-inl.h" +// Simulate another project having its own namespace. namespace fake { +namespace { #define DECLARE_FUNCTION(TGT) \ namespace N_##TGT { \ /* Function argument is just to ensure/demonstrate they are possible. */ \ - int64_t FakeFunction(int) { return HWY_##TGT; } \ + HWY_MAYBE_UNUSED int64_t FakeFunction(int) { return HWY_##TGT; } \ template \ int64_t FakeFunctionT(T) { \ return HWY_##TGT; \ @@ -141,9 +143,11 @@ void CheckFakeFunction() { #endif } +} // namespace } // namespace fake namespace hwy { +namespace { #if !HWY_TEST_STANDALONE class HwyTargetsTest : public testing::Test {}; @@ -179,6 +183,7 @@ TEST(HwyTargetsTest, DisabledTargetsTest) { DisableTargets(0); // Reset the mask. } +} // namespace } // namespace hwy HWY_TEST_MAIN(); diff --git a/hwy/tests/arithmetic_test.cc b/hwy/tests/arithmetic_test.cc index fb63527133..96d72169fb 100644 --- a/hwy/tests/arithmetic_test.cc +++ b/hwy/tests/arithmetic_test.cc @@ -26,13 +26,14 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestPlusMinus { template HWY_NOINLINE void operator()(T /*unused*/, D d) { - const auto v2 = Iota(d, 2); - const auto v3 = Iota(d, 3); - const auto v4 = Iota(d, 4); + const auto v2 = Iota(d, hwy::Unpredictable1() + 1); + const auto v3 = Iota(d, hwy::Unpredictable1() + 2); + const auto v4 = Iota(d, hwy::Unpredictable1() + 3); const size_t N = Lanes(d); auto lanes = AllocateAligned(N); @@ -305,14 +306,15 @@ HWY_NOINLINE void TestAllIntegerAbsDiff() { #endif } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyArithmeticTest); HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllPlusMinus); HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllAddSub); @@ -321,6 +323,7 @@ HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllAbs); HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllNeg); HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllIntegerAbsDiff); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/bit_permute_test.cc b/hwy/tests/bit_permute_test.cc index 9739b134e5..68fa9c6957 100644 --- a/hwy/tests/bit_permute_test.cc +++ b/hwy/tests/bit_permute_test.cc @@ -25,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestBitShuffle { template @@ -79,17 +80,19 @@ HWY_NOINLINE void TestAllBitShuffle() { #endif } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyBitPermuteTest); HWY_EXPORT_AND_TEST_P(HwyBitPermuteTest, TestAllBitShuffle); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/blockwise_combine_test.cc b/hwy/tests/blockwise_combine_test.cc new file mode 100644 index 0000000000..7d7f42f488 --- /dev/null +++ b/hwy/tests/blockwise_combine_test.cc @@ -0,0 +1,152 @@ +// Copyright 2019 Google LLC +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "tests/blockwise_combine_test.cc" +#include "hwy/foreach_target.h" // IWYU pragma: keep +#include "hwy/highway.h" +#include "hwy/tests/test_util-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace hwy { +namespace HWY_NAMESPACE { +namespace { + +// Scalar does not define CombineShiftRightBytes. +#if HWY_TARGET != HWY_SCALAR || HWY_IDE + +template +struct TestCombineShiftRightBytes { + template + HWY_NOINLINE void operator()(T, D d) { + constexpr size_t kBlockSize = 16; + static_assert(kBytes < kBlockSize, "Shift count is per block"); + const Repartition d8; + const size_t N8 = Lanes(d8); + if (N8 < 16) return; + auto hi_bytes = AllocateAligned(N8); + auto lo_bytes = AllocateAligned(N8); + auto expected_bytes = AllocateAligned(N8); + HWY_ASSERT(hi_bytes && lo_bytes && expected_bytes); + uint8_t combined[2 * kBlockSize]; + + // Random inputs in each lane + RandomState rng; + for (size_t rep = 0; rep < AdjustedReps(100); ++rep) { + for (size_t i = 0; i < N8; ++i) { + hi_bytes[i] = static_cast(Random64(&rng) & 0xFF); + lo_bytes[i] = static_cast(Random64(&rng) & 0xFF); + } + for (size_t i = 0; i < N8; i += kBlockSize) { + // Arguments are not the same size. + CopyBytes(&lo_bytes[i], combined); + CopyBytes(&hi_bytes[i], combined + kBlockSize); + CopyBytes(combined + kBytes, &expected_bytes[i]); + } + + const auto hi = BitCast(d, Load(d8, hi_bytes.get())); + const auto lo = BitCast(d, Load(d8, lo_bytes.get())); + const auto expected = BitCast(d, Load(d8, expected_bytes.get())); + HWY_ASSERT_VEC_EQ(d, expected, CombineShiftRightBytes(d, hi, lo)); + } + } +}; + +template +struct TestCombineShiftRightLanes { + template + HWY_NOINLINE void operator()(T, D d) { + const Repartition d8; + const size_t N8 = Lanes(d8); + if (N8 < 16) return; + + auto hi_bytes = AllocateAligned(N8); + auto lo_bytes = AllocateAligned(N8); + auto expected_bytes = AllocateAligned(N8); + HWY_ASSERT(hi_bytes && lo_bytes && expected_bytes); + constexpr size_t kBlockSize = 16; + uint8_t combined[2 * kBlockSize]; + + // Random inputs in each lane + RandomState rng; + for (size_t rep = 0; rep < AdjustedReps(100); ++rep) { + for (size_t i = 0; i < N8; ++i) { + hi_bytes[i] = static_cast(Random64(&rng) & 0xFF); + lo_bytes[i] = static_cast(Random64(&rng) & 0xFF); + } + for (size_t i = 0; i < N8; i += kBlockSize) { + // Arguments are not the same size. + CopyBytes(&lo_bytes[i], combined); + CopyBytes(&hi_bytes[i], combined + kBlockSize); + CopyBytes(combined + kLanes * sizeof(T), + &expected_bytes[i]); + } + + const auto hi = BitCast(d, Load(d8, hi_bytes.get())); + const auto lo = BitCast(d, Load(d8, lo_bytes.get())); + const auto expected = BitCast(d, Load(d8, expected_bytes.get())); + HWY_ASSERT_VEC_EQ(d, expected, CombineShiftRightLanes(d, hi, lo)); + } + } +}; + +#endif // #if HWY_TARGET != HWY_SCALAR + +struct TestCombineShiftRight { + template + HWY_NOINLINE void operator()(T t, D d) { +// Scalar does not define CombineShiftRightBytes. +#if HWY_TARGET != HWY_SCALAR || HWY_IDE + constexpr int kMaxBytes = + HWY_MIN(16, static_cast(MaxLanes(d) * sizeof(T))); + constexpr int kMaxLanes = kMaxBytes / static_cast(sizeof(T)); + TestCombineShiftRightBytes()(t, d); + TestCombineShiftRightBytes()(t, d); + TestCombineShiftRightBytes<1>()(t, d); + + TestCombineShiftRightLanes()(t, d); + TestCombineShiftRightLanes()(t, d); + TestCombineShiftRightLanes<1>()(t, d); +#else + (void)t; + (void)d; +#endif + } +}; + +HWY_NOINLINE void TestAllCombineShiftRight() { + // Need at least 2 lanes. + ForAllTypes(ForShrinkableVectors()); +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace hwy +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace hwy { +namespace { +HWY_BEFORE_TEST(HwyBlockwiseCombineTest); +HWY_EXPORT_AND_TEST_P(HwyBlockwiseCombineTest, TestAllCombineShiftRight); +HWY_AFTER_TEST(); +} // namespace +} // namespace hwy +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/blockwise_shift_test.cc b/hwy/tests/blockwise_shift_test.cc index f432b8e2e5..d9bf6fbc4e 100644 --- a/hwy/tests/blockwise_shift_test.cc +++ b/hwy/tests/blockwise_shift_test.cc @@ -25,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestShiftBytes { template @@ -149,127 +150,21 @@ HWY_NOINLINE void TestAllShiftRightLanes() { ForAllTypes(ForPartialVectors()); } -// Scalar does not define CombineShiftRightBytes. -#if HWY_TARGET != HWY_SCALAR || HWY_IDE - -template -struct TestCombineShiftRightBytes { - template - HWY_NOINLINE void operator()(T, D d) { - constexpr size_t kBlockSize = 16; - static_assert(kBytes < kBlockSize, "Shift count is per block"); - const Repartition d8; - const size_t N8 = Lanes(d8); - if (N8 < 16) return; - auto hi_bytes = AllocateAligned(N8); - auto lo_bytes = AllocateAligned(N8); - auto expected_bytes = AllocateAligned(N8); - HWY_ASSERT(hi_bytes && lo_bytes && expected_bytes); - uint8_t combined[2 * kBlockSize]; - - // Random inputs in each lane - RandomState rng; - for (size_t rep = 0; rep < AdjustedReps(100); ++rep) { - for (size_t i = 0; i < N8; ++i) { - hi_bytes[i] = static_cast(Random64(&rng) & 0xFF); - lo_bytes[i] = static_cast(Random64(&rng) & 0xFF); - } - for (size_t i = 0; i < N8; i += kBlockSize) { - // Arguments are not the same size. - CopyBytes(&lo_bytes[i], combined); - CopyBytes(&hi_bytes[i], combined + kBlockSize); - CopyBytes(combined + kBytes, &expected_bytes[i]); - } - - const auto hi = BitCast(d, Load(d8, hi_bytes.get())); - const auto lo = BitCast(d, Load(d8, lo_bytes.get())); - const auto expected = BitCast(d, Load(d8, expected_bytes.get())); - HWY_ASSERT_VEC_EQ(d, expected, CombineShiftRightBytes(d, hi, lo)); - } - } -}; - -template -struct TestCombineShiftRightLanes { - template - HWY_NOINLINE void operator()(T, D d) { - const Repartition d8; - const size_t N8 = Lanes(d8); - if (N8 < 16) return; - - auto hi_bytes = AllocateAligned(N8); - auto lo_bytes = AllocateAligned(N8); - auto expected_bytes = AllocateAligned(N8); - HWY_ASSERT(hi_bytes && lo_bytes && expected_bytes); - constexpr size_t kBlockSize = 16; - uint8_t combined[2 * kBlockSize]; - - // Random inputs in each lane - RandomState rng; - for (size_t rep = 0; rep < AdjustedReps(100); ++rep) { - for (size_t i = 0; i < N8; ++i) { - hi_bytes[i] = static_cast(Random64(&rng) & 0xFF); - lo_bytes[i] = static_cast(Random64(&rng) & 0xFF); - } - for (size_t i = 0; i < N8; i += kBlockSize) { - // Arguments are not the same size. - CopyBytes(&lo_bytes[i], combined); - CopyBytes(&hi_bytes[i], combined + kBlockSize); - CopyBytes(combined + kLanes * sizeof(T), - &expected_bytes[i]); - } - - const auto hi = BitCast(d, Load(d8, hi_bytes.get())); - const auto lo = BitCast(d, Load(d8, lo_bytes.get())); - const auto expected = BitCast(d, Load(d8, expected_bytes.get())); - HWY_ASSERT_VEC_EQ(d, expected, CombineShiftRightLanes(d, hi, lo)); - } - } -}; - -#endif // #if HWY_TARGET != HWY_SCALAR - -struct TestCombineShiftRight { - template - HWY_NOINLINE void operator()(T t, D d) { -// Scalar does not define CombineShiftRightBytes. -#if HWY_TARGET != HWY_SCALAR || HWY_IDE - constexpr int kMaxBytes = - HWY_MIN(16, static_cast(MaxLanes(d) * sizeof(T))); - constexpr int kMaxLanes = kMaxBytes / static_cast(sizeof(T)); - TestCombineShiftRightBytes()(t, d); - TestCombineShiftRightBytes()(t, d); - TestCombineShiftRightBytes<1>()(t, d); - - TestCombineShiftRightLanes()(t, d); - TestCombineShiftRightLanes()(t, d); - TestCombineShiftRightLanes<1>()(t, d); -#else - (void)t; - (void)d; -#endif - } -}; - -HWY_NOINLINE void TestAllCombineShiftRight() { - // Need at least 2 lanes. - ForAllTypes(ForShrinkableVectors()); -} - +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyBlockwiseShiftTest); HWY_EXPORT_AND_TEST_P(HwyBlockwiseShiftTest, TestAllShiftBytes); HWY_EXPORT_AND_TEST_P(HwyBlockwiseShiftTest, TestAllShiftLeftLanes); HWY_EXPORT_AND_TEST_P(HwyBlockwiseShiftTest, TestAllShiftRightLanes); -HWY_EXPORT_AND_TEST_P(HwyBlockwiseShiftTest, TestAllCombineShiftRight); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/blockwise_test.cc b/hwy/tests/blockwise_test.cc index 713d3c6151..3affd19dad 100644 --- a/hwy/tests/blockwise_test.cc +++ b/hwy/tests/blockwise_test.cc @@ -25,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template struct TestBroadcastR { @@ -494,14 +495,15 @@ HWY_NOINLINE void TestAllSpecialShuffles() { #endif } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyBlockwiseTest); HWY_EXPORT_AND_TEST_P(HwyBlockwiseTest, TestAllBroadcast); HWY_EXPORT_AND_TEST_P(HwyBlockwiseTest, TestAllTableLookupBytesSame); @@ -513,6 +515,7 @@ HWY_EXPORT_AND_TEST_P(HwyBlockwiseTest, TestAllZipUpper); #endif HWY_EXPORT_AND_TEST_P(HwyBlockwiseTest, TestAllSpecialShuffles); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/cast_test.cc b/hwy/tests/cast_test.cc index e0ef5ed4da..8d03e34197 100644 --- a/hwy/tests/cast_test.cc +++ b/hwy/tests/cast_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template size_t DeduceN(Simd) { @@ -327,19 +328,21 @@ HWY_NOINLINE void TestAllSameSizeResizeBitCast() { #endif } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyCastTest); HWY_EXPORT_AND_TEST_P(HwyCastTest, TestAllBitCast); HWY_EXPORT_AND_TEST_P(HwyCastTest, TestAllResizeBitCastToOneLaneVect); HWY_EXPORT_AND_TEST_P(HwyCastTest, TestAllSameSizeResizeBitCast); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/combine_test.cc b/hwy/tests/combine_test.cc index 6daad7b0ad..77f52f5965 100644 --- a/hwy/tests/combine_test.cc +++ b/hwy/tests/combine_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestLowerHalf { template @@ -173,115 +174,6 @@ HWY_NOINLINE void TestAllCombine() { ForAllTypes(ForExtendableVectors()); } -struct TestConcat { - template - HWY_NOINLINE void operator()(T /*unused*/, D d) { - const size_t N = Lanes(d); - if (N == 1) return; - const size_t half_bytes = N * sizeof(T) / 2; - - auto hi = AllocateAligned(N); - auto lo = AllocateAligned(N); - auto expected = AllocateAligned(N); - HWY_ASSERT(hi && lo && expected); - RandomState rng; - for (size_t rep = 0; rep < 10; ++rep) { - for (size_t i = 0; i < N; ++i) { - hi[i] = ConvertScalarTo(Random64(&rng) & 0xFF); - lo[i] = ConvertScalarTo(Random64(&rng) & 0xFF); - } - - { - CopyBytes(&hi[N / 2], &expected[N / 2], half_bytes); - CopyBytes(&lo[0], &expected[0], half_bytes); - const Vec vhi = Load(d, hi.get()); - const Vec vlo = Load(d, lo.get()); - HWY_ASSERT_VEC_EQ(d, expected.get(), ConcatUpperLower(d, vhi, vlo)); - } - - { - CopyBytes(&hi[N / 2], &expected[N / 2], half_bytes); - CopyBytes(&lo[N / 2], &expected[0], half_bytes); - const Vec vhi = Load(d, hi.get()); - const Vec vlo = Load(d, lo.get()); - HWY_ASSERT_VEC_EQ(d, expected.get(), ConcatUpperUpper(d, vhi, vlo)); - } - - { - CopyBytes(&hi[0], &expected[N / 2], half_bytes); - CopyBytes(&lo[N / 2], &expected[0], half_bytes); - const Vec vhi = Load(d, hi.get()); - const Vec vlo = Load(d, lo.get()); - HWY_ASSERT_VEC_EQ(d, expected.get(), ConcatLowerUpper(d, vhi, vlo)); - } - - { - CopyBytes(&hi[0], &expected[N / 2], half_bytes); - CopyBytes(&lo[0], &expected[0], half_bytes); - const Vec vhi = Load(d, hi.get()); - const Vec vlo = Load(d, lo.get()); - HWY_ASSERT_VEC_EQ(d, expected.get(), ConcatLowerLower(d, vhi, vlo)); - } - } - } -}; - -HWY_NOINLINE void TestAllConcat() { - ForAllTypes(ForShrinkableVectors()); -} - -struct TestConcatOddEven { - template - HWY_NOINLINE void operator()(T /*unused*/, D d) { -#if HWY_TARGET != HWY_SCALAR - const size_t N = Lanes(d); - const Vec hi = Iota(d, N); - const Vec lo = Iota(d, 0); - const Vec even = Add(Iota(d, 0), Iota(d, 0)); - const Vec odd = Add(even, Set(d, 1)); - HWY_ASSERT_VEC_EQ(d, odd, ConcatOdd(d, hi, lo)); - HWY_ASSERT_VEC_EQ(d, even, ConcatEven(d, hi, lo)); - - const Vec v_1 = Set(d, ConvertScalarTo(1)); - const Vec v_2 = Set(d, ConvertScalarTo(2)); - const Vec v_3 = Set(d, ConvertScalarTo(3)); - const Vec v_4 = Set(d, ConvertScalarTo(4)); - - const Half dh; - const Vec v_12 = InterleaveLower(v_1, v_2); /* {1, 2, 1, 2, ...} */ - const Vec v_34 = InterleaveLower(v_3, v_4); /* {3, 4, 3, 4, ...} */ - const Vec v_13 = - ConcatLowerLower(d, v_3, v_1); /* {1, 1, ..., 3, 3, ...} */ - const Vec v_24 = - ConcatLowerLower(d, v_4, v_2); /* {2, 2, ..., 4, 4, ...} */ - - const Vec concat_even_1234_result = ConcatEven(d, v_34, v_12); - const Vec concat_odd_1234_result = ConcatOdd(d, v_34, v_12); - - HWY_ASSERT_VEC_EQ(d, v_13, concat_even_1234_result); - HWY_ASSERT_VEC_EQ(d, v_24, concat_odd_1234_result); - HWY_ASSERT_VEC_EQ(dh, LowerHalf(dh, v_3), - UpperHalf(dh, concat_even_1234_result)); - HWY_ASSERT_VEC_EQ(dh, LowerHalf(dh, v_4), - UpperHalf(dh, concat_odd_1234_result)); - - // This test catches inadvertent saturation. - const Vec min = Set(d, LowestValue()); - const Vec max = Set(d, HighestValue()); - HWY_ASSERT_VEC_EQ(d, max, ConcatOdd(d, max, max)); - HWY_ASSERT_VEC_EQ(d, max, ConcatEven(d, max, max)); - HWY_ASSERT_VEC_EQ(d, min, ConcatOdd(d, min, min)); - HWY_ASSERT_VEC_EQ(d, min, ConcatEven(d, min, min)); -#else - (void)d; -#endif // HWY_TARGET != HWY_SCALAR - } -}; - -HWY_NOINLINE void TestAllConcatOddEven() { - ForAllTypes(ForShrinkableVectors()); -} - struct TestInterleaveWholeHalves { template HWY_NOINLINE void operator()(T /*unused*/, D d) { @@ -343,23 +235,23 @@ HWY_NOINLINE void TestAllInterleaveWholeHalves() { ForAllTypes(ForShrinkableVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyCombineTest); HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllLowerHalf); HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllUpperHalf); HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllZeroExtendVector); HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllCombine); -HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllConcat); -HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllConcatOddEven); HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllInterleaveWholeHalves); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - +HWY_TEST_MAIN(); #endif // HWY_ONCE diff --git a/hwy/tests/compare_test.cc b/hwy/tests/compare_test.cc index a5ede74823..728b58c3dc 100644 --- a/hwy/tests/compare_test.cc +++ b/hwy/tests/compare_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { // All types. struct TestEquality { @@ -672,14 +673,15 @@ HWY_NOINLINE void TestAllEq128Upper() { ForGEVectors<128, TestEq128Upper>()(uint64_t()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyCompareTest); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllEquality); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllStrictUnsigned); @@ -694,6 +696,7 @@ HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllLt128Upper); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllEq128); HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllEq128Upper); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/compress_test.cc b/hwy/tests/compress_test.cc index 723dd929fa..54ed3d794f 100644 --- a/hwy/tests/compress_test.cc +++ b/hwy/tests/compress_test.cc @@ -28,6 +28,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { // Regenerate tables used in the implementation, instead of testing. #define HWY_PRINT_TABLES 0 @@ -253,8 +254,6 @@ HWY_NOINLINE void TestAllCompressBlocks() { #endif // !HWY_PRINT_TABLES #if HWY_PRINT_TABLES || HWY_IDE -namespace detail { // for code folding - void PrintCompress8x8Tables() { printf("======================================= 8x8\n"); constexpr size_t N = 8; @@ -783,40 +782,39 @@ void PrintCompressNot64x2Tables() { printf("\n"); } -} // namespace detail - HWY_NOINLINE void PrintTables() { // Only print once. #if HWY_TARGET == HWY_STATIC_TARGET - detail::PrintCompress32x8Tables(); - detail::PrintCompressNot32x8Tables(); - detail::PrintCompress64x4NibbleTables(); - detail::PrintCompressNot64x4NibbleTables(); - detail::PrintCompressNot64x2NibbleTables(); - detail::PrintCompress64x4Tables(); - detail::PrintCompressNot64x4Tables(); - detail::PrintCompress32x4Tables(); - detail::PrintCompressNot32x4Tables(); - detail::PrintCompress64x2Tables(); - detail::PrintCompressNot64x2Tables(); - detail::PrintCompress64x4PairTables(); - detail::PrintCompressNot64x4PairTables(); - detail::PrintCompress16x8Tables(); - detail::PrintCompress8x8Tables(); - detail::PrintCompressNot16x8Tables(); + PrintCompress32x8Tables(); + PrintCompressNot32x8Tables(); + PrintCompress64x4NibbleTables(); + PrintCompressNot64x4NibbleTables(); + PrintCompressNot64x2NibbleTables(); + PrintCompress64x4Tables(); + PrintCompressNot64x4Tables(); + PrintCompress32x4Tables(); + PrintCompressNot32x4Tables(); + PrintCompress64x2Tables(); + PrintCompressNot64x2Tables(); + PrintCompress64x4PairTables(); + PrintCompressNot64x4PairTables(); + PrintCompress16x8Tables(); + PrintCompress8x8Tables(); + PrintCompressNot16x8Tables(); #endif } #endif // HWY_PRINT_TABLES +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyCompressTest); #if HWY_PRINT_TABLES // Only print instead of running tests; this will be visible in the log. @@ -826,6 +824,7 @@ HWY_EXPORT_AND_TEST_P(HwyCompressTest, TestAllCompress); HWY_EXPORT_AND_TEST_P(HwyCompressTest, TestAllCompressBlocks); #endif HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/concat_test.cc b/hwy/tests/concat_test.cc new file mode 100644 index 0000000000..4dd68e4d8d --- /dev/null +++ b/hwy/tests/concat_test.cc @@ -0,0 +1,156 @@ +// Copyright 2019 Google LLC +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "hwy/nanobenchmark.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "tests/concat_test.cc" +#include "hwy/foreach_target.h" // IWYU pragma: keep +#include "hwy/highway.h" +#include "hwy/tests/test_util-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace hwy { +namespace HWY_NAMESPACE { +namespace { + +struct TestConcat { + template + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const size_t N = Lanes(d); + if (N == 1) return; + const size_t half_bytes = N * sizeof(T) / 2; + + auto hi = AllocateAligned(N); + auto lo = AllocateAligned(N); + auto expected = AllocateAligned(N); + HWY_ASSERT(hi && lo && expected); + RandomState rng; + for (size_t rep = 0; rep < 10; ++rep) { + for (size_t i = 0; i < N; ++i) { + hi[i] = ConvertScalarTo(Random64(&rng) & 0xFF); + lo[i] = ConvertScalarTo(Random64(&rng) & 0xFF); + } + + { + CopyBytes(&hi[N / 2], &expected[N / 2], half_bytes); + CopyBytes(&lo[0], &expected[0], half_bytes); + const Vec vhi = Load(d, hi.get()); + const Vec vlo = Load(d, lo.get()); + HWY_ASSERT_VEC_EQ(d, expected.get(), ConcatUpperLower(d, vhi, vlo)); + } + + { + CopyBytes(&hi[N / 2], &expected[N / 2], half_bytes); + CopyBytes(&lo[N / 2], &expected[0], half_bytes); + const Vec vhi = Load(d, hi.get()); + const Vec vlo = Load(d, lo.get()); + HWY_ASSERT_VEC_EQ(d, expected.get(), ConcatUpperUpper(d, vhi, vlo)); + } + + { + CopyBytes(&hi[0], &expected[N / 2], half_bytes); + CopyBytes(&lo[N / 2], &expected[0], half_bytes); + const Vec vhi = Load(d, hi.get()); + const Vec vlo = Load(d, lo.get()); + HWY_ASSERT_VEC_EQ(d, expected.get(), ConcatLowerUpper(d, vhi, vlo)); + } + + { + CopyBytes(&hi[0], &expected[N / 2], half_bytes); + CopyBytes(&lo[0], &expected[0], half_bytes); + const Vec vhi = Load(d, hi.get()); + const Vec vlo = Load(d, lo.get()); + HWY_ASSERT_VEC_EQ(d, expected.get(), ConcatLowerLower(d, vhi, vlo)); + } + } + } +}; + +HWY_NOINLINE void TestAllConcat() { + ForAllTypes(ForShrinkableVectors()); +} + +struct TestConcatOddEven { + template + HWY_NOINLINE void operator()(T /*unused*/, D d) { +#if HWY_TARGET != HWY_SCALAR + const size_t N = Lanes(d); + const Vec hi = Iota(d, hwy::Unpredictable1() + N - 1); // N, N+1, ... + const Vec lo = Iota(d, hwy::Unpredictable1() - 1); // 0,1,2,3,... + const Vec even = Add(lo, lo); + const Vec odd = Add(even, Set(d, 1)); + HWY_ASSERT_VEC_EQ(d, odd, ConcatOdd(d, hi, lo)); + HWY_ASSERT_VEC_EQ(d, even, ConcatEven(d, hi, lo)); + + const Vec v_1 = Set(d, ConvertScalarTo(1)); + const Vec v_2 = Set(d, ConvertScalarTo(2)); + const Vec v_3 = Set(d, ConvertScalarTo(3)); + const Vec v_4 = Set(d, ConvertScalarTo(4)); + + const Half dh; + const Vec v_12 = InterleaveLower(v_1, v_2); /* {1, 2, 1, 2, ...} */ + const Vec v_34 = InterleaveLower(v_3, v_4); /* {3, 4, 3, 4, ...} */ + const Vec v_13 = + ConcatLowerLower(d, v_3, v_1); /* {1, 1, ..., 3, 3, ...} */ + const Vec v_24 = + ConcatLowerLower(d, v_4, v_2); /* {2, 2, ..., 4, 4, ...} */ + + const Vec concat_even_1234_result = ConcatEven(d, v_34, v_12); + const Vec concat_odd_1234_result = ConcatOdd(d, v_34, v_12); + + HWY_ASSERT_VEC_EQ(d, v_13, concat_even_1234_result); + HWY_ASSERT_VEC_EQ(d, v_24, concat_odd_1234_result); + HWY_ASSERT_VEC_EQ(dh, LowerHalf(dh, v_3), + UpperHalf(dh, concat_even_1234_result)); + HWY_ASSERT_VEC_EQ(dh, LowerHalf(dh, v_4), + UpperHalf(dh, concat_odd_1234_result)); + + // This test catches inadvertent saturation. + const Vec min = Set(d, LowestValue()); + const Vec max = Set(d, HighestValue()); + HWY_ASSERT_VEC_EQ(d, max, ConcatOdd(d, max, max)); + HWY_ASSERT_VEC_EQ(d, max, ConcatEven(d, max, max)); + HWY_ASSERT_VEC_EQ(d, min, ConcatOdd(d, min, min)); + HWY_ASSERT_VEC_EQ(d, min, ConcatEven(d, min, min)); +#else + (void)d; +#endif // HWY_TARGET != HWY_SCALAR + } +}; + +HWY_NOINLINE void TestAllConcatOddEven() { + ForAllTypes(ForShrinkableVectors()); +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace hwy +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace hwy { +namespace { +HWY_BEFORE_TEST(HwyConcatTest); +HWY_EXPORT_AND_TEST_P(HwyConcatTest, TestAllConcat); +HWY_EXPORT_AND_TEST_P(HwyConcatTest, TestAllConcatOddEven); +HWY_AFTER_TEST(); +} // namespace +} // namespace hwy +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/convert_test.cc b/hwy/tests/convert_test.cc index 4feb26e8ef..f9faac6bfb 100644 --- a/hwy/tests/convert_test.cc +++ b/hwy/tests/convert_test.cc @@ -27,6 +27,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template size_t DeduceN(Simd) { @@ -1438,14 +1439,15 @@ HWY_NOINLINE void TestAllNonFiniteF2IPromoteUpperLowerTo() { #endif } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyConvertTest); HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllRebind); HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllPromoteTo); @@ -1465,6 +1467,7 @@ HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllF2IPromoteTo); HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllF2IPromoteUpperLowerTo); HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllNonFiniteF2IPromoteUpperLowerTo); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/count_test.cc b/hwy/tests/count_test.cc index d871ab33f9..cc2d841122 100644 --- a/hwy/tests/count_test.cc +++ b/hwy/tests/count_test.cc @@ -25,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestPopulationCount { template @@ -290,20 +291,22 @@ HWY_NOINLINE void TestAllHighestSetBitIndex() { ForIntegerTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyCountTest); HWY_EXPORT_AND_TEST_P(HwyCountTest, TestAllPopulationCount); HWY_EXPORT_AND_TEST_P(HwyCountTest, TestAllLeadingZeroCount); HWY_EXPORT_AND_TEST_P(HwyCountTest, TestAllTrailingZeroCount); HWY_EXPORT_AND_TEST_P(HwyCountTest, TestAllHighestSetBitIndex); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/crypto_test.cc b/hwy/tests/crypto_test.cc index fbe439de29..b92a432de4 100644 --- a/hwy/tests/crypto_test.cc +++ b/hwy/tests/crypto_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { #define HWY_PRINT_CLMUL_GOLDEN 0 @@ -699,19 +700,21 @@ struct TestCLMul { HWY_NOINLINE void TestAllCLMul() { ForGEVectors<128, TestCLMul>()(uint64_t()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyCryptoTest); HWY_EXPORT_AND_TEST_P(HwyCryptoTest, TestAllAES); HWY_EXPORT_AND_TEST_P(HwyCryptoTest, TestAllAESInverse); HWY_EXPORT_AND_TEST_P(HwyCryptoTest, TestAllCLMul); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/demote_test.cc b/hwy/tests/demote_test.cc index 5a8e6274d3..1deff1d7c5 100644 --- a/hwy/tests/demote_test.cc +++ b/hwy/tests/demote_test.cc @@ -30,6 +30,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template struct TestDemoteTo { @@ -819,6 +820,7 @@ HWY_NOINLINE void TestAllI32F64() { #endif } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy @@ -827,8 +829,8 @@ HWY_AFTER_NAMESPACE(); #endif // !HWY_IS_MSAN #if HWY_ONCE - namespace hwy { +namespace { #if !HWY_IS_MSAN HWY_BEFORE_TEST(HwyDemoteTest); HWY_EXPORT_AND_TEST_P(HwyDemoteTest, TestAllDemoteToInt); @@ -841,6 +843,7 @@ HWY_EXPORT_AND_TEST_P(HwyDemoteTest, TestAllOrderedDemote2To); HWY_EXPORT_AND_TEST_P(HwyDemoteTest, TestAllI32F64); HWY_AFTER_TEST(); #endif // !HWY_IS_MSAN +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/div_test.cc b/hwy/tests/div_test.cc index 88cd60c1e8..ca83280e65 100644 --- a/hwy/tests/div_test.cc +++ b/hwy/tests/div_test.cc @@ -26,6 +26,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestIntegerDiv { template > @@ -261,18 +262,20 @@ HWY_NOINLINE void TestAllIntegerMod() { ForIntegerTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyDivTest); HWY_EXPORT_AND_TEST_P(HwyDivTest, TestAllIntegerDiv); HWY_EXPORT_AND_TEST_P(HwyDivTest, TestAllIntegerMod); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/dup128_vec_test.cc b/hwy/tests/dup128_vec_test.cc index d20da75b99..369b00cd8d 100644 --- a/hwy/tests/dup128_vec_test.cc +++ b/hwy/tests/dup128_vec_test.cc @@ -22,6 +22,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestDup128VecFromValues { template @@ -218,17 +219,19 @@ HWY_NOINLINE void TestAllDup128VecFromValues() { ForFloat3264Types(func); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyDup128VecTest); HWY_EXPORT_AND_TEST_P(HwyDup128VecTest, TestAllDup128VecFromValues); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - +HWY_TEST_MAIN(); #endif // HWY_ONCE diff --git a/hwy/tests/expand_test.cc b/hwy/tests/expand_test.cc index bfa3e18eab..9955a3a786 100644 --- a/hwy/tests/expand_test.cc +++ b/hwy/tests/expand_test.cc @@ -28,6 +28,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { // Regenerate tables used in the implementation, instead of testing. #define HWY_PRINT_TABLES 0 @@ -124,8 +125,6 @@ HWY_NOINLINE void TestAllExpand() { #endif // !HWY_PRINT_TABLES #if HWY_PRINT_TABLES || HWY_IDE -namespace detail { // For code folding. - void PrintExpand8x8Tables() { printf("// %s\n", __FUNCTION__); constexpr size_t N = 8; @@ -256,29 +255,28 @@ void PrintExpand64x4NibbleTables() { printf("\n"); } -} // namespace detail - HWY_NOINLINE void PrintTables() { // Only print once. #if HWY_TARGET == HWY_STATIC_TARGET - detail::PrintExpand32x8NibbleTables(); - detail::PrintExpand64x4NibbleTables(); - detail::PrintExpand16x8LaneTables(); - detail::PrintExpand16x8ByteTables(); - detail::PrintExpand8x8Tables(); + PrintExpand32x8NibbleTables(); + PrintExpand64x4NibbleTables(); + PrintExpand16x8LaneTables(); + PrintExpand16x8ByteTables(); + PrintExpand8x8Tables(); #endif } #endif // HWY_PRINT_TABLES +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyExpandTest); #if HWY_PRINT_TABLES // Only print instead of running tests; this will be visible in the log. @@ -287,6 +285,7 @@ HWY_EXPORT_AND_TEST_P(HwyExpandTest, PrintTables); HWY_EXPORT_AND_TEST_P(HwyExpandTest, TestAllExpand); #endif HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/float_test.cc b/hwy/tests/float_test.cc index e92d7d0e8c..997332cf1b 100644 --- a/hwy/tests/float_test.cc +++ b/hwy/tests/float_test.cc @@ -30,6 +30,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { HWY_NOINLINE void TestAllF16FromF32() { const FixedTag d1; @@ -505,14 +506,15 @@ HWY_NOINLINE void TestAllAbsDiff() { ForFloatTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyFloatTest); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllF16FromF32); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllF32FromF16); @@ -528,6 +530,7 @@ HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllCeil); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllFloor); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllAbsDiff); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/fma_test.cc b/hwy/tests/fma_test.cc index dcab274ab3..2217270854 100644 --- a/hwy/tests/fma_test.cc +++ b/hwy/tests/fma_test.cc @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #undef HWY_TARGET_INCLUDE @@ -24,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { #ifndef HWY_NATIVE_FMA #error "Bug in set_macros-inl.h, did not set HWY_NATIVE_FMA" @@ -164,19 +166,21 @@ HWY_NOINLINE void TestAllMulAddSub() { ForAllTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyFmaTest); HWY_EXPORT_AND_TEST_P(HwyFmaTest, TestAllMulAdd); HWY_EXPORT_AND_TEST_P(HwyFmaTest, TestAllMulSub); HWY_EXPORT_AND_TEST_P(HwyFmaTest, TestAllMulAddSub); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/foreach_vec_test.cc b/hwy/tests/foreach_vec_test.cc index 9b776f4fd9..945efeffed 100644 --- a/hwy/tests/foreach_vec_test.cc +++ b/hwy/tests/foreach_vec_test.cc @@ -22,6 +22,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct ForeachVectorTestPerLaneSizeState { size_t num_of_lanes_mask; @@ -627,14 +628,15 @@ HWY_NOINLINE void TestAllForPartialFixedOrFullVectors() { #undef HWY_DECLARE_FOREACH_VECTOR_TEST +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyForeachVecTest); HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForMaxPow2); HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForExtendableVectors); @@ -646,6 +648,7 @@ HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForHalfVectors); HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForPartialVectors); HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForPartialFixedOrFullVectors); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/hwy_gtest.h b/hwy/tests/hwy_gtest.h index 96c3ae8143..22d5268e91 100644 --- a/hwy/tests/hwy_gtest.h +++ b/hwy/tests/hwy_gtest.h @@ -31,6 +31,10 @@ #endif // HWY_TEST_STANDALONE #include +#if HWY_TEST_STANDALONE +#include +#include +#endif #include #include @@ -174,18 +178,366 @@ std::string TestParamTargetNameAndT( #else // HWY_TEST_STANDALONE +namespace { + +class GTestFilterPattern { + private: + struct FilterPatternComponent { + bool has_match_any_string_wildcard; + size_t min_num_of_leading_chars_to_match; + const char* subpattern_start; + size_t subpattern_to_match_len; + }; + + public: + GTestFilterPattern() = default; + GTestFilterPattern(const GTestFilterPattern&) = default; + GTestFilterPattern(GTestFilterPattern&&) = default; + GTestFilterPattern& operator=(const GTestFilterPattern&) = default; + GTestFilterPattern& operator=(GTestFilterPattern&&) = default; + GTestFilterPattern(const char* gtest_filter_pattern, + size_t gtest_filter_pattern_len); + + public: + bool Matches(const char* test_name, + size_t remaining_to_match_len) const noexcept; + + private: + std::vector pattern_components_; + size_t min_test_name_len_; +}; + +GTestFilterPattern::GTestFilterPattern(const char* gtest_filter_pattern_str, + size_t gtest_filter_pattern_len) { + size_t min_test_name_len = 0; + const char* const gtest_filter_pattern_str_end = + gtest_filter_pattern_str + gtest_filter_pattern_len; + + for (const char* subpattern_start = gtest_filter_pattern_str; + subpattern_start != gtest_filter_pattern_str_end;) { + size_t min_num_of_leading_chars_to_match = 0; + bool has_match_any_string_wildcard = false; + + // Advance subpattern_start past any '*' or '?' characters + for (char first_non_wildcard_ch; + subpattern_start != gtest_filter_pattern_str_end && + ((first_non_wildcard_ch = (*subpattern_start)) == '*' || + first_non_wildcard_ch == '?'); + ++subpattern_start) { + if (first_non_wildcard_ch == '*') { + has_match_any_string_wildcard = true; + } else { + ++min_num_of_leading_chars_to_match; + } + } + + // If subpattern_start != gtest_filter_pattern_str_end is true, + // subpattern_start points to a non-wildcard character + + const char* subpattern_end; + if ((subpattern_end = subpattern_start) != gtest_filter_pattern_str_end) { + // Find the next '*' character past subpattern_start if there are any + // '*' characters past subpattern_start in the subpattern + while ((++subpattern_end) != gtest_filter_pattern_str_end && + (*subpattern_end) != '*') { + } + + // Decrement subpattern_end while subpattern_end != subpattern_start + 1 + // is true and subpattern_end - 1 points to a '?' character + for (; + subpattern_end != subpattern_start + 1 && subpattern_end[-1] == '?'; + --subpattern_end) { + } + + // subpattern_end - 1 now points to a non-wildcard character + } + + // Add the current filter pattern component to pattern_components_ + const FilterPatternComponent curr_filter_component{ + has_match_any_string_wildcard, min_num_of_leading_chars_to_match, + subpattern_start, + static_cast(subpattern_end - subpattern_start)}; + pattern_components_.push_back(curr_filter_component); + + // Advance to the next subpattern by setting subpattern_start to + // subpattern_end + subpattern_start = subpattern_end; + } + + min_test_name_len_ = min_test_name_len; +} + +bool GTestFilterPattern::Matches(const char* test_name, + size_t remaining_to_match_len) const noexcept { + if (remaining_to_match_len < min_test_name_len_) { + return false; + } + + const size_t num_of_pattern_components = pattern_components_.size(); + for (size_t i = num_of_pattern_components; i != 0; i--) { + const FilterPatternComponent& curr_pattern_component = + pattern_components_[i - 1]; + const size_t subpattern_to_match_len = + curr_pattern_component.subpattern_to_match_len; + const size_t min_num_to_match = + curr_pattern_component.min_num_of_leading_chars_to_match + + subpattern_to_match_len; + + if (remaining_to_match_len < min_num_to_match) { + return false; + } + + if (subpattern_to_match_len != 0) { + const bool is_restartable_subpattern_match = + i != num_of_pattern_components && + pattern_components_[i].has_match_any_string_wildcard; + + const char* subpattern_start = curr_pattern_component.subpattern_start; + + bool restart_match; + do { + restart_match = false; + const size_t test_name_match_substr_offset = + remaining_to_match_len - subpattern_to_match_len; + + bool matches_subpattern = + test_name[test_name_match_substr_offset] == subpattern_start[0] && + test_name[test_name_match_substr_offset + subpattern_to_match_len - + 1] == subpattern_start[subpattern_to_match_len - 1]; + if (matches_subpattern) { + for (size_t i = 1; i != subpattern_to_match_len - 1; i++) { + char c1 = test_name[test_name_match_substr_offset + i]; + char c2 = subpattern_start[i]; + + if (c1 != c2 && c2 != '?') { + matches_subpattern = false; + break; + } + } + } + + if (!matches_subpattern) { + restart_match = is_restartable_subpattern_match && + (--remaining_to_match_len) >= min_num_to_match; + if (!restart_match) { + return false; + } + } + } while (restart_match); + } + + remaining_to_match_len -= min_num_to_match; + } + + return true; +} + +std::vector& PositiveGTestFilterPatterns() { + static std::vector positive_test_filter_patterns; + return positive_test_filter_patterns; +} + +std::vector& NegativeGTestFilterPatterns() { + static std::vector negative_test_filter_patterns; + return negative_test_filter_patterns; +} + +// ShouldOnlyListHighwayTestNames() returns true if the names of the unit tests +// should be outputted without executing the unit tests. +// +// Otherwise, if the unit tests should be executed, +// ShouldOnlyListHighwayTestNames() returns true. +bool& ShouldOnlyListHighwayTestNames() { + static bool should_only_list_test_names = false; + return should_only_list_test_names; +} + +// ParseGTestFilterPatterns parses the filter patterns passed into the +// --gtest_filter= command line argument (or set by the GTEST_FILTER environment +// variable if there is no --gtest_filter= command line argument present) +static void ParseGTestFilterPatterns(const char* gtest_filter_str) { + std::vector* ptr_to_positive_test_filter_patterns = + &PositiveGTestFilterPatterns(); + std::vector* ptr_to_negative_test_filter_patterns = + &NegativeGTestFilterPatterns(); + + std::vector* ptr_to_vector_to_append_pattern_to = + ptr_to_positive_test_filter_patterns; + + char first_filter_pattern_ch; + bool colon_delimiter_encountered = false; + while ((first_filter_pattern_ch = (*gtest_filter_str)) != '\0') { + if (first_filter_pattern_ch == ':') { + colon_delimiter_encountered = true; + ++gtest_filter_str; + continue; + } + + if (first_filter_pattern_ch == '-' && + ptr_to_vector_to_append_pattern_to == + ptr_to_positive_test_filter_patterns) { + ptr_to_vector_to_append_pattern_to = ptr_to_negative_test_filter_patterns; + if (!colon_delimiter_encountered && + ptr_to_positive_test_filter_patterns->empty()) { + ptr_to_positive_test_filter_patterns->emplace_back("*", 1); + } + ++gtest_filter_str; + continue; + } + + size_t filter_pattern_len = 1; + + // Find the next filter pattern delimiter character or null terminator + for (char filter_pattern_end_ch; + (filter_pattern_end_ch = gtest_filter_str[filter_pattern_len]) != + '\0' && + filter_pattern_end_ch != ':' && + (filter_pattern_end_ch != '-' || + ptr_to_vector_to_append_pattern_to == + ptr_to_negative_test_filter_patterns); + ++filter_pattern_len) { + } + + // Add the current filter pattern to *ptr_to_vector_to_append_pattern_to + ptr_to_vector_to_append_pattern_to->emplace_back(gtest_filter_str, + filter_pattern_len); + + // Advance gtest_filter_str by filter_pattern_len chars + gtest_filter_str += filter_pattern_len; + } +} + +// TestNameMatchesGTestFilter(test_name) returns true if any of the following +// are true: +// - test_name matches the filter passed in by the last --gtest_filter= command +// line argument if any --gtest_filter= arguments are present on the command +// line +// - test_name matches the filter set by the GTEST_FILTER environment variable +// if no --gtest_filter= commands were passed into the command line and +// the GTEST_FILTER environment variable is set +// - there were no --gtest_filter= arguments on the command line and the +// GTEST_FILTER environment variable is not set +// +// Otherwise, TestNameMatchesGTestFilter(test_name) returns false +static HWY_INLINE HWY_MAYBE_UNUSED bool TestNameMatchesGTestFilter( + const char* test_name, size_t test_name_len) { + for (const GTestFilterPattern& negative_pattern : + NegativeGTestFilterPatterns()) { + if (negative_pattern.Matches(test_name, test_name_len)) { + return false; + } + } + for (const GTestFilterPattern& positive_pattern : + PositiveGTestFilterPatterns()) { + if (positive_pattern.Matches(test_name, test_name_len)) { + return true; + } + } + return false; +} +static HWY_INLINE HWY_MAYBE_UNUSED bool TestNameMatchesGTestFilter( + const char* test_name) { + return TestNameMatchesGTestFilter(test_name, strlen(test_name)); +} +static HWY_INLINE HWY_MAYBE_UNUSED bool TestNameMatchesGTestFilter( + const std::string& test_name) { + return TestNameMatchesGTestFilter(test_name.data(), test_name.length()); +} + +// InitTestProgramOptions processes the GTEST_FILTER environment variable, the +// --gtest_filter= command line argument, and the --gtest_list_tests command +// line argument +static HWY_MAYBE_UNUSED void InitTestProgramOptions( + const int argc, const char* const* const argv) { + // Suppress warning that is normally emitted by MSVC by the getenv call below + HWY_DIAGNOSTICS(push) +#if HWY_COMPILER_MSVC || HWY_COMPILER_CLANGCL + HWY_DIAGNOSTICS_OFF(disable : 4996, ignored "-Wdeprecated-declarations") +#endif + const char* gtest_filter = getenv("GTEST_FILTER"); + HWY_DIAGNOSTICS(pop) + + if (!gtest_filter) { + gtest_filter = "*"; + } + for (int i = 1; i < argc; i++) { + const char* const curr_arg = argv[i]; + if (!curr_arg) { + break; + } + if (curr_arg[0] == '-' && curr_arg[1] == '-' && curr_arg[2] == 'g' && + curr_arg[3] == 't' && curr_arg[4] == 'e' && curr_arg[5] == 's' && + curr_arg[6] == 't' && curr_arg[7] == '_') { /* --gtest_ */ + switch (curr_arg[8]) { + case 'f': + if (curr_arg[9] == 'i' && curr_arg[10] == 'l' && + curr_arg[11] == 't' && curr_arg[12] == 'e' && + curr_arg[13] == 'r' && curr_arg[14] == '=') { + // If the --gtest_filter= command line option is specified, only + // execute the tests that match the specified filter + gtest_filter = curr_arg + 15; + } + break; + case 'l': + if (curr_arg[9] == 'i' && curr_arg[10] == 's' && + curr_arg[11] == 't' && curr_arg[12] == '_' && + curr_arg[13] == 't' && curr_arg[14] == 'e' && + curr_arg[15] == 's' && curr_arg[16] == 't' && + curr_arg[17] == 's' && curr_arg[18] == '\0') { + // If the --gtest_list_tests command line option is specified, + // output the name of the unit tests but do not execute the unit + // tests + ShouldOnlyListHighwayTestNames() = true; + break; + } + default: + break; + } + } + } + + // Initialize PositiveGTestFilterPatterns() and NegativeGTestFilterPatterns() + // from gtest_filter + ParseGTestFilterPatterns(gtest_filter); +} + +} // namespace + // Cannot be a function, otherwise the HWY_EXPORT table defined here will not // be visible to HWY_DYNAMIC_DISPATCH. -#define HWY_EXPORT_AND_TEST_P(suite, func_name) \ - HWY_EXPORT(func_name); \ - hwy::SetSupportedTargetsForTest(0); \ - for (int64_t target : hwy::SupportedAndGeneratedTargets()) { \ - hwy::SetSupportedTargetsForTest(target); \ - fprintf(stderr, "=== %s for %s:\n", #func_name, hwy::TargetName(target)); \ - HWY_DYNAMIC_DISPATCH(func_name)(); \ - } \ - /* Disable the mask after the test. */ \ - hwy::SetSupportedTargetsForTest(0); \ +#define HWY_EXPORT_AND_TEST_P(suite, func_name) \ + full_test_name = #suite; \ + full_test_name += "Group/"; \ + full_test_name += #suite; \ + full_test_name += '.'; \ + full_test_name_suite_prefix_len = full_test_name.length(); \ + full_test_name += #func_name; \ + full_test_name += '/'; \ + full_test_name_prefix_len = full_test_name.length(); \ + HWY_EXPORT(func_name); \ + hwy::SetSupportedTargetsForTest(0); \ + for (int64_t target : hwy::SupportedAndGeneratedTargets()) { \ + hwy::SetSupportedTargetsForTest(target); \ + full_test_name.resize(full_test_name_prefix_len); \ + full_test_name += hwy::TargetName(target); \ + if (hwy::TestNameMatchesGTestFilter(full_test_name)) { \ + if (hwy::ShouldOnlyListHighwayTestNames()) { \ + const char* full_test_name_c_str = full_test_name.c_str(); \ + if (need_to_output_suite_name) { \ + need_to_output_suite_name = false; \ + printf("%sGroup/%s.\n", #suite, #suite); \ + } \ + printf(" %s\n", \ + full_test_name_c_str + full_test_name_suite_prefix_len); \ + } else { \ + fprintf(stderr, "=== %s for %s:\n", #func_name, \ + hwy::TargetName(target)); \ + HWY_DYNAMIC_DISPATCH(func_name)(); \ + } \ + } \ + } \ + /* Disable the mask after the test. */ \ + hwy::SetSupportedTargetsForTest(0); \ static_assert(true, "For requiring trailing semicolon") // HWY_BEFORE_TEST may reside inside a namespace, but HWY_AFTER_TEST will define @@ -206,15 +558,22 @@ struct RegisterRunAll { void RunAll(); \ static hwy::RegisterRunAll HWY_CONCAT(reg_, suite)(&RunAll); \ void RunAll() { \ + std::string full_test_name; \ + size_t full_test_name_suite_prefix_len; \ + size_t full_test_name_prefix_len; \ + bool need_to_output_suite_name = true; \ static_assert(true, "For requiring trailing semicolon") // Must be followed by semicolon, then a closing brace for ONE namespace. -#define HWY_AFTER_TEST() \ - } /* RunAll*/ \ - } /* namespace */ \ - int main(int /*argc*/, char** /*argv*/) { \ - hwy::GetRunAll()(); \ - fprintf(stderr, "Success.\n"); \ +#define HWY_AFTER_TEST() \ + } /* RunAll*/ \ + } /* namespace */ \ + int main(int argc, char** argv) { \ + hwy::InitTestProgramOptions(argc, argv); \ + hwy::GetRunAll()(); \ + if (!hwy::ShouldOnlyListHighwayTestNames()) { \ + fprintf(stderr, "Success.\n"); \ + } \ return 0 // -------------------- Non-SIMD test cases: @@ -222,6 +581,8 @@ struct RegisterRunAll { struct FuncAndName { VoidFunc func; const char* name; + const char* suite_name; + const char* full_name; }; // Singleton of registered tests to be run by HWY_TEST_MAIN @@ -232,27 +593,49 @@ std::vector& GetFuncAndNames() { // For use by TEST; adds to the list. struct RegisterTest { - RegisterTest(VoidFunc func, const char* name) { - hwy::GetFuncAndNames().push_back({func, name}); + RegisterTest(VoidFunc func, const char* name, const char* suite_name, + const char* full_name) { + hwy::GetFuncAndNames().push_back({func, name, suite_name, full_name}); } }; -// Registers a function to be called by `HWY_TEST_MAIN`. `suite` is unused. -#define TEST(suite, func) \ - void func(); \ - static hwy::RegisterTest HWY_CONCAT(reg_, func)({&func, #func}); \ +// Registers a function to be called by `HWY_TEST_MAIN`. +#define TEST(suite, func) \ + void func(); \ + static hwy::RegisterTest HWY_CONCAT( \ + reg_, func)({&func, #func, #suite, #suite "Group/" #suite "." #func}); \ void func() // Expands to a main() that calls all TEST. Must reside at namespace scope. -#define HWY_TEST_MAIN() \ - int main() { \ - for (const auto& func_and_name : hwy::GetFuncAndNames()) { \ - fprintf(stderr, "=== %s:\n", func_and_name.name); \ - func_and_name.func(); \ - } \ - fprintf(stderr, "Success.\n"); \ - return 0; \ - } \ +#define HWY_TEST_MAIN() \ + int main(int argc, char** argv) { \ + hwy::InitTestProgramOptions(argc, argv); \ + const char* suite_name_of_prev_test = nullptr; \ + for (const auto& func_and_name : hwy::GetFuncAndNames()) { \ + if (hwy::TestNameMatchesGTestFilter(func_and_name.full_name)) { \ + if (hwy::ShouldOnlyListHighwayTestNames()) { \ + const char* const suite_name_of_curr_test = \ + func_and_name.suite_name; \ + if (suite_name_of_curr_test != suite_name_of_prev_test && \ + (!suite_name_of_prev_test || \ + strcmp(suite_name_of_prev_test, suite_name_of_curr_test) == \ + 0)) { \ + suite_name_of_prev_test = suite_name_of_curr_test; \ + printf("%sGroup/%s.\n", suite_name_of_curr_test, \ + suite_name_of_curr_test); \ + } \ + printf(" %s\n", func_and_name.name); \ + } else { \ + fprintf(stderr, "=== %s:\n", func_and_name.name); \ + func_and_name.func(); \ + } \ + } \ + } \ + if (!hwy::ShouldOnlyListHighwayTestNames()) { \ + fprintf(stderr, "Success.\n"); \ + } \ + return 0; \ + } \ static_assert(true, "For requiring trailing semicolon") #endif // HWY_TEST_STANDALONE diff --git a/hwy/tests/if_test.cc b/hwy/tests/if_test.cc index ff22078e20..f4e1c90ae9 100644 --- a/hwy/tests/if_test.cc +++ b/hwy/tests/if_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestIfThenElse { template @@ -348,14 +349,15 @@ HWY_NOINLINE void TestAllIfNegativeThenNegOrUndefIfZero() { ForFloatTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyIfTest); HWY_EXPORT_AND_TEST_P(HwyIfTest, TestAllIfThenElse); HWY_EXPORT_AND_TEST_P(HwyIfTest, TestAllIfVecThenElse); @@ -364,6 +366,7 @@ HWY_EXPORT_AND_TEST_P(HwyIfTest, TestAllZeroIfNegative); HWY_EXPORT_AND_TEST_P(HwyIfTest, TestAllIfNegative); HWY_EXPORT_AND_TEST_P(HwyIfTest, TestAllIfNegativeThenNegOrUndefIfZero); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/in_range_float_to_int_conv_test.cc b/hwy/tests/in_range_float_to_int_conv_test.cc index bbde851f84..bd76696471 100644 --- a/hwy/tests/in_range_float_to_int_conv_test.cc +++ b/hwy/tests/in_range_float_to_int_conv_test.cc @@ -25,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { // HWY_IN_RANGE_F2I_CONV_TEST_CONST_ASSERT(condition, msg) checks that condition // is true using static_assert if constexpr BitCastScalar is available and @@ -620,14 +621,15 @@ HWY_NOINLINE void TestAllPromoteInRangeOddEvenFloatToInt() { #undef HWY_IN_RANGE_F2I_CONV_TEST_CONST_ASSERT +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyInRangeFloatToIntConvTest); HWY_EXPORT_AND_TEST_P(HwyInRangeFloatToIntConvTest, TestAllConvertInRangeFloatToInt); @@ -636,6 +638,7 @@ HWY_EXPORT_AND_TEST_P(HwyInRangeFloatToIntConvTest, HWY_EXPORT_AND_TEST_P(HwyInRangeFloatToIntConvTest, TestAllPromoteInRangeOddEvenFloatToInt); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/interleaved_test.cc b/hwy/tests/interleaved_test.cc index 15c46f7756..1591f94a7f 100644 --- a/hwy/tests/interleaved_test.cc +++ b/hwy/tests/interleaved_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestLoadStoreInterleaved2 { template @@ -168,19 +169,21 @@ HWY_NOINLINE void TestAllLoadStoreInterleaved4() { #endif } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyInterleavedTest); HWY_EXPORT_AND_TEST_P(HwyInterleavedTest, TestAllLoadStoreInterleaved2); HWY_EXPORT_AND_TEST_P(HwyInterleavedTest, TestAllLoadStoreInterleaved3); HWY_EXPORT_AND_TEST_P(HwyInterleavedTest, TestAllLoadStoreInterleaved4); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/list_targets.cc b/hwy/tests/list_targets.cc index 6aa836d93e..0dbefe1dca 100644 --- a/hwy/tests/list_targets.cc +++ b/hwy/tests/list_targets.cc @@ -16,10 +16,13 @@ // Simple tool to print the list of targets that were compiled in when building // this tool. +#include #include #include "hwy/highway.h" +namespace { + void PrintTargets(const char* msg, int64_t targets) { fprintf(stderr, "%s", msg); // For each bit other than the sign bit: @@ -31,6 +34,8 @@ void PrintTargets(const char* msg, int64_t targets) { fprintf(stderr, "\n"); } +} // namespace + int main() { #ifdef HWY_COMPILE_ONLY_EMU128 const int only_emu128 = 1; diff --git a/hwy/tests/logical_test.cc b/hwy/tests/logical_test.cc index 5676187dde..ecd7589c9e 100644 --- a/hwy/tests/logical_test.cc +++ b/hwy/tests/logical_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestNot { template @@ -145,19 +146,21 @@ HWY_NOINLINE void TestAllTestBit() { ForIntegerTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyLogicalTest); HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllNot); HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllLogical); HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllTestBit); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/mask_combine_test.cc b/hwy/tests/mask_combine_test.cc index d7c34cc492..5e92391648 100644 --- a/hwy/tests/mask_combine_test.cc +++ b/hwy/tests/mask_combine_test.cc @@ -22,6 +22,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestLowerAndUpperHalvesOfMask { template @@ -171,18 +172,20 @@ HWY_NOINLINE void TestAllCombineMasks() { ForAllTypes(ForExtendableVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMaskCombineTest); HWY_EXPORT_AND_TEST_P(HwyMaskCombineTest, TestAllLowerAndUpperHalvesOfMask); HWY_EXPORT_AND_TEST_P(HwyMaskCombineTest, TestAllCombineMasks); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/mask_convert_test.cc b/hwy/tests/mask_convert_test.cc index 7d160d5d91..ede43f5c53 100644 --- a/hwy/tests/mask_convert_test.cc +++ b/hwy/tests/mask_convert_test.cc @@ -22,6 +22,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template struct TestPromoteMaskTo { @@ -330,19 +331,21 @@ HWY_NOINLINE void TestAllOrderedDemote2MasksTo() { ForUIF163264(ForShrinkableVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMaskConvertTest); HWY_EXPORT_AND_TEST_P(HwyMaskConvertTest, TestAllPromoteMaskTo); HWY_EXPORT_AND_TEST_P(HwyMaskConvertTest, TestAllDemoteMaskTo); HWY_EXPORT_AND_TEST_P(HwyMaskConvertTest, TestAllOrderedDemote2MasksTo); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/mask_mem_test.cc b/hwy/tests/mask_mem_test.cc index 5812768215..0fd84e6e9a 100644 --- a/hwy/tests/mask_mem_test.cc +++ b/hwy/tests/mask_mem_test.cc @@ -31,6 +31,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestMaskedLoad { template @@ -378,14 +379,15 @@ HWY_NOINLINE void TestAllStoreMaskBits() { ForAllTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMaskMemTest); HWY_EXPORT_AND_TEST_P(HwyMaskMemTest, TestAllMaskedLoad); HWY_EXPORT_AND_TEST_P(HwyMaskMemTest, TestAllMaskedScatter); @@ -395,6 +397,7 @@ HWY_EXPORT_AND_TEST_P(HwyMaskMemTest, TestAllGatherIndexN); HWY_EXPORT_AND_TEST_P(HwyMaskMemTest, TestAllBlendedStore); HWY_EXPORT_AND_TEST_P(HwyMaskMemTest, TestAllStoreMaskBits); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/mask_slide_test.cc b/hwy/tests/mask_slide_test.cc index 9cf96c842a..415336a8a1 100644 --- a/hwy/tests/mask_slide_test.cc +++ b/hwy/tests/mask_slide_test.cc @@ -22,6 +22,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestSlideMaskDownLanes { template @@ -139,18 +140,20 @@ HWY_NOINLINE void TestAllSlideMaskUpLanes() { ForAllTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMaskSlideTest); HWY_EXPORT_AND_TEST_P(HwyMaskSlideTest, TestAllSlideMaskDownLanes); HWY_EXPORT_AND_TEST_P(HwyMaskSlideTest, TestAllSlideMaskUpLanes); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/mask_test.cc b/hwy/tests/mask_test.cc index 98e884753a..3ad55f5ced 100644 --- a/hwy/tests/mask_test.cc +++ b/hwy/tests/mask_test.cc @@ -26,6 +26,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { // All types. struct TestMaskFalse { @@ -546,14 +547,15 @@ HWY_NOINLINE void TestAllDup128MaskFromMaskBits() { ForAllTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMaskTest); HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllMaskFalse); HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllFromVec); @@ -570,6 +572,7 @@ HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllSetOnlyFirst); HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllSetAtOrAfterFirst); HWY_EXPORT_AND_TEST_P(HwyMaskTest, TestAllDup128MaskFromMaskBits); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/masked_arithmetic_test.cc b/hwy/tests/masked_arithmetic_test.cc index 4513274e6d..6bc6767a9c 100644 --- a/hwy/tests/masked_arithmetic_test.cc +++ b/hwy/tests/masked_arithmetic_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestAddSubMul { template @@ -378,14 +379,15 @@ HWY_NOINLINE void TestAllFloatExceptions() { ForFloatTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMaskedArithmeticTest); HWY_EXPORT_AND_TEST_P(HwyMaskedArithmeticTest, TestAllAddSubMul); HWY_EXPORT_AND_TEST_P(HwyMaskedArithmeticTest, TestAllSatAddSub); @@ -393,6 +395,7 @@ HWY_EXPORT_AND_TEST_P(HwyMaskedArithmeticTest, TestAllDiv); HWY_EXPORT_AND_TEST_P(HwyMaskedArithmeticTest, TestAllIntegerDivMod); HWY_EXPORT_AND_TEST_P(HwyMaskedArithmeticTest, TestAllFloatExceptions); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/masked_minmax_test.cc b/hwy/tests/masked_minmax_test.cc index 9dd612a1ba..0e071b14c1 100644 --- a/hwy/tests/masked_minmax_test.cc +++ b/hwy/tests/masked_minmax_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestUnsignedMinMax { template @@ -136,18 +137,20 @@ HWY_NOINLINE void TestAllSignedMinMax() { ForFloatTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMaskedMinMaxTest); HWY_EXPORT_AND_TEST_P(HwyMaskedMinMaxTest, TestAllUnsignedMinMax); HWY_EXPORT_AND_TEST_P(HwyMaskedMinMaxTest, TestAllSignedMinMax); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/memory_test.cc b/hwy/tests/memory_test.cc index 6fb4cc4f41..8b698f3308 100644 --- a/hwy/tests/memory_test.cc +++ b/hwy/tests/memory_test.cc @@ -33,6 +33,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestLoadStore { template @@ -326,7 +327,6 @@ HWY_NOINLINE void TestAllCache() { Pause(); } -namespace detail { template HWY_INLINE T GenerateOtherValue(size_t val) { const T conv_val = static_cast(val); @@ -347,8 +347,6 @@ HWY_INLINE T GenerateOtherValue(size_t val) { return F16FromF32(GenerateOtherValue(val)); } -} // namespace detail - struct TestLoadN { template HWY_NOINLINE void operator()(T /*unused*/, D d) { @@ -365,7 +363,7 @@ struct TestLoadN { HWY_ASSERT(load_buf && expected); for (size_t i = 0; i < load_buf_len; i++) { - load_buf[i] = detail::GenerateOtherValue<0, T>(i + 1); + load_buf[i] = GenerateOtherValue<0, T>(i + 1); } ZeroBytes(expected.get(), N * sizeof(T)); @@ -401,7 +399,7 @@ struct TestLoadN { HWY_ASSERT_VEC_EQ(d, Load(d, expected.get()), actual_2); } - load_buf[0] = detail::GenerateOtherValue<0, T>(0); + load_buf[0] = GenerateOtherValue<0, T>(0); CopyBytes(load_buf.get(), expected.get(), N * sizeof(T)); HWY_ASSERT_VEC_EQ(d, Load(d, expected.get()), LoadN(d, load_buf.get(), N)); } @@ -428,7 +426,7 @@ struct TestLoadNOr { HWY_ASSERT(load_buf && expected); for (size_t i = 0; i < load_buf_len; i++) { - load_buf[i] = detail::GenerateOtherValue(i + 1); + load_buf[i] = GenerateOtherValue(i + 1); } const Vec no = Set(d, ConvertScalarTo(kNo)); @@ -468,7 +466,7 @@ struct TestLoadNOr { HWY_ASSERT_VEC_EQ(d, Load(d, expected.get()), actual_2); } - load_buf[0] = detail::GenerateOtherValue(kNo); + load_buf[0] = GenerateOtherValue(kNo); CopyBytes(load_buf.get(), expected.get(), N * sizeof(T)); HWY_ASSERT_VEC_EQ(d, Load(d, expected.get()), LoadNOr(no, d, load_buf.get(), N)); @@ -561,14 +559,15 @@ HWY_NOINLINE void TestAllStoreN() { ForAllTypesAndSpecial(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMemoryTest); HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllLoadStore); HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllSafeCopyN); @@ -581,6 +580,7 @@ HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllLoadN); HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllLoadNOr); HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllStoreN); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/minmax_test.cc b/hwy/tests/minmax_test.cc index 3ef116d30d..ef2b99609b 100644 --- a/hwy/tests/minmax_test.cc +++ b/hwy/tests/minmax_test.cc @@ -13,6 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include +#include + #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "tests/minmax_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep @@ -22,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestUnsignedMinMax { template @@ -257,19 +261,21 @@ HWY_NOINLINE void TestAllMinMax128Upper() { ForGEVectors<128, TestMinMax128Upper>()(uint64_t()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMinMaxTest); HWY_EXPORT_AND_TEST_P(HwyMinMaxTest, TestAllMinMax); HWY_EXPORT_AND_TEST_P(HwyMinMaxTest, TestAllMinMax128); HWY_EXPORT_AND_TEST_P(HwyMinMaxTest, TestAllMinMax128Upper); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/mul_by_pow2_test.cc b/hwy/tests/mul_by_pow2_test.cc index a2432ff63e..3ddaa06b40 100644 --- a/hwy/tests/mul_by_pow2_test.cc +++ b/hwy/tests/mul_by_pow2_test.cc @@ -25,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template static void MulByPow2TestCases( @@ -598,18 +599,20 @@ HWY_NOINLINE void TestAllMulByFloorPow2() { ForFloatTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMulByPow2Test); HWY_EXPORT_AND_TEST_P(HwyMulByPow2Test, TestAllMulByPow2); HWY_EXPORT_AND_TEST_P(HwyMulByPow2Test, TestAllMulByFloorPow2); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/mul_pairwise_test.cc b/hwy/tests/mul_pairwise_test.cc index e0496d9e1c..c380ad93e8 100644 --- a/hwy/tests/mul_pairwise_test.cc +++ b/hwy/tests/mul_pairwise_test.cc @@ -25,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestWidenMulPairwiseAdd { // Must be inlined on aarch64 for bf16, else clang crashes. @@ -340,19 +341,21 @@ HWY_NOINLINE void TestAllSatWidenMulPairwiseAccumulate() { ForShrinkableVectors()(int16_t()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMulPairwiseTest); HWY_EXPORT_AND_TEST_P(HwyMulPairwiseTest, TestAllWidenMulPairwiseAdd); HWY_EXPORT_AND_TEST_P(HwyMulPairwiseTest, TestAllSatWidenMulPairwiseAdd); HWY_EXPORT_AND_TEST_P(HwyMulPairwiseTest, TestAllSatWidenMulPairwiseAccumulate); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/mul_test.cc b/hwy/tests/mul_test.cc index 06cecec5e7..13307f41c0 100644 --- a/hwy/tests/mul_test.cc +++ b/hwy/tests/mul_test.cc @@ -25,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template constexpr uint64_t FirstBits() { @@ -423,14 +424,15 @@ HWY_NOINLINE void TestAllMulOdd() { // uint64_t MulOdd is already tested in TestMulEvenOdd64 } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyMulTest); HWY_EXPORT_AND_TEST_P(HwyMulTest, TestAllMul); HWY_EXPORT_AND_TEST_P(HwyMulTest, TestAllMulHigh); @@ -438,6 +440,7 @@ HWY_EXPORT_AND_TEST_P(HwyMulTest, TestAllMulFixedPoint15); HWY_EXPORT_AND_TEST_P(HwyMulTest, TestAllMulEven); HWY_EXPORT_AND_TEST_P(HwyMulTest, TestAllMulOdd); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/reduction_test.cc b/hwy/tests/reduction_test.cc index 0683bc9830..fffc4a7873 100644 --- a/hwy/tests/reduction_test.cc +++ b/hwy/tests/reduction_test.cc @@ -25,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestSumOfLanes { template ()(uint8_t()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyReductionTest); HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumOfLanes); HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllMinMaxOfLanes); @@ -366,6 +368,7 @@ HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumsOf2); HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumsOf4); HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumsOf8); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/resize_test.cc b/hwy/tests/resize_test.cc index b8a1b930f7..4601838427 100644 --- a/hwy/tests/resize_test.cc +++ b/hwy/tests/resize_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { #if HWY_TARGET != HWY_SCALAR @@ -168,18 +169,20 @@ HWY_NOINLINE void TestAllExtendingResizeBitCast() { ForAllTypes(ForExtendableVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyResizeTest); HWY_EXPORT_AND_TEST_P(HwyResizeTest, TestAllTruncatingResizeBitCast); HWY_EXPORT_AND_TEST_P(HwyResizeTest, TestAllExtendingResizeBitCast); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - +HWY_TEST_MAIN(); #endif // HWY_ONCE diff --git a/hwy/tests/reverse_test.cc b/hwy/tests/reverse_test.cc index fa00f45e33..429e45e88f 100644 --- a/hwy/tests/reverse_test.cc +++ b/hwy/tests/reverse_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestReverse { template @@ -284,14 +285,15 @@ HWY_NOINLINE void TestAllReverseBlocks() { ForAllTypes(ForGEVectors<128, TestReverseBlocks>()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyReverseTest); HWY_EXPORT_AND_TEST_P(HwyReverseTest, TestAllReverse); HWY_EXPORT_AND_TEST_P(HwyReverseTest, TestAllReverse2); @@ -301,6 +303,7 @@ HWY_EXPORT_AND_TEST_P(HwyReverseTest, TestAllReverseLaneBytes); HWY_EXPORT_AND_TEST_P(HwyReverseTest, TestAllReverseBits); HWY_EXPORT_AND_TEST_P(HwyReverseTest, TestAllReverseBlocks); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/rotate_test.cc b/hwy/tests/rotate_test.cc index c63d1acdb3..f5cfdf4389 100644 --- a/hwy/tests/rotate_test.cc +++ b/hwy/tests/rotate_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestRotateLeft { template @@ -277,19 +278,21 @@ HWY_NOINLINE void TestAllVariableRotations() { ForIntegerTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyRotateTest); HWY_EXPORT_AND_TEST_P(HwyRotateTest, TestAllRotateLeft); HWY_EXPORT_AND_TEST_P(HwyRotateTest, TestAllRotateRight); HWY_EXPORT_AND_TEST_P(HwyRotateTest, TestAllVariableRotations); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/saturated_test.cc b/hwy/tests/saturated_test.cc index 2e9348ff80..7e29b65b66 100644 --- a/hwy/tests/saturated_test.cc +++ b/hwy/tests/saturated_test.cc @@ -25,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestUnsignedSaturatedAddSub { template @@ -147,19 +148,21 @@ HWY_NOINLINE void TestAllSaturatedNeg() { ForSignedTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwySaturatedTest); HWY_EXPORT_AND_TEST_P(HwySaturatedTest, TestAllSaturatedAddSub); HWY_EXPORT_AND_TEST_P(HwySaturatedTest, TestAllSaturatedAbs); HWY_EXPORT_AND_TEST_P(HwySaturatedTest, TestAllSaturatedNeg); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/shift_test.cc b/hwy/tests/shift_test.cc index c7ecffe176..a6a7f73b59 100644 --- a/hwy/tests/shift_test.cc +++ b/hwy/tests/shift_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template struct TestLeftShifts { @@ -501,20 +502,22 @@ HWY_NOINLINE void TestAllVariableRoundingShr() { ForIntegerTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyShiftTest); HWY_EXPORT_AND_TEST_P(HwyShiftTest, TestAllShifts); HWY_EXPORT_AND_TEST_P(HwyShiftTest, TestAllVariableShifts); HWY_EXPORT_AND_TEST_P(HwyShiftTest, TestAllRoundingShiftRight); HWY_EXPORT_AND_TEST_P(HwyShiftTest, TestAllVariableRoundingShr); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/shuffle4_test.cc b/hwy/tests/shuffle4_test.cc index 12af97921c..72ba621707 100644 --- a/hwy/tests/shuffle4_test.cc +++ b/hwy/tests/shuffle4_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { class TestPer4LaneBlockShuffle { private: @@ -213,17 +214,19 @@ HWY_NOINLINE void TestAllPer4LaneBlockShuffle() { ForAllTypes(ForPartialFixedOrFullScalableVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyShuffle4Test); HWY_EXPORT_AND_TEST_P(HwyShuffle4Test, TestAllPer4LaneBlockShuffle); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/sign_test.cc b/hwy/tests/sign_test.cc index 7f818f4f2c..cf7f170665 100644 --- a/hwy/tests/sign_test.cc +++ b/hwy/tests/sign_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestCopySign { template @@ -81,18 +82,20 @@ HWY_NOINLINE void TestAllBroadcastSignBit() { ForSignedTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwySignTest); HWY_EXPORT_AND_TEST_P(HwySignTest, TestAllCopySign); HWY_EXPORT_AND_TEST_P(HwySignTest, TestAllBroadcastSignBit); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/slide_up_down_test.cc b/hwy/tests/slide_up_down_test.cc index 566f5e0016..1fc2da4629 100644 --- a/hwy/tests/slide_up_down_test.cc +++ b/hwy/tests/slide_up_down_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { class TestSlideUpLanes { private: @@ -444,20 +445,22 @@ HWY_NOINLINE void TestAllSlideBlocks() { ForAllTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwySlideUpDownTest); HWY_EXPORT_AND_TEST_P(HwySlideUpDownTest, TestAllSlideUpLanes); HWY_EXPORT_AND_TEST_P(HwySlideUpDownTest, TestAllSlideDownLanes); HWY_EXPORT_AND_TEST_P(HwySlideUpDownTest, TestAllSlide1); HWY_EXPORT_AND_TEST_P(HwySlideUpDownTest, TestAllSlideBlocks); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/sums_abs_diff_test.cc b/hwy/tests/sums_abs_diff_test.cc index 4cf3e650bb..b7485afe15 100644 --- a/hwy/tests/sums_abs_diff_test.cc +++ b/hwy/tests/sums_abs_diff_test.cc @@ -25,6 +25,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestSumsOf8AbsDiff { template @@ -332,19 +333,21 @@ HWY_NOINLINE void TestAllSumsOfShuffledQuadAbsDiff() { ForGEVectors<32, TestSumsOfShuffledQuadAbsDiff>()(uint8_t()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwySumsAbsDiffTest); HWY_EXPORT_AND_TEST_P(HwySumsAbsDiffTest, TestAllSumsOf8AbsDiff); HWY_EXPORT_AND_TEST_P(HwySumsAbsDiffTest, TestAllSumsOfAdjQuadAbsDiff); HWY_EXPORT_AND_TEST_P(HwySumsAbsDiffTest, TestAllSumsOfShuffledQuadAbsDiff); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/swizzle_block_test.cc b/hwy/tests/swizzle_block_test.cc index 7bef627644..7e880aa7ed 100644 --- a/hwy/tests/swizzle_block_test.cc +++ b/hwy/tests/swizzle_block_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestOddEvenBlocks { template @@ -243,14 +244,15 @@ HWY_NOINLINE void TestAllBroadcastBlock() { ForAllTypes(ForPartialFixedOrFullScalableVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwySwizzleBlockTest); HWY_EXPORT_AND_TEST_P(HwySwizzleBlockTest, TestAllOddEvenBlocks); HWY_EXPORT_AND_TEST_P(HwySwizzleBlockTest, TestAllSwapAdjacentBlocks); @@ -258,6 +260,7 @@ HWY_EXPORT_AND_TEST_P(HwySwizzleBlockTest, TestAllInsertBlock); HWY_EXPORT_AND_TEST_P(HwySwizzleBlockTest, TestAllExtractBlock); HWY_EXPORT_AND_TEST_P(HwySwizzleBlockTest, TestAllBroadcastBlock); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/swizzle_test.cc b/hwy/tests/swizzle_test.cc index db8262f3a7..7e14bdade0 100644 --- a/hwy/tests/swizzle_test.cc +++ b/hwy/tests/swizzle_test.cc @@ -26,6 +26,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestGetLane { template @@ -397,14 +398,15 @@ HWY_NOINLINE void TestAllBroadcastLane() { ForAllTypes(ForPartialFixedOrFullScalableVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwySwizzleTest); HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllGetLane); HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllExtractLane); @@ -414,6 +416,7 @@ HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllDupOdd); HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllOddEven); HWY_EXPORT_AND_TEST_P(HwySwizzleTest, TestAllBroadcastLane); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/table_test.cc b/hwy/tests/table_test.cc index 100fa3e745..09fdd7eaf6 100644 --- a/hwy/tests/table_test.cc +++ b/hwy/tests/table_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestTableLookupLanes { template @@ -193,18 +194,20 @@ HWY_NOINLINE void TestAllTwoTablesLookupLanes() { ForAllTypes(ForPartialVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyTableTest); HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTableLookupLanes); HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTwoTablesLookupLanes); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/test_util_test.cc b/hwy/tests/test_util_test.cc index ead163707b..ca4ecb2ce8 100644 --- a/hwy/tests/test_util_test.cc +++ b/hwy/tests/test_util_test.cc @@ -29,6 +29,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestName { template @@ -96,18 +97,20 @@ HWY_NOINLINE void TestAllEqual() { ForFloatTypes(TestEqualFloat()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(TestUtilTest); HWY_EXPORT_AND_TEST_P(TestUtilTest, TestAllName); HWY_EXPORT_AND_TEST_P(TestUtilTest, TestAllEqual); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/truncate_test.cc b/hwy/tests/truncate_test.cc index ace24b3513..11ec70c19e 100644 --- a/hwy/tests/truncate_test.cc +++ b/hwy/tests/truncate_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { template constexpr bool IsSupportedTruncation() { @@ -107,18 +108,20 @@ HWY_NOINLINE void TestAllOrderedTruncate2To() { ForU163264(ForShrinkableVectors()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyTruncateTest); HWY_EXPORT_AND_TEST_P(HwyTruncateTest, TestAllTruncate); HWY_EXPORT_AND_TEST_P(HwyTruncateTest, TestAllOrderedTruncate2To); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/tests/tuple_test.cc b/hwy/tests/tuple_test.cc index 511537e65d..60fe14e9c6 100644 --- a/hwy/tests/tuple_test.cc +++ b/hwy/tests/tuple_test.cc @@ -24,6 +24,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestCreateAndSet { template @@ -82,17 +83,19 @@ HWY_NOINLINE void TestAllCreate() { ForAllTypes(ForMaxPow2()); } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(TupleTest); HWY_EXPORT_AND_TEST_P(TupleTest, TestAllCreate); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - +HWY_TEST_MAIN(); #endif // HWY_ONCE diff --git a/hwy/tests/widen_mul_test.cc b/hwy/tests/widen_mul_test.cc index 073138186c..1d5f1313b5 100644 --- a/hwy/tests/widen_mul_test.cc +++ b/hwy/tests/widen_mul_test.cc @@ -26,6 +26,7 @@ HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { +namespace { struct TestSatWidenMulAccumFixedPoint { template @@ -505,14 +506,15 @@ HWY_NOINLINE void TestAllSumOfMulQuadAccumulate() { #endif } +} // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE - namespace hwy { +namespace { HWY_BEFORE_TEST(HwyWidenMulTest); HWY_EXPORT_AND_TEST_P(HwyWidenMulTest, TestAllSatWidenMulAccumFixedPoint); HWY_EXPORT_AND_TEST_P(HwyWidenMulTest, TestAllMulEvenAdd); @@ -521,6 +523,7 @@ HWY_EXPORT_AND_TEST_P(HwyWidenMulTest, TestAllReorderWidenMulAccumulate); HWY_EXPORT_AND_TEST_P(HwyWidenMulTest, TestAllRearrangeToOddPlusEven); HWY_EXPORT_AND_TEST_P(HwyWidenMulTest, TestAllSumOfMulQuadAccumulate); HWY_AFTER_TEST(); +} // namespace } // namespace hwy - -#endif +HWY_TEST_MAIN(); +#endif // HWY_ONCE diff --git a/hwy/timer-inl.h b/hwy/timer-inl.h index d9a9fa84fb..9e98e6d00c 100644 --- a/hwy/timer-inl.h +++ b/hwy/timer-inl.h @@ -16,6 +16,8 @@ // High-resolution and high-precision timer // Per-target include guard +// NOTE: this file could/should be a normal header, but user code may reference +// hn::timer, and defining that here requires highway.h. #if defined(HIGHWAY_HWY_TIMER_INL_H_) == defined(HWY_TARGET_TOGGLE) #ifdef HIGHWAY_HWY_TIMER_INL_H_ #undef HIGHWAY_HWY_TIMER_INL_H_ diff --git a/libhwy-test.pc.in b/libhwy-test.pc.in index 0416b10df3..57d89a3340 100644 --- a/libhwy-test.pc.in +++ b/libhwy-test.pc.in @@ -5,7 +5,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ Name: libhwy-test Description: Efficient and performance-portable SIMD wrapper, test helpers. -Requires: gtest +Requires: @HWY_PC_HWY_TEST_REQUIRES@ Version: @HWY_LIBRARY_VERSION@ Libs: -L${libdir} -lhwy_test -Cflags: -I${includedir} +Cflags: -I${includedir} @HWY_PC_HWY_TEST_CFLAGS@