diff --git a/include/eve/arch/cpu/wide.hpp b/include/eve/arch/cpu/wide.hpp index 9cf0c59de7..ddd9404297 100644 --- a/include/eve/arch/cpu/wide.hpp +++ b/include/eve/arch/cpu/wide.hpp @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include #include @@ -953,32 +955,32 @@ namespace eve } //! @brief Element-wise less-or-equal comparison between eve::wide - friend EVE_FORCEINLINE auto operator<=(wide v, wide w) noexcept + friend EVE_FORCEINLINE auto operator<=(wide a, wide b) noexcept #if !defined(EVE_DOXYGEN_INVOKED) requires(supports_ordering_v) #endif { - return detail::self_leq(v, w); + return is_less_equal(a, b); } //! @brief Element-wise less-or-equal comparison between a eve::wide and a scalar template - friend EVE_FORCEINLINE auto operator<=(wide v, S w) noexcept + friend EVE_FORCEINLINE auto operator<=(wide w, S s) noexcept #if !defined(EVE_DOXYGEN_INVOKED) requires(supports_ordering_v) #endif { - return v <= wide {w}; + return is_less_equal(w, s); } //! @brief Element-wise less-or-equal comparison between a scalar and a eve::wide template - friend EVE_FORCEINLINE auto operator<=(S v, wide w) noexcept + friend EVE_FORCEINLINE auto operator<=(S s, wide w) noexcept #if !defined(EVE_DOXYGEN_INVOKED) requires(supports_ordering_v) #endif { - return wide {v} <= w; + return is_less_equal(s, w); } //! Computes the logical negation of its parameter diff --git a/include/eve/detail/function/simd/arm/neon/friends.hpp b/include/eve/detail/function/simd/arm/neon/friends.hpp index f4ed7ee2a7..84e6eeecd7 100644 --- a/include/eve/detail/function/simd/arm/neon/friends.hpp +++ b/include/eve/detail/function/simd/arm/neon/friends.hpp @@ -94,37 +94,4 @@ namespace eve::detail else if constexpr( sizeof(T) == 8 ) return map([](E const& e, E const& f){ return as_logical_t(e >= f); }, v, w); } - - template - EVE_FORCEINLINE logical> self_leq(wide v,wide w) noexcept - requires arm_abi> - { - constexpr auto cat = categorize>(); - - if constexpr( cat == category::int32x4 ) return vcleq_s32(v, w); - else if constexpr( cat == category::int16x8 ) return vcleq_s16(v, w); - else if constexpr( cat == category::int8x16 ) return vcleq_s8(v, w); - else if constexpr( cat == category::uint32x4 ) return vcleq_u32(v, w); - else if constexpr( cat == category::uint16x8 ) return vcleq_u16(v, w); - else if constexpr( cat == category::uint8x16 ) return vcleq_u8(v, w); - else if constexpr( cat == category::float32x4) return vcleq_f32(v, w); - else if constexpr( cat == category::int32x2 ) return vcle_s32(v, w); - else if constexpr( cat == category::int16x4 ) return vcle_s16(v, w); - else if constexpr( cat == category::int8x8 ) return vcle_s8(v, w); - else if constexpr( cat == category::uint32x2 ) return vcle_u32(v, w); - else if constexpr( cat == category::uint16x4 ) return vcle_u16(v, w); - else if constexpr( cat == category::uint8x8 ) return vcle_u8(v, w); - else if constexpr( cat == category::float32x2) return vcle_f32(v, w); - else if constexpr( current_api >= asimd) - { - if constexpr( cat == category::float64x1) return vcle_f64(v, w); - else if constexpr( cat == category::int64x1) return vcle_s64(v, w); - else if constexpr( cat == category::uint64x1) return vcle_u64(v, w); - else if constexpr( cat == category::float64x2) return vcleq_f64(v, w); - else if constexpr( cat == category::int64x2) return vcleq_s64(v, w); - else if constexpr( cat == category::uint64x2) return vcleq_u64(v, w); - } - else if constexpr( sizeof(T) == 8 ) - return map([](E const& e, E const& f){ return as_logical_t(e <= f); }, v, w); - } } diff --git a/include/eve/detail/function/simd/arm/sve/friends.hpp b/include/eve/detail/function/simd/arm/sve/friends.hpp index 19d2c2ed28..7661a394fd 100644 --- a/include/eve/detail/function/simd/arm/sve/friends.hpp +++ b/include/eve/detail/function/simd/arm/sve/friends.hpp @@ -22,11 +22,6 @@ EVE_FORCEINLINE auto self_neq(wide v, wide w) noexcept -> as_logical_t> requires sve_abi> { return svcmpne(sve_true(), v, w); } -template -EVE_FORCEINLINE auto -self_leq(wide v, wide w) noexcept -> as_logical_t> -requires sve_abi> { return svcmple(sve_true(), v, w); } - template EVE_FORCEINLINE auto self_geq(wide v, wide w) noexcept -> as_logical_t> diff --git a/include/eve/detail/function/simd/common/friends.hpp b/include/eve/detail/function/simd/common/friends.hpp index 0396a6cbfa..fc95c6be1d 100644 --- a/include/eve/detail/function/simd/common/friends.hpp +++ b/include/eve/detail/function/simd/common/friends.hpp @@ -99,19 +99,6 @@ namespace eve::detail //================================================================================================ // Ordering operators - template - EVE_FORCEINLINE auto self_leq(Wide const& v,Wide const& w) noexcept - { - if constexpr( product_type ) - { - return convert(kumi::to_tuple(v) <= kumi::to_tuple(w), as_element>()); - } - else - { - constexpr auto ge = [](E const& e, E const& f) { return as_logical_t(e <= f); }; - return apply_over(ge, v, w); - } - } template EVE_FORCEINLINE auto self_geq(Wide const& v,Wide const& w) noexcept diff --git a/include/eve/detail/function/simd/ppc/friends.hpp b/include/eve/detail/function/simd/ppc/friends.hpp index f0f76ad0cf..ce1a0ea290 100644 --- a/include/eve/detail/function/simd/ppc/friends.hpp +++ b/include/eve/detail/function/simd/ppc/friends.hpp @@ -37,14 +37,4 @@ namespace eve::detail else return !(v < w); } - - template - EVE_FORCEINLINE auto self_leq(wide const &v, wide const &w) noexcept - requires ppc_abi> - { - if constexpr(std::is_floating_point_v) - return logical>(vec_cmple(v.storage(), w.storage())); - else - return !(v > w); - } } diff --git a/include/eve/detail/function/simd/riscv/friends.hpp b/include/eve/detail/function/simd/riscv/friends.hpp index c77dad9219..1e6c7820fb 100644 --- a/include/eve/detail/function/simd/riscv/friends.hpp +++ b/include/eve/detail/function/simd/riscv/friends.hpp @@ -44,37 +44,6 @@ requires rvv_abi> return self_geq_impl(lhs, rhs); } -template -EVE_FORCEINLINE auto -self_leq_impl(wide lhs, U rhs) noexcept -> logical> -requires rvv_abi> && (std::same_as, U> || scalar_value) -{ - if constexpr( scalar_value && !std::same_as ) return self_leq(lhs, static_cast(rhs)); - else - { - constexpr auto c = categorize>(); - if constexpr( match(c, category::int_) ) return __riscv_vmsle(lhs, rhs, N::value); - else if constexpr( match(c, category::uint_) ) return __riscv_vmsleu(lhs, rhs, N::value); - else if constexpr( match(c, category::float_) ) return __riscv_vmfle(lhs, rhs, N::value); - } -} - -template -EVE_FORCEINLINE auto -self_leq(wide lhs, wide rhs) noexcept -> logical> -requires rvv_abi> -{ - return self_leq_impl(lhs, rhs); -} - -template -EVE_FORCEINLINE auto -self_leq(wide lhs, std::convertible_to auto rhs) noexcept -> logical> -requires rvv_abi> -{ - return self_leq_impl(lhs, rhs); -} - template EVE_FORCEINLINE auto self_eq_impl(wide lhs, U rhs) noexcept -> logical> diff --git a/include/eve/detail/function/simd/x86/friends.hpp b/include/eve/detail/function/simd/x86/friends.hpp index fe18f49008..6a3de8915d 100644 --- a/include/eve/detail/function/simd/x86/friends.hpp +++ b/include/eve/detail/function/simd/x86/friends.hpp @@ -229,55 +229,4 @@ EVE_FORCEINLINE as_logical_t> else return !(v < w); } } - -//================================================================================================ -template -EVE_FORCEINLINE as_logical_t> - self_leq(wide v, wide w) noexcept requires x86_abi> -{ - constexpr auto c = categorize>(); - constexpr auto f = to_integer(cmp_flt::le_oq); - - if constexpr( current_api >= avx512 ) - { - if constexpr( c == category::float32x16 ) return mask16 {_mm512_cmp_ps_mask(v, w, f)}; - else if constexpr( c == category::float32x8 ) return mask8 {_mm256_cmp_ps_mask(v, w, f)}; - else if constexpr( c == category::float32x4 ) return mask8 {_mm_cmp_ps_mask(v, w, f)}; - else if constexpr( c == category::float64x8 ) return mask8 {_mm512_cmp_pd_mask(v, w, f)}; - else if constexpr( c == category::float64x4 ) return mask8 {_mm256_cmp_pd_mask(v, w, f)}; - else if constexpr( c == category::float64x2 ) return mask8 {_mm_cmp_pd_mask(v, w, f)}; - else if constexpr( c == category::uint64x8 ) return mask8 {_mm512_cmple_epu64_mask(v, w)}; - else if constexpr( c == category::uint64x4 ) return mask8 {_mm256_cmple_epu64_mask(v, w)}; - else if constexpr( c == category::uint64x2 ) return mask8 {_mm_cmple_epu64_mask(v, w)}; - else if constexpr( c == category::uint32x16 ) return mask16 {_mm512_cmple_epu32_mask(v, w)}; - else if constexpr( c == category::uint32x8 ) return mask8 {_mm256_cmple_epu32_mask(v, w)}; - else if constexpr( c == category::uint32x4 ) return mask8 {_mm_cmple_epu32_mask(v, w)}; - else if constexpr( c == category::uint16x32 ) return mask32 {_mm512_cmple_epu16_mask(v, w)}; - else if constexpr( c == category::uint16x16 ) return mask16 {_mm256_cmple_epu16_mask(v, w)}; - else if constexpr( c == category::uint16x8 ) return mask8 {_mm_cmple_epu16_mask(v, w)}; - else if constexpr( c == category::uint8x64 ) return mask64 {_mm512_cmple_epu8_mask(v, w)}; - else if constexpr( c == category::uint8x32 ) return mask32 {_mm256_cmple_epu8_mask(v, w)}; - else if constexpr( c == category::uint8x16 ) return mask16 {_mm_cmple_epu8_mask(v, w)}; - else if constexpr( c == category::int64x8 ) return mask8 {_mm512_cmple_epi64_mask(v, w)}; - else if constexpr( c == category::int64x4 ) return mask8 {_mm256_cmple_epi64_mask(v, w)}; - else if constexpr( c == category::int64x2 ) return mask8 {_mm_cmple_epi64_mask(v, w)}; - else if constexpr( c == category::int32x16 ) return mask16 {_mm512_cmple_epi32_mask(v, w)}; - else if constexpr( c == category::int32x8 ) return mask8 {_mm256_cmple_epi32_mask(v, w)}; - else if constexpr( c == category::int32x4 ) return mask8 {_mm_cmple_epi32_mask(v, w)}; - else if constexpr( c == category::int16x32 ) return mask32 {_mm512_cmple_epi16_mask(v, w)}; - else if constexpr( c == category::int16x16 ) return mask16 {_mm256_cmple_epi16_mask(v, w)}; - else if constexpr( c == category::int16x8 ) return mask8 {_mm_cmple_epi16_mask(v, w)}; - else if constexpr( c == category::int8x64 ) return mask64 {_mm512_cmple_epi8_mask(v, w)}; - else if constexpr( c == category::int8x32 ) return mask32 {_mm256_cmple_epi8_mask(v, w)}; - else if constexpr( c == category::int8x16 ) return mask16 {_mm_cmple_epi8_mask(v, w)}; - } - else - { - if constexpr( c == category::float32x8 ) return _mm256_cmp_ps(v, w, f); - else if constexpr( c == category::float64x4 ) return _mm256_cmp_pd(v, w, f); - else if constexpr( c == category::float32x4 ) return _mm_cmple_ps(v, w); - else if constexpr( c == category::float64x2 ) return _mm_cmple_pd(v, w); - else return !(v > w); - } -} } diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp index cfb3c49261..dac7d1c57d 100644 --- a/include/eve/module/core/regular/impl/is_less.hpp +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -7,10 +7,10 @@ //================================================================================================== #pragma once -#include #include #include #include +#include namespace eve::detail { @@ -31,7 +31,7 @@ namespace eve::detail else { if constexpr (scalar_value) return as_logical_t(a < b); - else return map([](auto e, auto f) { return e < f; }, a, b); + else return map([](E e, E f){ return as_logical_t(e < f); }, a, b); } } } diff --git a/include/eve/module/core/regular/impl/is_less_equal.hpp b/include/eve/module/core/regular/impl/is_less_equal.hpp new file mode 100644 index 0000000000..e1ade44039 --- /dev/null +++ b/include/eve/module/core/regular/impl/is_less_equal.hpp @@ -0,0 +1,36 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE constexpr as_logical_t is_less_equal_(EVE_REQUIRES(cpu_), O const& o, T a, T b) noexcept + { + if constexpr (O::contains(almost)) + { + auto tol = o[almost].value(T{}); + if constexpr(integral_value) return a <= eve::next(b, tol); + else return a <= fam(b, tol, eve::max(eve::abs(a), eve::abs(b))); + } + else if constexpr (product_type) + { + return kumi::to_tuple(a) <= kumi::to_tuple(b); + } + else + { + if constexpr (scalar_value) return as_logical_t(a <= b); + else return map([](E e, E f){ return as_logical_t(e <= f); }, a, b); + } + } +} diff --git a/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp b/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp index e374b0e600..1e89ebe018 100644 --- a/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp @@ -49,7 +49,7 @@ namespace eve::detail else if constexpr (cat == category::int64x2) return vcltq_s64(a, b); else if constexpr (cat == category::uint64x2) return vcltq_u64(a, b); } - else return map(is_less, a, b); + else return map([](E e, E f){ return as_logical_t(e < f); }, a, b); } } } diff --git a/include/eve/module/core/regular/impl/simd/arm/neon/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/arm/neon/is_less_equal.hpp new file mode 100644 index 0000000000..7b37d0b355 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/arm/neon/is_less_equal.hpp @@ -0,0 +1,55 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_equal_(EVE_REQUIRES(neon128_), O const& opts, wide a, wide b) noexcept + requires arm_abi> + { + if constexpr (O::contains(almost)) + { + return is_less_equal.behavior(cpu_{}, opts, a, b); + } + else + { + constexpr auto cat = categorize>(); + + if constexpr (cat == category::int32x4) return vcleq_s32(a, b); + else if constexpr (cat == category::int16x8) return vcleq_s16(a, b); + else if constexpr (cat == category::int8x16) return vcleq_s8(a, b); + else if constexpr (cat == category::uint32x4) return vcleq_u32(a, b); + else if constexpr (cat == category::uint16x8) return vcleq_u16(a, b); + else if constexpr (cat == category::uint8x16) return vcleq_u8(a, b); + else if constexpr (cat == category::float32x4) return vcleq_f32(a, b); + else if constexpr (cat == category::int32x2) return vcle_s32(a, b); + else if constexpr (cat == category::int16x4) return vcle_s16(a, b); + else if constexpr (cat == category::int8x8) return vcle_s8(a, b); + else if constexpr (cat == category::uint32x2) return vcle_u32(a, b); + else if constexpr (cat == category::uint16x4) return vcle_u16(a, b); + else if constexpr (cat == category::uint8x8) return vcle_u8(a, b); + else if constexpr (cat == category::float32x2) return vcle_f32(a, b); + else if constexpr (current_api >= asimd) + { + if constexpr (cat == category::float64x1) return vcle_f64(a, b); + else if constexpr (cat == category::int64x1) return vcle_s64(a, b); + else if constexpr (cat == category::uint64x1) return vcle_u64(a, b); + else if constexpr (cat == category::float64x2) return vcleq_f64(a, b); + else if constexpr (cat == category::int64x2) return vcleq_s64(a, b); + else if constexpr (cat == category::uint64x2) return vcleq_u64(a, b); + } + else return map([](E e, E f){ return as_logical_t(e <= f); }, a, b); + } + } +} diff --git a/include/eve/module/core/regular/impl/simd/arm/sve/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/arm/sve/is_less_equal.hpp new file mode 100644 index 0000000000..c89c98fe89 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/arm/sve/is_less_equal.hpp @@ -0,0 +1,22 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_equal_(EVE_REQUIRES(sve_), O const& opts, wide a, wide b) noexcept + requires sve_abi> + { + if constexpr (O::contains(almost)) return is_less_equal.behavior(cpu_{}, opts, a, b); + else return svcmple(sve_true(), a, b); + } +} diff --git a/include/eve/module/core/regular/impl/simd/ppc/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/ppc/is_less_equal.hpp new file mode 100644 index 0000000000..7660c2f104 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/ppc/is_less_equal.hpp @@ -0,0 +1,25 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_equal_(EVE_REQUIRES(vmx_), O const& opts, wide a, wide b) noexcept + requires ppc_abi> + { + if constexpr (O::contains(almost)) return is_less_equal.behavior(cpu_{}, opts, a, b); + else if constexpr(std::is_floating_point_v) return logical>(vec_cmple(a.storage(), b.storage())); + else return !(a > b); + } +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/riscv/is_less_equal.hpp new file mode 100644 index 0000000000..64a710725d --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/is_less_equal.hpp @@ -0,0 +1,32 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_equal_(EVE_REQUIRES(rvv_), O const& opts, wide a, U b) noexcept + requires (rvv_abi> && same_element_type) + { + if constexpr (O::contains(almost)) + { + return is_less_equal.behavior(cpu_{}, opts, a, b); + } + else + { + constexpr auto c = categorize>(); + + if constexpr (match(c, category::int_)) return __riscv_vmsle(a, b, N::value); + else if constexpr (match(c, category::uint_)) return __riscv_vmsleu(a, b, N::value); + else if constexpr (match(c, category::float_)) return __riscv_vmfle(a, b, N::value); + } + } +} diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp index 8ecc20658f..491371d796 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp @@ -74,6 +74,7 @@ namespace eve::detail else { constexpr auto use_avx2 = current_api >= avx2; + constexpr auto use_avx = current_api >= avx; constexpr auto use_sse4_1 = current_api >= sse4_1; constexpr auto use_sse4_2 = current_api >= sse4_2; constexpr auto lt = [](E ev, E fv) { return as_logical_t(ev < fv); }; @@ -85,18 +86,19 @@ namespace eve::detail return bit_cast((bit_cast(lhs, as(sm)) - sm) < (bit_cast(rhs, as(sm)) - sm), as{}); }; - if constexpr (use_avx2 && c == category::int64x4) return _mm256_cmpgt_epi64(b, a); - else if constexpr (use_avx2 && c == category::uint64x4) return unsigned_cmp(a, b); - else if constexpr (use_avx2 && c == category::int32x8) return _mm256_cmpgt_epi32(b, a); - else if constexpr (use_avx2 && c == category::uint32x8) return eve::min(a, b) != b; - else if constexpr (use_avx2 && c == category::int16x16) return _mm256_cmpgt_epi16(b, a); - else if constexpr (use_avx2 && c == category::uint16x16) return eve::min(a, b) != b; - else if constexpr (use_avx2 && c == category::int8x32) return _mm256_cmpgt_epi8(b, a); - else if constexpr (use_avx2 && c == category::uint8x32) return eve::min(a, b) != b; - else if constexpr (use_sse4_2 && c == category::int64x2) return _mm_cmpgt_epi64(b, a); - else if constexpr (c == category::int32x4) return _mm_cmplt_epi32(a, b); - else if constexpr (c == category::int16x8) return _mm_cmplt_epi16(a, b); - else if constexpr (c == category::int8x16) return _mm_cmplt_epi8(a, b); + if constexpr (use_avx2 && c == category::int64x4) return _mm256_cmpgt_epi64(b, a); + else if constexpr (use_avx2 && c == category::uint64x4) return unsigned_cmp(a, b); + else if constexpr (use_avx2 && c == category::int32x8) return _mm256_cmpgt_epi32(b, a); + else if constexpr (use_avx2 && c == category::uint32x8) return eve::min(a, b) != b; + else if constexpr (use_avx2 && c == category::int16x16) return _mm256_cmpgt_epi16(b, a); + else if constexpr (use_avx2 && c == category::uint16x16) return eve::min(a, b) != b; + else if constexpr (use_avx2 && c == category::int8x32) return _mm256_cmpgt_epi8(b, a); + else if constexpr (use_avx2 && c == category::uint8x32) return eve::min(a, b) != b; + else if constexpr (use_avx && ((sizeof(T) * N::value) == 32)) return aggregate(is_less, a, b); + else if constexpr (use_sse4_2 && c == category::int64x2) return _mm_cmpgt_epi64(b, a); + else if constexpr (c == category::int32x4) return _mm_cmplt_epi32(a, b); + else if constexpr (c == category::int16x8) return _mm_cmplt_epi16(a, b); + else if constexpr (c == category::int8x16) return _mm_cmplt_epi8(a, b); else if constexpr (c == category::uint32x4) { if constexpr (use_sse4_1) return eve::min(a, b) != b; diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp index 73c874da67..c0bb93423b 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp @@ -19,57 +19,118 @@ namespace eve::detail { -// ----------------------------------------------------------------------------------------------- -// masked implementation - template - EVE_FORCEINLINE as_logical_t> is_less_equal_(EVE_REQUIRES(avx512_), - C const& mask, - O const& opts, - wide const& v, - wide const& w) noexcept requires x86_abi> + template + EVE_FORCEINLINE as_logical_t> is_less_equal_(EVE_REQUIRES(sse2_), O const& opts, wide a, wide b) noexcept + requires x86_abi> { - if constexpr( C::has_alternative || O::contains(almost) ) + if constexpr (O::contains(almost)) { - return is_less_equal.behavior(cpu_{}, opts, v, w); + return is_less_equal.behavior(cpu_{}, opts, a, b); } else { - auto const s = alternative(mask, v, as(to_logical(v))); - [[maybe_unused]] auto m = expand_mask(mask, as(v)).storage().value; + constexpr auto c = categorize>(); + constexpr auto f = to_integer(cmp_flt::le_oq); + + if constexpr (current_api >= avx512) + { + if constexpr (c == category::float32x16) return mask16 {_mm512_cmp_ps_mask(a, b, f)}; + else if constexpr (c == category::float32x8) return mask8 {_mm256_cmp_ps_mask(a, b, f)}; + else if constexpr (c == category::float32x4) return mask8 {_mm_cmp_ps_mask(a, b, f)}; + else if constexpr (c == category::float64x8) return mask8 {_mm512_cmp_pd_mask(a, b, f)}; + else if constexpr (c == category::float64x4) return mask8 {_mm256_cmp_pd_mask(a, b, f)}; + else if constexpr (c == category::float64x2) return mask8 {_mm_cmp_pd_mask(a, b, f)}; + else if constexpr (c == category::uint64x8) return mask8 {_mm512_cmple_epu64_mask(a, b)}; + else if constexpr (c == category::uint64x4) return mask8 {_mm256_cmple_epu64_mask(a, b)}; + else if constexpr (c == category::uint64x2) return mask8 {_mm_cmple_epu64_mask(a, b)}; + else if constexpr (c == category::uint32x16) return mask16 {_mm512_cmple_epu32_mask(a, b)}; + else if constexpr (c == category::uint32x8) return mask8 {_mm256_cmple_epu32_mask(a, b)}; + else if constexpr (c == category::uint32x4) return mask8 {_mm_cmple_epu32_mask(a, b)}; + else if constexpr (c == category::uint16x32) return mask32 {_mm512_cmple_epu16_mask(a, b)}; + else if constexpr (c == category::uint16x16) return mask16 {_mm256_cmple_epu16_mask(a, b)}; + else if constexpr (c == category::uint16x8) return mask8 {_mm_cmple_epu16_mask(a, b)}; + else if constexpr (c == category::uint8x64) return mask64 {_mm512_cmple_epu8_mask(a, b)}; + else if constexpr (c == category::uint8x32) return mask32 {_mm256_cmple_epu8_mask(a, b)}; + else if constexpr (c == category::uint8x16) return mask16 {_mm_cmple_epu8_mask(a, b)}; + else if constexpr (c == category::int64x8) return mask8 {_mm512_cmple_epi64_mask(a, b)}; + else if constexpr (c == category::int64x4) return mask8 {_mm256_cmple_epi64_mask(a, b)}; + else if constexpr (c == category::int64x2) return mask8 {_mm_cmple_epi64_mask(a, b)}; + else if constexpr (c == category::int32x16) return mask16 {_mm512_cmple_epi32_mask(a, b)}; + else if constexpr (c == category::int32x8) return mask8 {_mm256_cmple_epi32_mask(a, b)}; + else if constexpr (c == category::int32x4) return mask8 {_mm_cmple_epi32_mask(a, b)}; + else if constexpr (c == category::int16x32) return mask32 {_mm512_cmple_epi16_mask(a, b)}; + else if constexpr (c == category::int16x16) return mask16 {_mm256_cmple_epi16_mask(a, b)}; + else if constexpr (c == category::int16x8) return mask8 {_mm_cmple_epi16_mask(a, b)}; + else if constexpr (c == category::int8x64) return mask64 {_mm512_cmple_epi8_mask(a, b)}; + else if constexpr (c == category::int8x32) return mask32 {_mm256_cmple_epi8_mask(a, b)}; + else if constexpr (c == category::int8x16) return mask16 {_mm_cmple_epi8_mask(a, b)}; + } + else + { + constexpr auto use_avx2 = current_api >= avx2; + constexpr auto use_avx = current_api >= avx; + constexpr auto use_sse4_1 = current_api >= sse4_1; + + if constexpr (c == category::float32x8) return _mm256_cmp_ps(a, b, f); + else if constexpr (c == category::float64x4) return _mm256_cmp_pd(a, b, f); + else if constexpr (c == category::float32x4) return _mm_cmple_ps(a, b); + else if constexpr (c == category::float64x2) return _mm_cmple_pd(a, b); + else if constexpr (use_avx2) return eve::min(a, b) == a; + else if constexpr (use_avx && ((sizeof(T) * N::value) == 32)) return aggregate(is_less_equal, a, b); + else if constexpr (use_sse4_1) return eve::min(a, b) == a; + else return !is_less(b, a); + } + } + } + + // ----------------------------------------------------------------------------------------------- + // masked implementation + template + EVE_FORCEINLINE as_logical_t> is_less_equal_(EVE_REQUIRES(avx512_), C const& mask, O const& opts, wide a, wide b) noexcept + requires x86_abi> + { + if constexpr (C::has_alternative || O::contains(almost)) + { + return is_less_equal.behavior(cpu_{}, opts, a, b); + } + else + { + auto const s = alternative(mask, a, as(to_logical(a))); + [[maybe_unused]] auto m = expand_mask(mask, as(a)).storage().value; constexpr auto c = categorize>(); constexpr auto f = to_integer(cmp_flt::le_oq); - if constexpr( C::is_complete ) return s; - else if constexpr( c == category::float32x16 ) return mask16 {_mm512_mask_cmp_ps_mask(m, v, w, f)}; - else if constexpr( c == category::float64x8 ) return mask8 {_mm512_mask_cmp_pd_mask(m, v, w, f)}; - else if constexpr( c == category::float32x8 ) return mask8 {_mm256_mask_cmp_ps_mask(m, v, w, f)}; - else if constexpr( c == category::float64x4 ) return mask8 {_mm256_mask_cmp_pd_mask(m, v, w, f)}; - else if constexpr( c == category::float32x4 ) return mask8 {_mm_mask_cmp_ps_mask(m, v, w, f)}; - else if constexpr( c == category::float64x2 ) return mask8 {_mm_mask_cmp_pd_mask(m, v, w, f)}; - else if constexpr( c == category::int64x8 ) return mask8 {_mm512_mask_cmple_epi64_mask(m, v, w)}; - else if constexpr( c == category::int64x4 ) return mask8 {_mm256_mask_cmple_epi64_mask(m, v, w)}; - else if constexpr( c == category::int64x2 ) return mask8 {_mm_mask_cmple_epi64_mask(m, v, w)}; - else if constexpr( c == category::int32x16 ) return mask16 {_mm512_mask_cmple_epi32_mask(m, v, w)}; - else if constexpr( c == category::int32x8 ) return mask8 {_mm256_mask_cmple_epi32_mask(m, v, w)}; - else if constexpr( c == category::int32x4 ) return mask8 {_mm_mask_cmple_epi32_mask(m, v, w)}; - else if constexpr( c == category::int16x32 ) return mask32 {_mm512_mask_cmple_epi16_mask(m, v, w)}; - else if constexpr( c == category::int16x16 ) return mask16 {_mm256_mask_cmple_epi16_mask(m, v, w)}; - else if constexpr( c == category::int16x8 ) return mask8 {_mm_mask_cmple_epi16_mask(m, v, w)}; - else if constexpr( c == category::int8x64 ) return mask64 {_mm512_mask_cmple_epi8_mask(m, v, w)}; - else if constexpr( c == category::int8x32 ) return mask32 {_mm256_mask_cmple_epi8_mask(m, v, w)}; - else if constexpr( c == category::int8x16 ) return mask16 {_mm_mask_cmple_epi8_mask(m, v, w)}; - else if constexpr( c == category::uint64x8 ) return mask8 {_mm512_mask_cmple_epu64_mask(m, v, w)}; - else if constexpr( c == category::uint64x4 ) return mask8 {_mm256_mask_cmple_epu64_mask(m, v, w)}; - else if constexpr( c == category::uint64x2 ) return mask8 {_mm_mask_cmple_epu64_mask(m, v, w)}; - else if constexpr( c == category::uint32x16 ) return mask16 {_mm512_mask_cmple_epu32_mask(m, v, w)}; - else if constexpr( c == category::uint32x8 ) return mask8 {_mm256_mask_cmple_epu32_mask(m, v, w)}; - else if constexpr( c == category::uint32x4 ) return mask8 {_mm_mask_cmple_epu32_mask(m, v, w)}; - else if constexpr( c == category::uint16x32 ) return mask32 {_mm512_mask_cmple_epu16_mask(m, v, w)}; - else if constexpr( c == category::uint16x16 ) return mask16 {_mm256_mask_cmple_epu16_mask(m, v, w)}; - else if constexpr( c == category::uint16x8 ) return mask8 {_mm_mask_cmple_epu16_mask(m, v, w)}; - else if constexpr( c == category::uint8x64 ) return mask64 {_mm512_mask_cmple_epu8_mask(m, v, w)}; - else if constexpr( c == category::uint8x32 ) return mask32 {_mm256_mask_cmple_epu8_mask(m, v, w)}; - else if constexpr( c == category::uint8x16 ) return mask16 {_mm_mask_cmple_epu8_mask(m, v, w)}; + if constexpr (C::is_complete) return s; + else if constexpr (c == category::float32x16) return mask16 {_mm512_mask_cmp_ps_mask(m, a, b, f)}; + else if constexpr (c == category::float64x8) return mask8 {_mm512_mask_cmp_pd_mask(m, a, b, f)}; + else if constexpr (c == category::float32x8) return mask8 {_mm256_mask_cmp_ps_mask(m, a, b, f)}; + else if constexpr (c == category::float64x4) return mask8 {_mm256_mask_cmp_pd_mask(m, a, b, f)}; + else if constexpr (c == category::float32x4) return mask8 {_mm_mask_cmp_ps_mask(m, a, b, f)}; + else if constexpr (c == category::float64x2) return mask8 {_mm_mask_cmp_pd_mask(m, a, b, f)}; + else if constexpr (c == category::int64x8) return mask8 {_mm512_mask_cmple_epi64_mask(m, a, b)}; + else if constexpr (c == category::int64x4) return mask8 {_mm256_mask_cmple_epi64_mask(m, a, b)}; + else if constexpr (c == category::int64x2) return mask8 {_mm_mask_cmple_epi64_mask(m, a, b)}; + else if constexpr (c == category::int32x16) return mask16 {_mm512_mask_cmple_epi32_mask(m, a, b)}; + else if constexpr (c == category::int32x8) return mask8 {_mm256_mask_cmple_epi32_mask(m, a, b)}; + else if constexpr (c == category::int32x4) return mask8 {_mm_mask_cmple_epi32_mask(m, a, b)}; + else if constexpr (c == category::int16x32) return mask32 {_mm512_mask_cmple_epi16_mask(m, a, b)}; + else if constexpr (c == category::int16x16) return mask16 {_mm256_mask_cmple_epi16_mask(m, a, b)}; + else if constexpr (c == category::int16x8) return mask8 {_mm_mask_cmple_epi16_mask(m, a, b)}; + else if constexpr (c == category::int8x64) return mask64 {_mm512_mask_cmple_epi8_mask(m, a, b)}; + else if constexpr (c == category::int8x32) return mask32 {_mm256_mask_cmple_epi8_mask(m, a, b)}; + else if constexpr (c == category::int8x16) return mask16 {_mm_mask_cmple_epi8_mask(m, a, b)}; + else if constexpr (c == category::uint64x8) return mask8 {_mm512_mask_cmple_epu64_mask(m, a, b)}; + else if constexpr (c == category::uint64x4) return mask8 {_mm256_mask_cmple_epu64_mask(m, a, b)}; + else if constexpr (c == category::uint64x2) return mask8 {_mm_mask_cmple_epu64_mask(m, a, b)}; + else if constexpr (c == category::uint32x16) return mask16 {_mm512_mask_cmple_epu32_mask(m, a, b)}; + else if constexpr (c == category::uint32x8) return mask8 {_mm256_mask_cmple_epu32_mask(m, a, b)}; + else if constexpr (c == category::uint32x4) return mask8 {_mm_mask_cmple_epu32_mask(m, a, b)}; + else if constexpr (c == category::uint16x32) return mask32 {_mm512_mask_cmple_epu16_mask(m, a, b)}; + else if constexpr (c == category::uint16x16) return mask16 {_mm256_mask_cmple_epu16_mask(m, a, b)}; + else if constexpr (c == category::uint16x8) return mask8 {_mm_mask_cmple_epu16_mask(m, a, b)}; + else if constexpr (c == category::uint8x64) return mask64 {_mm512_mask_cmple_epu8_mask(m, a, b)}; + else if constexpr (c == category::uint8x32) return mask32 {_mm256_mask_cmple_epu8_mask(m, a, b)}; + else if constexpr (c == category::uint8x16) return mask16 {_mm_mask_cmple_epu8_mask(m, a, b)}; } } } diff --git a/include/eve/module/core/regular/is_less.hpp b/include/eve/module/core/regular/is_less.hpp index 45df111b91..0a7b27295c 100644 --- a/include/eve/module/core/regular/is_less.hpp +++ b/include/eve/module/core/regular/is_less.hpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/include/eve/module/core/regular/is_less_equal.hpp b/include/eve/module/core/regular/is_less_equal.hpp index 5ca61df55c..cfe74ac7cb 100644 --- a/include/eve/module/core/regular/is_less_equal.hpp +++ b/include/eve/module/core/regular/is_less_equal.hpp @@ -9,21 +9,25 @@ #include #include -#include #include -#include -#include +#include + namespace eve { template - struct is_less_equal_t : strict_elementwise_callable + struct is_less_equal_t : elementwise_callable { - template - requires(eve::same_lanes_or_scalar) - constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const + template + constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const + requires (compatible_arithmetic_values) { - // static_assert( valid_tolerance, Options>::value, "[eve::is_less_equal] simd tolerance requires at least one simd parameter." ); + if constexpr (Options::contains(almost)) + { + static_assert(floating_value, "[eve::is_less_equal] The definitely option is only supported for floating types."); + } + + // static_assert( valid_tolerance, Options>::value, "[eve::is_less_equal] simd tolerance requires at least one simd parameter." ); return EVE_DISPATCH_CALL(a, b); } @@ -86,40 +90,26 @@ namespace eve //================================================================================================ //! @} //================================================================================================ - - namespace detail - { - template - EVE_FORCEINLINE constexpr common_logical_t - is_less_equal_(EVE_REQUIRES(cpu_), O const&, logical a, logical b) noexcept - { - if constexpr( scalar_value && scalar_value) return common_logical_t(a <= b); - else return a <= b; - } - - template - EVE_FORCEINLINE constexpr common_logical_t - is_less_equal_(EVE_REQUIRES(cpu_), O const & o, T const& aa, U const& bb) noexcept - { - if constexpr(O::contains(almost)) - { - using w_t = common_value_t; - auto a = w_t(aa); - auto b = w_t(bb); - - auto tol = o[almost].value(w_t{}); - if constexpr(integral_value) return a <= eve::next(b, tol); - else return a <= fam(b, tol, eve::max(eve::abs(a), eve::abs(b))); - } - else - { - if constexpr(scalar_value && scalar_value) return common_logical_t(aa <= bb); - else return aa <= bb; - } - } - } } +#include + #if defined(EVE_INCLUDE_X86_HEADER) # include #endif + +#if defined(EVE_INCLUDE_POWERPC_HEADER) +# include +#endif + +#if defined(EVE_INCLUDE_ARM_NEON_HEADER) +# include +#endif + +#if defined(EVE_INCLUDE_ARM_SVE_HEADER) +# include +#endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif