diff --git a/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp b/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp index c72e416c6..e7916b0d4 100644 --- a/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp +++ b/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp @@ -28,7 +28,7 @@ namespace xsimd // bitwise_lshift template ::value, void>::type*/> - inline batch bitwise_lshift(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& self, batch const& other, requires_arch) noexcept { return detail::apply([](T x, T y) noexcept { return x << y; }, @@ -37,7 +37,7 @@ namespace xsimd // bitwise_rshift template ::value, void>::type*/> - inline batch bitwise_rshift(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& self, batch const& other, requires_arch) noexcept { return detail::apply([](T x, T y) noexcept { return x >> y; }, @@ -46,21 +46,21 @@ namespace xsimd // decr template - inline batch decr(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch decr(batch const& self, requires_arch) noexcept { return self - T(1); } // decr_if template - inline batch decr_if(batch const& self, Mask const& mask, requires_arch) noexcept + XSIMD_INLINE batch decr_if(batch const& self, Mask const& mask, requires_arch) noexcept { return select(mask, decr(self), self); } // div template ::value, void>::type> - inline batch div(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& self, batch const& other, requires_arch) noexcept { return detail::apply([](T x, T y) noexcept -> T { return x / y; }, @@ -69,13 +69,13 @@ namespace xsimd // fma template - inline batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return x * y + z; } template - inline batch, A> fma(batch, A> const& x, batch, A> const& y, batch, A> const& z, requires_arch) noexcept + XSIMD_INLINE batch, A> fma(batch, A> const& x, batch, A> const& y, batch, A> const& z, requires_arch) noexcept { auto res_r = fms(x.real(), y.real(), fms(x.imag(), y.imag(), z.real())); auto res_i = fma(x.real(), y.imag(), fma(x.imag(), y.real(), z.imag())); @@ -84,13 +84,13 @@ namespace xsimd // fms template - inline batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return x * y - z; } template - inline batch, A> fms(batch, A> const& x, batch, A> const& y, batch, A> const& z, requires_arch) noexcept + XSIMD_INLINE batch, A> fms(batch, A> const& x, batch, A> const& y, batch, A> const& z, requires_arch) noexcept { auto res_r = fms(x.real(), y.real(), fma(x.imag(), y.imag(), z.real())); auto res_i = fma(x.real(), y.imag(), fms(x.imag(), y.real(), z.imag())); @@ -99,13 +99,13 @@ namespace xsimd // fnma template - inline batch fnma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fnma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return -x * y + z; } template - inline batch, A> fnma(batch, A> const& x, batch, A> const& y, batch, A> const& z, requires_arch) noexcept + XSIMD_INLINE batch, A> fnma(batch, A> const& x, batch, A> const& y, batch, A> const& z, requires_arch) noexcept { auto res_r = -fms(x.real(), y.real(), fma(x.imag(), y.imag(), z.real())); auto res_i 
= -fma(x.real(), y.imag(), fms(x.imag(), y.real(), z.imag())); @@ -114,13 +114,13 @@ namespace xsimd // fnms template - inline batch fnms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fnms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return -x * y - z; } template - inline batch, A> fnms(batch, A> const& x, batch, A> const& y, batch, A> const& z, requires_arch) noexcept + XSIMD_INLINE batch, A> fnms(batch, A> const& x, batch, A> const& y, batch, A> const& z, requires_arch) noexcept { auto res_r = -fms(x.real(), y.real(), fms(x.imag(), y.imag(), z.real())); auto res_i = -fma(x.real(), y.imag(), fma(x.imag(), y.real(), z.imag())); @@ -129,7 +129,7 @@ namespace xsimd // hadd template ::value, void>::type*/> - inline T hadd(batch const& self, requires_arch) noexcept + XSIMD_INLINE T hadd(batch const& self, requires_arch) noexcept { alignas(A::alignment()) T buffer[batch::size]; self.store_aligned(buffer); @@ -143,21 +143,21 @@ namespace xsimd // incr template - inline batch incr(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch incr(batch const& self, requires_arch) noexcept { return self + T(1); } // incr_if template - inline batch incr_if(batch const& self, Mask const& mask, requires_arch) noexcept + XSIMD_INLINE batch incr_if(batch const& self, Mask const& mask, requires_arch) noexcept { return select(mask, incr(self), self); } // mul template ::value, void>::type*/> - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { return detail::apply([](T x, T y) noexcept -> T { return x * y; }, @@ -166,7 +166,7 @@ namespace xsimd // rotl template - inline batch rotl(batch const& self, STy other, requires_arch) noexcept + XSIMD_INLINE batch rotl(batch const& self, STy other, requires_arch) noexcept { constexpr auto N = std::numeric_limits::digits; return (self << other) | (self >> (N - other)); @@ -174,7 +174,7 @@ namespace xsimd // rotr template - inline batch rotr(batch const& self, STy other, requires_arch) noexcept + XSIMD_INLINE batch rotr(batch const& self, STy other, requires_arch) noexcept { constexpr auto N = std::numeric_limits::digits; return (self >> other) | (self << (N - other)); @@ -182,12 +182,12 @@ namespace xsimd // sadd template - inline batch sadd(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch) noexcept { return add(self, other); // no saturated arithmetic on floating point numbers } template ::value, void>::type*/> - inline batch sadd(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -204,19 +204,19 @@ namespace xsimd } } template - inline batch sadd(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch) noexcept { return add(self, other); // no saturated arithmetic on floating point numbers } // ssub template - inline batch ssub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch ssub(batch const& self, batch const& other, requires_arch) noexcept { return sub(self, other); // no saturated arithmetic on floating point numbers } template ::value, void>::type*/> - inline batch ssub(batch const& self, batch const& other, requires_arch) 
noexcept + XSIMD_INLINE batch ssub(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -229,7 +229,7 @@ namespace xsimd } } template - inline batch ssub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch ssub(batch const& self, batch const& other, requires_arch) noexcept { return sub(self, other); // no saturated arithmetic on floating point numbers } diff --git a/include/xsimd/arch/generic/xsimd_generic_complex.hpp b/include/xsimd/arch/generic/xsimd_generic_complex.hpp index 960e4c10c..812c592ae 100644 --- a/include/xsimd/arch/generic/xsimd_generic_complex.hpp +++ b/include/xsimd/arch/generic/xsimd_generic_complex.hpp @@ -26,54 +26,54 @@ namespace xsimd // real template - inline batch real(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch real(batch const& self, requires_arch) noexcept { return self; } template - inline batch real(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch real(batch, A> const& self, requires_arch) noexcept { return self.real(); } // imag template - inline batch imag(batch const& /*self*/, requires_arch) noexcept + XSIMD_INLINE batch imag(batch const& /*self*/, requires_arch) noexcept { return batch(T(0)); } template - inline batch imag(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch imag(batch, A> const& self, requires_arch) noexcept { return self.imag(); } // arg template - inline real_batch_type_t> arg(batch const& self, requires_arch) noexcept + XSIMD_INLINE real_batch_type_t> arg(batch const& self, requires_arch) noexcept { return atan2(imag(self), real(self)); } // conj template - inline complex_batch_type_t> conj(batch const& self, requires_arch) noexcept + XSIMD_INLINE complex_batch_type_t> conj(batch const& self, requires_arch) noexcept { return { real(self), -imag(self) }; } // norm template - inline real_batch_type_t> norm(batch const& self, requires_arch) noexcept + XSIMD_INLINE real_batch_type_t> norm(batch const& self, requires_arch) noexcept { return { fma(real(self), real(self), imag(self) * imag(self)) }; } // proj template - inline complex_batch_type_t> proj(batch const& self, requires_arch) noexcept + XSIMD_INLINE complex_batch_type_t> proj(batch const& self, requires_arch) noexcept { using batch_type = complex_batch_type_t>; using real_batch = typename batch_type::real_batch; @@ -86,19 +86,19 @@ namespace xsimd } template - inline batch_bool isnan(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch, A> const& self, requires_arch) noexcept { return batch_bool(isnan(self.real()) || isnan(self.imag())); } template - inline batch_bool isinf(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isinf(batch, A> const& self, requires_arch) noexcept { return batch_bool(isinf(self.real()) || isinf(self.imag())); } template - inline batch_bool isfinite(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isfinite(batch, A> const& self, requires_arch) noexcept { return batch_bool(isfinite(self.real()) && isfinite(self.imag())); } diff --git a/include/xsimd/arch/generic/xsimd_generic_details.hpp b/include/xsimd/arch/generic/xsimd_generic_details.hpp index 14c62a089..e676e0a7d 100644 --- a/include/xsimd/arch/generic/xsimd_generic_details.hpp +++ b/include/xsimd/arch/generic/xsimd_generic_details.hpp @@ -23,81 +23,81 @@ namespace xsimd { // Forward declaration. Should we put them in a separate file? 
template - inline batch abs(batch const& self) noexcept; + XSIMD_INLINE batch abs(batch const& self) noexcept; template - inline batch abs(batch, A> const& self) noexcept; + XSIMD_INLINE batch abs(batch, A> const& self) noexcept; template - inline bool any(batch_bool const& self) noexcept; + XSIMD_INLINE bool any(batch_bool const& self) noexcept; template - inline batch atan2(batch const& self, batch const& other) noexcept; + XSIMD_INLINE batch atan2(batch const& self, batch const& other) noexcept; template - inline batch batch_cast(batch const&, batch const& out) noexcept; + XSIMD_INLINE batch batch_cast(batch const&, batch const& out) noexcept; template - inline batch bitofsign(batch const& self) noexcept; + XSIMD_INLINE batch bitofsign(batch const& self) noexcept; template - inline batch bitwise_cast(batch const& self) noexcept; + XSIMD_INLINE batch bitwise_cast(batch const& self) noexcept; template - inline batch cos(batch const& self) noexcept; + XSIMD_INLINE batch cos(batch const& self) noexcept; template - inline batch cosh(batch const& self) noexcept; + XSIMD_INLINE batch cosh(batch const& self) noexcept; template - inline batch exp(batch const& self) noexcept; + XSIMD_INLINE batch exp(batch const& self) noexcept; template - inline batch fma(batch const& x, batch const& y, batch const& z) noexcept; + XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z) noexcept; template - inline batch fms(batch const& x, batch const& y, batch const& z) noexcept; + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z) noexcept; template - inline batch frexp(const batch& x, const batch, A>& e) noexcept; + XSIMD_INLINE batch frexp(const batch& x, const batch, A>& e) noexcept; template - inline batch horner(const batch& self) noexcept; + XSIMD_INLINE batch horner(const batch& self) noexcept; template - inline batch hypot(const batch& self) noexcept; + XSIMD_INLINE batch hypot(const batch& self) noexcept; template - inline batch_bool is_even(batch const& self) noexcept; + XSIMD_INLINE batch_bool is_even(batch const& self) noexcept; template - inline batch_bool is_flint(batch const& self) noexcept; + XSIMD_INLINE batch_bool is_flint(batch const& self) noexcept; template - inline batch_bool is_odd(batch const& self) noexcept; + XSIMD_INLINE batch_bool is_odd(batch const& self) noexcept; template - inline typename batch::batch_bool_type isinf(batch const& self) noexcept; + XSIMD_INLINE typename batch::batch_bool_type isinf(batch const& self) noexcept; template - inline typename batch::batch_bool_type isfinite(batch const& self) noexcept; + XSIMD_INLINE typename batch::batch_bool_type isfinite(batch const& self) noexcept; template - inline typename batch::batch_bool_type isnan(batch const& self) noexcept; + XSIMD_INLINE typename batch::batch_bool_type isnan(batch const& self) noexcept; template - inline batch ldexp(const batch& x, const batch, A>& e) noexcept; + XSIMD_INLINE batch ldexp(const batch& x, const batch, A>& e) noexcept; template - inline batch log(batch const& self) noexcept; + XSIMD_INLINE batch log(batch const& self) noexcept; template - inline batch nearbyint(batch const& self) noexcept; + XSIMD_INLINE batch nearbyint(batch const& self) noexcept; template - inline batch, A> nearbyint_as_int(const batch& x) noexcept; + XSIMD_INLINE batch, A> nearbyint_as_int(const batch& x) noexcept; template - inline T reduce_add(batch const&) noexcept; + XSIMD_INLINE T reduce_add(batch const&) noexcept; template - inline batch select(batch_bool const&, batch const&, batch 
const&) noexcept; + XSIMD_INLINE batch select(batch_bool const&, batch const&, batch const&) noexcept; template - inline batch, A> select(batch_bool const&, batch, A> const&, batch, A> const&) noexcept; + XSIMD_INLINE batch, A> select(batch_bool const&, batch, A> const&, batch, A> const&) noexcept; template - inline batch sign(batch const& self) noexcept; + XSIMD_INLINE batch sign(batch const& self) noexcept; template - inline batch signnz(batch const& self) noexcept; + XSIMD_INLINE batch signnz(batch const& self) noexcept; template - inline batch sin(batch const& self) noexcept; + XSIMD_INLINE batch sin(batch const& self) noexcept; template - inline batch sinh(batch const& self) noexcept; + XSIMD_INLINE batch sinh(batch const& self) noexcept; template - inline std::pair, batch> sincos(batch const& self) noexcept; + XSIMD_INLINE std::pair, batch> sincos(batch const& self) noexcept; template - inline batch sqrt(batch const& self) noexcept; + XSIMD_INLINE batch sqrt(batch const& self) noexcept; template - inline batch tan(batch const& self) noexcept; + XSIMD_INLINE batch tan(batch const& self) noexcept; template - inline batch, A> to_float(batch const& self) noexcept; + XSIMD_INLINE batch, A> to_float(batch const& self) noexcept; template - inline batch, A> to_int(batch const& self) noexcept; + XSIMD_INLINE batch, A> to_int(batch const& self) noexcept; template - inline batch trunc(batch const& self) noexcept; + XSIMD_INLINE batch trunc(batch const& self) noexcept; namespace kernel { @@ -105,7 +105,7 @@ namespace xsimd namespace detail { template - inline batch apply(F&& func, batch const& self, batch const& other) noexcept + XSIMD_INLINE batch apply(F&& func, batch const& self, batch const& other) noexcept { constexpr std::size_t size = batch::size; alignas(A::alignment()) T self_buffer[size]; @@ -120,7 +120,7 @@ namespace xsimd } template - inline batch apply_transform(F&& func, batch const& self) noexcept + XSIMD_INLINE batch apply_transform(F&& func, batch const& self) noexcept { static_assert(batch::size == batch::size, "Source and destination sizes must match"); @@ -141,42 +141,42 @@ namespace xsimd namespace detail { template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return bitwise_cast(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return bitwise_cast(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return bitwise_cast(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return bitwise_cast(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return bitwise_cast(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return bitwise_cast(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& 
self, batch const&, requires_arch) noexcept { return bitwise_cast(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return bitwise_cast(self); } @@ -184,7 +184,7 @@ namespace xsimd // Provide a generic uint32_t -> float cast only if we have a // non-generic int32_t -> float fast_cast template const&>(), std::declval const&>(), A {}))> - inline batch fast_cast(batch const& v, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& v, batch const&, requires_arch) noexcept { // see https://stackoverflow.com/questions/34066228/how-to-perform-uint32-float-conversion-with-sse batch msk_lo(0xFFFF); @@ -201,7 +201,7 @@ namespace xsimd // Provide a generic float -> uint32_t cast only if we have a // non-generic float -> int32_t fast_cast template const&>(), std::declval const&>(), A {}))> - inline batch fast_cast(batch const& v, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& v, batch const&, requires_arch) noexcept { auto is_large = v >= batch(1u << 31); auto small = bitwise_cast(batch_cast(v)); @@ -258,25 +258,25 @@ namespace xsimd * ==================================================== */ template - inline B coef() noexcept + XSIMD_INLINE B coef() noexcept { using value_type = typename B::value_type; return B(bit_cast(as_unsigned_integer_t(c))); } template - inline B horner(const B&) noexcept + XSIMD_INLINE B horner(const B&) noexcept { return B(typename B::value_type(0.)); } template - inline B horner(const B&) noexcept + XSIMD_INLINE B horner(const B&) noexcept { return coef(); } template - inline B horner(const B& self) noexcept + XSIMD_INLINE B horner(const B& self) noexcept { return fma(self, horner(self), coef()); } @@ -291,19 +291,19 @@ namespace xsimd * ==================================================== */ template - inline B horner1(const B&) noexcept + XSIMD_INLINE B horner1(const B&) noexcept { return B(1.); } template - inline B horner1(const B& x) noexcept + XSIMD_INLINE B horner1(const B& x) noexcept { return x + detail::coef(); } template - inline B horner1(const B& x) noexcept + XSIMD_INLINE B horner1(const B& x) noexcept { return fma(x, horner1(x), detail::coef()); } diff --git a/include/xsimd/arch/generic/xsimd_generic_logical.hpp b/include/xsimd/arch/generic/xsimd_generic_logical.hpp index dd446e83d..69d9657e1 100644 --- a/include/xsimd/arch/generic/xsimd_generic_logical.hpp +++ b/include/xsimd/arch/generic/xsimd_generic_logical.hpp @@ -24,7 +24,7 @@ namespace xsimd // from mask template - inline batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept + XSIMD_INLINE batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept { alignas(A::alignment()) bool buffer[batch_bool::size]; // This is inefficient but should never be called. 
It's just a @@ -36,28 +36,28 @@ namespace xsimd // ge template - inline batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept { return other <= self; } // gt template - inline batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept { return other < self; } // is_even template - inline batch_bool is_even(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool is_even(batch const& self, requires_arch) noexcept { return is_flint(self * T(0.5)); } // is_flint template - inline batch_bool is_flint(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool is_flint(batch const& self, requires_arch) noexcept { auto frac = select(isnan(self - self), constants::nan>(), self - trunc(self)); return frac == T(0.); @@ -65,69 +65,69 @@ namespace xsimd // is_odd template - inline batch_bool is_odd(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool is_odd(batch const& self, requires_arch) noexcept { return is_even(self - T(1.)); } // isinf template ::value, void>::type> - inline batch_bool isinf(batch const&, requires_arch) noexcept + XSIMD_INLINE batch_bool isinf(batch const&, requires_arch) noexcept { return batch_bool(false); } template - inline batch_bool isinf(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isinf(batch const& self, requires_arch) noexcept { return abs(self) == std::numeric_limits::infinity(); } template - inline batch_bool isinf(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isinf(batch const& self, requires_arch) noexcept { return abs(self) == std::numeric_limits::infinity(); } // isfinite template ::value, void>::type> - inline batch_bool isfinite(batch const&, requires_arch) noexcept + XSIMD_INLINE batch_bool isfinite(batch const&, requires_arch) noexcept { return batch_bool(true); } template - inline batch_bool isfinite(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isfinite(batch const& self, requires_arch) noexcept { return (self - self) == 0.f; } template - inline batch_bool isfinite(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isfinite(batch const& self, requires_arch) noexcept { return (self - self) == 0.; } // isnan template ::value, void>::type> - inline batch_bool isnan(batch const&, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const&, requires_arch) noexcept { return batch_bool(false); } // le template ::value, void>::type> - inline batch_bool le(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& self, batch const& other, requires_arch) noexcept { return (self < other) || (self == other); } // neq template - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return !(other == self); } // logical_and template - inline batch logical_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch logical_and(batch const& self, batch const& other, requires_arch) noexcept { return detail::apply([](T x, T y) noexcept { return x && y; }, @@ -136,7 +136,7 @@ namespace xsimd // logical_or template - inline batch logical_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch 
logical_or(batch const& self, batch const& other, requires_arch) noexcept { return detail::apply([](T x, T y) noexcept { return x || y; }, @@ -145,7 +145,7 @@ namespace xsimd // mask template - inline uint64_t mask(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept { alignas(A::alignment()) bool buffer[batch_bool::size]; self.store_aligned(buffer); diff --git a/include/xsimd/arch/generic/xsimd_generic_math.hpp b/include/xsimd/arch/generic/xsimd_generic_math.hpp index a7b8e2f90..f9e7f5782 100644 --- a/include/xsimd/arch/generic/xsimd_generic_math.hpp +++ b/include/xsimd/arch/generic/xsimd_generic_math.hpp @@ -27,7 +27,7 @@ namespace xsimd using namespace types; // abs template - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { if (std::is_unsigned::value) return self; @@ -40,7 +40,7 @@ namespace xsimd } template - inline batch abs(batch, A> const& z, requires_arch) noexcept + XSIMD_INLINE batch abs(batch, A> const& z, requires_arch) noexcept { return hypot(z.real(), z.imag()); } @@ -49,13 +49,13 @@ namespace xsimd namespace detail { template - inline batch avg(batch const& x, batch const& y, std::true_type, std::false_type) noexcept + XSIMD_INLINE batch avg(batch const& x, batch const& y, std::true_type, std::false_type) noexcept { return (x & y) + ((x ^ y) >> 1); } template - inline batch avg(batch const& x, batch const& y, std::true_type, std::true_type) noexcept + XSIMD_INLINE batch avg(batch const& x, batch const& y, std::true_type, std::true_type) noexcept { // Inspired by // https://stackoverflow.com/questions/5697500/take-the-average-of-two-signed-numbers-in-c @@ -66,14 +66,14 @@ namespace xsimd } template - inline batch avg(batch const& x, batch const& y, std::false_type, std::true_type) noexcept + XSIMD_INLINE batch avg(batch const& x, batch const& y, std::false_type, std::true_type) noexcept { return (x + y) / 2; } } template - inline batch avg(batch const& x, batch const& y, requires_arch) noexcept + XSIMD_INLINE batch avg(batch const& x, batch const& y, requires_arch) noexcept { return detail::avg(x, y, typename std::is_integral::type {}, typename std::is_signed::type {}); } @@ -82,7 +82,7 @@ namespace xsimd namespace detail { template - inline batch avgr(batch const& x, batch const& y, std::true_type) noexcept + XSIMD_INLINE batch avgr(batch const& x, batch const& y, std::true_type) noexcept { constexpr unsigned shift = 8 * sizeof(T) - 1; auto adj = std::is_signed::value ? 
((x ^ y) & 0x1) : (((x ^ y) << shift) >> shift); @@ -90,21 +90,21 @@ namespace xsimd } template - inline batch avgr(batch const& x, batch const& y, std::false_type) noexcept + XSIMD_INLINE batch avgr(batch const& x, batch const& y, std::false_type) noexcept { return ::xsimd::kernel::avg(x, y, A {}); } } template - inline batch avgr(batch const& x, batch const& y, requires_arch) noexcept + XSIMD_INLINE batch avgr(batch const& x, batch const& y, requires_arch) noexcept { return detail::avgr(x, y, typename std::is_integral::type {}); } // batch_cast template - inline batch batch_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch batch_cast(batch const& self, batch const&, requires_arch) noexcept { return self; } @@ -112,12 +112,12 @@ namespace xsimd namespace detail { template - inline batch batch_cast(batch const& self, batch const& out, requires_arch, with_fast_conversion) noexcept + XSIMD_INLINE batch batch_cast(batch const& self, batch const& out, requires_arch, with_fast_conversion) noexcept { return fast_cast(self, out, A {}); } template - inline batch batch_cast(batch const& self, batch const&, requires_arch, with_slow_conversion) noexcept + XSIMD_INLINE batch batch_cast(batch const& self, batch const&, requires_arch, with_slow_conversion) noexcept { static_assert(!std::is_same::value, "there should be no conversion for this type combination"); using batch_type_in = batch; @@ -133,14 +133,14 @@ namespace xsimd } template - inline batch batch_cast(batch const& self, batch const& out, requires_arch) noexcept + XSIMD_INLINE batch batch_cast(batch const& self, batch const& out, requires_arch) noexcept { return detail::batch_cast(self, out, A {}, detail::conversion_type {}); } // bitofsign template - inline batch bitofsign(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitofsign(batch const& self, requires_arch) noexcept { static_assert(std::is_integral::value, "int type implementation"); if (std::is_unsigned::value) @@ -150,19 +150,19 @@ namespace xsimd } template - inline batch bitofsign(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitofsign(batch const& self, requires_arch) noexcept { return self & constants::signmask>(); } template - inline batch bitofsign(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitofsign(batch const& self, requires_arch) noexcept { return self & constants::signmask>(); } // bitwise_cast template - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return self; } @@ -178,7 +178,7 @@ namespace xsimd * ==================================================== */ template - inline batch cbrt(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch cbrt(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type z = abs(self); @@ -225,7 +225,7 @@ namespace xsimd } template - inline batch cbrt(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch cbrt(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type z = abs(self); @@ -274,14 +274,14 @@ namespace xsimd // clip template - inline batch clip(batch const& self, batch const& lo, batch const& hi, requires_arch) noexcept + XSIMD_INLINE batch clip(batch const& self, batch const& lo, batch const& hi, requires_arch) noexcept { return min(hi, max(self, lo)); } // copysign template ::value, void>::type> - inline batch copysign(batch const& self, batch const& 
other, requires_arch) noexcept + XSIMD_INLINE batch copysign(batch const& self, batch const& other, requires_arch) noexcept { return abs(self) | bitofsign(other); } @@ -308,7 +308,7 @@ namespace xsimd using batch_type = batch; // computes erf(a0)/a0 // x is sqr(a0) and 0 <= abs(a0) <= 2/3 - static inline batch_type erf1(const batch_type& x) noexcept + static XSIMD_INLINE batch_type erf1(const batch_type& x) noexcept { return detail::horner= 2/3 - static inline batch_type erfc2(const batch_type& x) noexcept + static XSIMD_INLINE batch_type erfc2(const batch_type& x) noexcept { return detail::horner(x); } - static inline batch_type erfc3(const batch_type& x) noexcept + static XSIMD_INLINE batch_type erfc3(const batch_type& x) noexcept { return (batch_type(1.) - x) * detail::horner; // computes erf(a0)/a0 // x is sqr(a0) and 0 <= abs(a0) <= 0.65 - static inline batch_type erf1(const batch_type& x) noexcept + static XSIMD_INLINE batch_type erf1(const batch_type& x) noexcept { return detail::horner= 6 rx = 1/x - static inline batch_type erfc4(const batch_type& x) noexcept + static XSIMD_INLINE batch_type erfc4(const batch_type& x) noexcept { return detail::horner - inline batch erf(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch erf(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type x = abs(self); @@ -485,7 +485,7 @@ namespace xsimd } template - inline batch erf(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch erf(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type x = abs(self); @@ -521,7 +521,7 @@ namespace xsimd // erfc template - inline batch erfc(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch erfc(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type x = abs(self); @@ -546,7 +546,7 @@ namespace xsimd } template - inline batch erfc(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch erfc(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type x = abs(self); @@ -590,54 +590,54 @@ namespace xsimd B x; template - inline B operator()(const Ts&... coefs) noexcept + XSIMD_INLINE B operator()(const Ts&... coefs) noexcept { return eval(coefs...); } private: - inline B eval(const B& c0) noexcept + XSIMD_INLINE B eval(const B& c0) noexcept { return c0; } - inline B eval(const B& c0, const B& c1) noexcept + XSIMD_INLINE B eval(const B& c0, const B& c1) noexcept { return fma(x, c1, c0); } template - inline B eval(::xsimd::detail::index_sequence, const Tuple& tuple) + XSIMD_INLINE B eval(::xsimd::detail::index_sequence, const Tuple& tuple) { return estrin { x * x }(std::get(tuple)...); } template - inline B eval(const std::tuple& tuple) noexcept + XSIMD_INLINE B eval(const std::tuple& tuple) noexcept { return eval(::xsimd::detail::make_index_sequence(), tuple); } template - inline B eval(const std::tuple& tuple, const B& c0) noexcept + XSIMD_INLINE B eval(const std::tuple& tuple, const B& c0) noexcept { return eval(std::tuple_cat(tuple, std::make_tuple(eval(c0)))); } template - inline B eval(const std::tuple& tuple, const B& c0, const B& c1) noexcept + XSIMD_INLINE B eval(const std::tuple& tuple, const B& c0, const B& c1) noexcept { return eval(std::tuple_cat(tuple, std::make_tuple(eval(c0, c1)))); } template - inline B eval(const std::tuple& tuple, const B& c0, const B& c1, const Ts&... coefs) noexcept + XSIMD_INLINE B eval(const std::tuple& tuple, const B& c0, const B& c1, const Ts&... 
coefs) noexcept { return eval(std::tuple_cat(tuple, std::make_tuple(eval(c0, c1))), coefs...); } template - inline B eval(const B& c0, const B& c1, const Ts&... coefs) noexcept + XSIMD_INLINE B eval(const B& c0, const B& c1, const Ts&... coefs) noexcept { return eval(std::make_tuple(eval(c0, c1)), coefs...); } @@ -645,7 +645,7 @@ namespace xsimd } template - inline batch estrin(const batch& self) noexcept + XSIMD_INLINE batch estrin(const batch& self) noexcept { using batch_type = batch; return detail::estrin { self }(detail::coef()...); @@ -722,7 +722,7 @@ namespace xsimd struct exp_reduction : exp_reduction_base, exp_tag> { using batch_type = batch; - static inline batch_type approx(const batch_type& x) noexcept + static XSIMD_INLINE batch_type approx(const batch_type& x) noexcept { batch_type y = detail::horner() * a); x = fnma(k, constants::log_2hi(), a); @@ -747,7 +747,7 @@ namespace xsimd struct exp_reduction : exp_reduction_base, exp10_tag> { using batch_type = batch; - static inline batch_type approx(const batch_type& x) noexcept + static XSIMD_INLINE batch_type approx(const batch_type& x) noexcept { return ++(detail::horner() * a); x = fnma(k, constants::log10_2hi(), a); @@ -773,7 +773,7 @@ namespace xsimd struct exp_reduction : exp_reduction_base, exp2_tag> { using batch_type = batch; - static inline batch_type approx(const batch_type& x) noexcept + static XSIMD_INLINE batch_type approx(const batch_type& x) noexcept { batch_type y = detail::horner()); } - static inline batch_type reduce(const batch_type& a, batch_type& x) noexcept + static XSIMD_INLINE batch_type reduce(const batch_type& a, batch_type& x) noexcept { batch_type k = nearbyint(a); x = (a - k); @@ -797,7 +797,7 @@ namespace xsimd struct exp_reduction : exp_reduction_base, exp_tag> { using batch_type = batch; - static inline batch_type approx(const batch_type& x) noexcept + static XSIMD_INLINE batch_type approx(const batch_type& x) noexcept { batch_type t = x * x; return fnma(t, @@ -810,7 +810,7 @@ namespace xsimd x); } - static inline batch_type reduce(const batch_type& a, batch_type& hi, batch_type& lo, batch_type& x) noexcept + static XSIMD_INLINE batch_type reduce(const batch_type& a, batch_type& hi, batch_type& lo, batch_type& x) noexcept { batch_type k = nearbyint(constants::invlog_2() * a); hi = fnma(k, constants::log_2hi(), a); @@ -819,7 +819,7 @@ namespace xsimd return k; } - static inline batch_type finalize(const batch_type& x, const batch_type& c, const batch_type& hi, const batch_type& lo) noexcept + static XSIMD_INLINE batch_type finalize(const batch_type& x, const batch_type& c, const batch_type& hi, const batch_type& lo) noexcept { return batch_type(1.) - (((lo - (x * c) / (batch_type(2.) 
- c)) - hi)); } @@ -829,7 +829,7 @@ namespace xsimd struct exp_reduction : exp_reduction_base, exp10_tag> { using batch_type = batch; - static inline batch_type approx(const batch_type& x) noexcept + static XSIMD_INLINE batch_type approx(const batch_type& x) noexcept { batch_type xx = x * x; batch_type px = x * detail::horner(xx); @@ -837,7 +837,7 @@ namespace xsimd return ++(x2 + x2); } - static inline batch_type reduce(const batch_type& a, batch_type&, batch_type&, batch_type& x) noexcept + static XSIMD_INLINE batch_type reduce(const batch_type& a, batch_type&, batch_type&, batch_type& x) noexcept { batch_type k = nearbyint(constants::invlog10_2() * a); x = fnma(k, constants::log10_2hi(), a); @@ -845,7 +845,7 @@ namespace xsimd return k; } - static inline batch_type finalize(const batch_type&, const batch_type& c, const batch_type&, const batch_type&) noexcept + static XSIMD_INLINE batch_type finalize(const batch_type&, const batch_type& c, const batch_type&, const batch_type&) noexcept { return c; } @@ -855,7 +855,7 @@ namespace xsimd struct exp_reduction : exp_reduction_base, exp2_tag> { using batch_type = batch; - static inline batch_type approx(const batch_type& x) noexcept + static XSIMD_INLINE batch_type approx(const batch_type& x) noexcept { batch_type t = x * x; return fnma(t, @@ -868,21 +868,21 @@ namespace xsimd x); } - static inline batch_type reduce(const batch_type& a, batch_type&, batch_type&, batch_type& x) noexcept + static XSIMD_INLINE batch_type reduce(const batch_type& a, batch_type&, batch_type&, batch_type& x) noexcept { batch_type k = nearbyint(a); x = (a - k) * constants::log_2(); return k; } - static inline batch_type finalize(const batch_type& x, const batch_type& c, const batch_type&, const batch_type&) noexcept + static XSIMD_INLINE batch_type finalize(const batch_type& x, const batch_type& c, const batch_type&, const batch_type&) noexcept { return batch_type(1.) + x + x * c / (batch_type(2.) 
- c); } }; template - inline batch exp(batch const& self) noexcept + XSIMD_INLINE batch exp(batch const& self) noexcept { using batch_type = batch; using reducer_t = exp_reduction; @@ -895,7 +895,7 @@ namespace xsimd } template - inline batch exp(batch const& self) noexcept + XSIMD_INLINE batch exp(batch const& self) noexcept { using batch_type = batch; using reducer_t = exp_reduction; @@ -910,13 +910,13 @@ namespace xsimd } template - inline batch exp(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch exp(batch const& self, requires_arch) noexcept { return detail::exp(self); } template - inline batch, A> exp(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch, A> exp(batch, A> const& self, requires_arch) noexcept { using batch_type = batch, A>; auto isincos = sincos(self.imag()); @@ -925,14 +925,14 @@ namespace xsimd // exp10 template - inline batch exp10(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch exp10(batch const& self, requires_arch) noexcept { return detail::exp(self); } // exp2 template - inline batch exp2(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch exp2(batch const& self, requires_arch) noexcept { return detail::exp(self); } @@ -950,7 +950,7 @@ namespace xsimd * ==================================================== */ template - static inline batch expm1(const batch& a) noexcept + static XSIMD_INLINE batch expm1(const batch& a) noexcept { using batch_type = batch; batch_type k = nearbyint(constants::invlog_2() * a); @@ -974,7 +974,7 @@ namespace xsimd } template - static inline batch expm1(const batch& a) noexcept + static XSIMD_INLINE batch expm1(const batch& a) noexcept { using batch_type = batch; batch_type k = nearbyint(constants::invlog_2() * a); @@ -1005,7 +1005,7 @@ namespace xsimd } template - inline batch expm1(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch expm1(batch const& self, requires_arch) noexcept { using batch_type = batch; return select(self < constants::logeps(), @@ -1016,7 +1016,7 @@ namespace xsimd } template - inline batch, A> expm1(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> expm1(const batch, A>& z, requires_arch) noexcept { using batch_type = batch, A>; using real_batch = typename batch_type::real_batch; @@ -1029,7 +1029,7 @@ namespace xsimd // polar template - inline batch, A> polar(const batch& r, const batch& theta, requires_arch) noexcept + XSIMD_INLINE batch, A> polar(const batch& r, const batch& theta, requires_arch) noexcept { auto sincosTheta = sincos(theta); return { r * sincosTheta.second, r * sincosTheta.first }; @@ -1037,14 +1037,14 @@ namespace xsimd // fdim template - inline batch fdim(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch fdim(batch const& self, batch const& other, requires_arch) noexcept { return fmax(batch(0), self - other); } // fmod template - inline batch fmod(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch fmod(batch const& self, batch const& other, requires_arch) noexcept { return fnma(trunc(self / other), other, self); } @@ -1060,7 +1060,7 @@ namespace xsimd * ==================================================== */ template - inline batch frexp(const batch& self, batch, A>& exp, requires_arch) noexcept + XSIMD_INLINE batch frexp(const batch& self, batch, A>& exp, requires_arch) noexcept { using batch_type = batch; using int_type = as_integer_t; @@ -1075,28 +1075,28 @@ namespace xsimd // from bool template - inline batch from_bool(batch_bool 
const& self, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& self, requires_arch) noexcept { return batch(self.data) & batch(1); } // horner template - inline batch horner(const batch& self) noexcept + XSIMD_INLINE batch horner(const batch& self) noexcept { return detail::horner, Coefs...>(self); } // hypot template - inline batch hypot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch hypot(batch const& self, batch const& other, requires_arch) noexcept { return sqrt(fma(self, self, other * other)); } // ipow template - inline batch ipow(batch const& self, ITy other, requires_arch) noexcept + XSIMD_INLINE batch ipow(batch const& self, ITy other, requires_arch) noexcept { return ::xsimd::detail::ipow(self, other); } @@ -1112,7 +1112,7 @@ namespace xsimd * ==================================================== */ template - inline batch ldexp(const batch& self, const batch, A>& other, requires_arch) noexcept + XSIMD_INLINE batch ldexp(const batch& self, const batch, A>& other, requires_arch) noexcept { using batch_type = batch; using itype = as_integer_t; @@ -1123,7 +1123,7 @@ namespace xsimd // lgamma template - inline batch lgamma(batch const& self, requires_arch) noexcept; + XSIMD_INLINE batch lgamma(batch const& self, requires_arch) noexcept; namespace detail { @@ -1137,7 +1137,7 @@ namespace xsimd * ==================================================== */ template - static inline batch gammalnB(const batch& x) noexcept + static XSIMD_INLINE batch gammalnB(const batch& x) noexcept { return horner, 0x3ed87730, // 4.227843421859038E-001 @@ -1152,7 +1152,7 @@ namespace xsimd } template - static inline batch gammalnC(const batch& x) noexcept + static XSIMD_INLINE batch gammalnC(const batch& x) noexcept { return horner, 0xbf13c468, // -5.772156501719101E-001 @@ -1167,7 +1167,7 @@ namespace xsimd } template - static inline batch gammaln2(const batch& x) noexcept + static XSIMD_INLINE batch gammaln2(const batch& x) noexcept { return horner, 0x3daaaa94, // 8.333316229807355E-002f @@ -1177,7 +1177,7 @@ namespace xsimd } template - static inline batch gammaln1(const batch& x) noexcept + static XSIMD_INLINE batch gammaln1(const batch& x) noexcept { return horner, 0xc12a0c675418055eull, // -8.53555664245765465627E5 @@ -1199,7 +1199,7 @@ namespace xsimd } template - static inline batch gammalnA(const batch& x) noexcept + static XSIMD_INLINE batch gammalnA(const batch& x) noexcept { return horner, 0x3fb555555555554bull, // 8.33333333333331927722E-2 @@ -1226,7 +1226,7 @@ namespace xsimd struct lgamma_impl> { using batch_type = batch; - static inline batch_type compute(const batch_type& a) noexcept + static XSIMD_INLINE batch_type compute(const batch_type& a) noexcept { auto inf_result = (a <= batch_type(0.)) && is_flint(a); batch_type x = select(inf_result, constants::nan(), a); @@ -1248,7 +1248,7 @@ namespace xsimd } private: - static inline batch_type negative(const batch_type& q, const batch_type& w) noexcept + static XSIMD_INLINE batch_type negative(const batch_type& q, const batch_type& w) noexcept { batch_type p = floor(q); batch_type z = q - p; @@ -1258,7 +1258,7 @@ namespace xsimd return -log(constants::invpi() * abs(z)) - w; } - static inline batch_type other(const batch_type& x) noexcept + static XSIMD_INLINE batch_type other(const batch_type& x) noexcept { auto xlt650 = (x < batch_type(6.5)); batch_type r0x = x; @@ -1347,7 +1347,7 @@ namespace xsimd { using batch_type = batch; - static inline batch_type compute(const batch_type& a) 
noexcept + static XSIMD_INLINE batch_type compute(const batch_type& a) noexcept { auto inf_result = (a <= batch_type(0.)) && is_flint(a); batch_type x = select(inf_result, constants::nan(), a); @@ -1369,6 +1369,8 @@ namespace xsimd } private: + // FIXME: cannot mark this one as XSIMD_INLINE because there's a + // recursive loop on `lgamma'. static inline batch_type large_negative(const batch_type& q) noexcept { batch_type w = lgamma(q); @@ -1381,7 +1383,7 @@ namespace xsimd return constants::logpi() - log(z) - w; } - static inline batch_type other(const batch_type& xx) noexcept + static XSIMD_INLINE batch_type other(const batch_type& xx) noexcept { batch_type x = xx; auto test = (x < batch_type(13.)); @@ -1424,7 +1426,7 @@ namespace xsimd } template - inline batch lgamma(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch lgamma(batch const& self, requires_arch) noexcept { return detail::lgamma_impl>::compute(self); } @@ -1440,7 +1442,7 @@ namespace xsimd * ==================================================== */ template - inline batch log(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch log(batch const& self, requires_arch) noexcept { using batch_type = batch; using int_type = as_integer_t; @@ -1480,7 +1482,7 @@ namespace xsimd } template - inline batch log(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch log(batch const& self, requires_arch) noexcept { using batch_type = batch; using int_type = as_integer_t; @@ -1523,14 +1525,14 @@ namespace xsimd } template - inline batch, A> log(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> log(const batch, A>& z, requires_arch) noexcept { return batch, A>(log(abs(z)), atan2(z.imag(), z.real())); } // log2 template - inline batch log2(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch log2(batch const& self, requires_arch) noexcept { using batch_type = batch; using int_type = as_integer_t; @@ -1570,7 +1572,7 @@ namespace xsimd } template - inline batch log2(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch log2(batch const& self, requires_arch) noexcept { using batch_type = batch; using int_type = as_integer_t; @@ -1620,7 +1622,7 @@ namespace xsimd namespace detail { template - inline batch logN_complex_impl(const batch& z, typename batch::value_type base) noexcept + XSIMD_INLINE batch logN_complex_impl(const batch& z, typename batch::value_type base) noexcept { using batch_type = batch; using rv_type = typename batch_type::value_type; @@ -1629,7 +1631,7 @@ namespace xsimd } template - inline batch, A> log2(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch, A> log2(batch, A> const& self, requires_arch) noexcept { return detail::logN_complex_impl(self, std::log(2)); } @@ -1647,7 +1649,7 @@ namespace xsimd * ==================================================== */ template - inline batch log10(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch log10(batch const& self, requires_arch) noexcept { using batch_type = batch; const batch_type @@ -1698,7 +1700,7 @@ namespace xsimd } template - inline batch log10(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch log10(batch const& self, requires_arch) noexcept { using batch_type = batch; const batch_type @@ -1752,7 +1754,7 @@ namespace xsimd } template - inline batch, A> log10(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> log10(const batch, A>& z, requires_arch) noexcept { return detail::logN_complex_impl(z, std::log(10)); } @@ -1768,7 +1770,7 @@ namespace 
xsimd * ==================================================== */ template - inline batch log1p(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch log1p(batch const& self, requires_arch) noexcept { using batch_type = batch; using int_type = as_integer_t; @@ -1800,7 +1802,7 @@ namespace xsimd } template - inline batch log1p(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch log1p(batch const& self, requires_arch) noexcept { using batch_type = batch; using int_type = as_integer_t; @@ -1833,7 +1835,7 @@ namespace xsimd } template - inline batch, A> log1p(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch, A> log1p(batch, A> const& self, requires_arch) noexcept { using batch_type = batch, A>; using real_batch = typename batch_type::real_batch; @@ -1848,7 +1850,7 @@ namespace xsimd // mod template ::value, void>::type> - inline batch mod(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mod(batch const& self, batch const& other, requires_arch) noexcept { return detail::apply([](T x, T y) noexcept -> T { return x % y; }, @@ -1857,14 +1859,14 @@ namespace xsimd // nearbyint template ::value, void>::type> - inline batch nearbyint(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch nearbyint(batch const& self, requires_arch) noexcept { return self; } namespace detail { template - inline batch nearbyintf(batch const& self) noexcept + XSIMD_INLINE batch nearbyintf(batch const& self) noexcept { using batch_type = batch; batch_type s = bitofsign(self); @@ -1884,26 +1886,26 @@ namespace xsimd } } template - inline batch nearbyint(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch nearbyint(batch const& self, requires_arch) noexcept { return detail::nearbyintf(self); } template - inline batch nearbyint(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch nearbyint(batch const& self, requires_arch) noexcept { return detail::nearbyintf(self); } // nearbyint_as_int template ::value, void>::type> - inline batch nearbyint_as_int(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch nearbyint_as_int(batch const& self, requires_arch) noexcept { return self; } // nearbyint_as_int template - inline batch, A> + XSIMD_INLINE batch, A> nearbyint_as_int(batch const& self, requires_arch) noexcept { using U = as_integer_t; @@ -1913,7 +1915,7 @@ namespace xsimd } template - inline batch, A> + XSIMD_INLINE batch, A> nearbyint_as_int(batch const& self, requires_arch) noexcept { using U = as_integer_t; @@ -1930,12 +1932,12 @@ namespace xsimd { using batch_type = batch; - static inline batch_type next(batch_type const& b) noexcept + static XSIMD_INLINE batch_type next(batch_type const& b) noexcept { return b; } - static inline batch_type prev(batch_type const& b) noexcept + static XSIMD_INLINE batch_type prev(batch_type const& b) noexcept { return b; } @@ -1963,13 +1965,13 @@ namespace xsimd using int_batch = typename bitwise_cast_batch::type; using int_type = typename int_batch::value_type; - static inline batch_type next(const batch_type& b) noexcept + static XSIMD_INLINE batch_type next(const batch_type& b) noexcept { batch_type n = ::xsimd::bitwise_cast(::xsimd::bitwise_cast(b) + int_type(1)); return select(b == constants::infinity(), b, n); } - static inline batch_type prev(const batch_type& b) noexcept + static XSIMD_INLINE batch_type prev(const batch_type& b) noexcept { batch_type p = ::xsimd::bitwise_cast(::xsimd::bitwise_cast(b) - int_type(1)); return select(b == constants::minusinfinity(), b, 
p); @@ -1977,7 +1979,7 @@ namespace xsimd }; } template - inline batch nextafter(batch const& from, batch const& to, requires_arch) noexcept + XSIMD_INLINE batch nextafter(batch const& from, batch const& to, requires_arch) noexcept { using kernel = detail::nextafter_kernel; return select(from == to, from, @@ -1995,7 +1997,7 @@ namespace xsimd * ==================================================== */ template - inline batch pow(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch pow(batch const& self, batch const& other, requires_arch) noexcept { using batch_type = batch; const auto zero = batch_type(0.); @@ -2010,7 +2012,7 @@ namespace xsimd } template - inline batch, A> pow(const batch, A>& a, const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> pow(const batch, A>& a, const batch, A>& z, requires_arch) noexcept { using cplx_batch = batch, A>; using real_batch = typename cplx_batch::real_batch; @@ -2029,8 +2031,8 @@ namespace xsimd // reciprocal template ::value, void>::type> - inline batch reciprocal(batch const& self, - requires_arch) noexcept + XSIMD_INLINE batch reciprocal(batch const& self, + requires_arch) noexcept { using batch_type = batch; return div(batch_type(1), self); @@ -2038,7 +2040,7 @@ namespace xsimd // reduce_add template - inline std::complex reduce_add(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE std::complex reduce_add(batch, A> const& self, requires_arch) noexcept { return { reduce_add(self.real()), reduce_add(self.imag()) }; } @@ -2055,13 +2057,13 @@ namespace xsimd }; template - inline T reduce(Op, batch const& self, std::integral_constant) noexcept + XSIMD_INLINE T reduce(Op, batch const& self, std::integral_constant) noexcept { return self.get(0); } template - inline T reduce(Op op, batch const& self, std::integral_constant) noexcept + XSIMD_INLINE T reduce(Op op, batch const& self, std::integral_constant) noexcept { using index_type = as_unsigned_integer_t; batch split = swizzle(self, make_batch_constant>()); @@ -2071,7 +2073,7 @@ namespace xsimd // reduce_max template - inline T reduce_max(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_max(batch const& self, requires_arch) noexcept { return detail::reduce([](batch const& x, batch const& y) { return max(x, y); }, @@ -2080,7 +2082,7 @@ namespace xsimd // reduce_min template - inline T reduce_min(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_min(batch const& self, requires_arch) noexcept { return detail::reduce([](batch const& x, batch const& y) { return min(x, y); }, @@ -2089,17 +2091,17 @@ namespace xsimd // remainder template - inline batch remainder(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch remainder(batch const& self, batch const& other, requires_arch) noexcept { return fnma(nearbyint(self / other), other, self); } template - inline batch remainder(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch remainder(batch const& self, batch const& other, requires_arch) noexcept { return fnma(nearbyint(self / other), other, self); } template ::value, void>::type> - inline batch remainder(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch remainder(batch const& self, batch const& other, requires_arch) noexcept { auto mod = self % other; return select(mod <= other / 2, mod, mod - other); @@ -2107,14 +2109,14 @@ namespace xsimd // select template - inline batch, A> select(batch_bool const& cond, batch, A> 
const& true_br, batch, A> const& false_br, requires_arch) noexcept + XSIMD_INLINE batch, A> select(batch_bool const& cond, batch, A> const& true_br, batch, A> const& false_br, requires_arch) noexcept { return { select(cond, true_br.real(), false_br.real()), select(cond, true_br.imag(), false_br.imag()) }; } // sign template ::value, void>::type> - inline batch sign(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch sign(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type res = select(self > batch_type(0), batch_type(1), batch_type(0)) - select(self < batch_type(0), batch_type(1), batch_type(0)); @@ -2124,7 +2126,7 @@ namespace xsimd namespace detail { template - inline batch signf(batch const& self) noexcept + XSIMD_INLINE batch signf(batch const& self) noexcept { using batch_type = batch; batch_type res = select(self > batch_type(0.f), batch_type(1.f), batch_type(0.f)) - select(self < batch_type(0.f), batch_type(1.f), batch_type(0.f)); @@ -2137,17 +2139,17 @@ namespace xsimd } template - inline batch sign(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch sign(batch const& self, requires_arch) noexcept { return detail::signf(self); } template - inline batch sign(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch sign(batch const& self, requires_arch) noexcept { return detail::signf(self); } template - inline batch, A> sign(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> sign(const batch, A>& z, requires_arch) noexcept { using batch_type = batch, A>; using real_batch = typename batch_type::real_batch; @@ -2160,7 +2162,7 @@ namespace xsimd // signnz template ::value, void>::type> - inline batch signnz(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch signnz(batch const& self, requires_arch) noexcept { using batch_type = batch; return (self >> (sizeof(T) * 8 - 1)) | batch_type(1.); @@ -2169,7 +2171,7 @@ namespace xsimd namespace detail { template - inline batch signnzf(batch const& self) noexcept + XSIMD_INLINE batch signnzf(batch const& self) noexcept { using batch_type = batch; #ifndef XSIMD_NO_NANS @@ -2181,19 +2183,19 @@ namespace xsimd } template - inline batch signnz(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch signnz(batch const& self, requires_arch) noexcept { return detail::signnzf(self); } template - inline batch signnz(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch signnz(batch const& self, requires_arch) noexcept { return detail::signnzf(self); } // sqrt template - inline batch, A> sqrt(batch, A> const& z, requires_arch) noexcept + XSIMD_INLINE batch, A> sqrt(batch, A> const& z, requires_arch) noexcept { constexpr T csqrt_scale_factor = std::is_same::value ? 
6.7108864e7f : 1.8014398509481984e16; @@ -2248,7 +2250,7 @@ namespace xsimd struct stirling_kernel> { using batch_type = batch; - static inline batch_type compute(const batch_type& x) noexcept + static XSIMD_INLINE batch_type compute(const batch_type& x) noexcept { return horner(x); } - static inline batch_type split_limit() noexcept + static XSIMD_INLINE batch_type split_limit() noexcept { return batch_type(bit_cast(uint32_t(0x41d628f6))); } - static inline batch_type large_limit() noexcept + static XSIMD_INLINE batch_type large_limit() noexcept { return batch_type(bit_cast(uint32_t(0x420c28f3))); } @@ -2272,7 +2274,7 @@ namespace xsimd struct stirling_kernel> { using batch_type = batch; - static inline batch_type compute(const batch_type& x) noexcept + static XSIMD_INLINE batch_type compute(const batch_type& x) noexcept { return horner(x); } - static inline batch_type split_limit() noexcept + static XSIMD_INLINE batch_type split_limit() noexcept { return batch_type(bit_cast(uint64_t(0x4061e083ba3443d4))); } - static inline batch_type large_limit() noexcept + static XSIMD_INLINE batch_type large_limit() noexcept { return batch_type(bit_cast(uint64_t(0x4065800000000000))); } @@ -2304,7 +2306,7 @@ namespace xsimd * ==================================================== */ template - inline batch stirling(const batch& a) noexcept + XSIMD_INLINE batch stirling(const batch& a) noexcept { using batch_type = batch; const batch_type stirlingsplitlim = stirling_kernel::split_limit(); @@ -2342,7 +2344,7 @@ namespace xsimd struct tgamma_kernel> { using batch_type = batch; - static inline batch_type compute(const batch_type& x) noexcept + static XSIMD_INLINE batch_type compute(const batch_type& x) noexcept { return horner> { using batch_type = batch; - static inline batch_type compute(const batch_type& x) noexcept + static XSIMD_INLINE batch_type compute(const batch_type& x) noexcept { return horner - inline B tgamma_large_negative(const B& a) noexcept + XSIMD_INLINE B tgamma_large_negative(const B& a) noexcept { B st = stirling(a); B p = floor(a); @@ -2409,7 +2411,7 @@ namespace xsimd } template - inline B tgamma_other(const B& a, const BB& test) noexcept + XSIMD_INLINE B tgamma_other(const B& a, const BB& test) noexcept { B x = select(test, B(2.), a); #ifndef XSIMD_NO_INFINITIES @@ -2448,7 +2450,7 @@ namespace xsimd } template - inline batch tgamma(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch tgamma(batch const& self, requires_arch) noexcept { using batch_type = batch; auto nan_result = (self < batch_type(0.) && is_flint(self)); diff --git a/include/xsimd/arch/generic/xsimd_generic_memory.hpp b/include/xsimd/arch/generic/xsimd_generic_memory.hpp index f92f6b48b..4651ecdbb 100644 --- a/include/xsimd/arch/generic/xsimd_generic_memory.hpp +++ b/include/xsimd/arch/generic/xsimd_generic_memory.hpp @@ -36,7 +36,7 @@ namespace xsimd namespace detail { template - inline batch create_compress_swizzle_mask(I bitmask, ::xsimd::detail::index_sequence) + XSIMD_INLINE batch create_compress_swizzle_mask(I bitmask, ::xsimd::detail::index_sequence) { batch swizzle_mask(IT(0)); alignas(A::alignment()) IT mask_buffer[batch::size] = { Is... 
}; @@ -49,7 +49,7 @@ namespace xsimd } template - inline batch + XSIMD_INLINE batch compress(batch const& x, batch_bool const& mask, kernel::requires_arch) noexcept { @@ -65,7 +65,7 @@ namespace xsimd namespace detail { template - inline batch create_expand_swizzle_mask(I bitmask, ::xsimd::detail::index_sequence) + XSIMD_INLINE batch create_expand_swizzle_mask(I bitmask, ::xsimd::detail::index_sequence) { batch swizzle_mask(IT(0)); IT j = 0; @@ -75,7 +75,7 @@ namespace xsimd } template - inline batch + XSIMD_INLINE batch expand(batch const& x, batch_bool const& mask, kernel::requires_arch) noexcept { @@ -88,7 +88,7 @@ namespace xsimd // extract_pair template - inline batch extract_pair(batch const& self, batch const& other, std::size_t i, requires_arch) noexcept + XSIMD_INLINE batch extract_pair(batch const& self, batch const& other, std::size_t i, requires_arch) noexcept { constexpr std::size_t size = batch::size; assert(i < size && "index in bounds"); @@ -115,6 +115,7 @@ namespace xsimd // gather namespace detail { + // Not using XSIMD_INLINE here as it makes msvc hang forever on avx512 template ::type = 0> inline batch gather(U const* src, batch const& index, ::xsimd::index I) noexcept @@ -134,7 +135,7 @@ namespace xsimd } // namespace detail template - inline batch + XSIMD_INLINE batch gather(batch const&, T const* src, batch const& index, kernel::requires_arch) noexcept { @@ -146,7 +147,7 @@ namespace xsimd // Gather with runtime indexes and mismatched strides. template - inline detail::sizes_mismatch_t> + XSIMD_INLINE detail::sizes_mismatch_t> gather(batch const&, U const* src, batch const& index, kernel::requires_arch) noexcept { @@ -158,7 +159,7 @@ namespace xsimd // Gather with runtime indexes and matching strides. template - inline detail::stride_match_t> + XSIMD_INLINE detail::stride_match_t> gather(batch const&, U const* src, batch const& index, kernel::requires_arch) noexcept { @@ -170,7 +171,7 @@ namespace xsimd // insert template - inline batch insert(batch const& self, T val, index, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept { struct index_mask { @@ -185,7 +186,7 @@ namespace xsimd // get template - inline T get(batch const& self, ::xsimd::index, requires_arch) noexcept + XSIMD_INLINE T get(batch const& self, ::xsimd::index, requires_arch) noexcept { alignas(A::alignment()) T buffer[batch::size]; self.store_aligned(&buffer[0]); @@ -193,7 +194,7 @@ namespace xsimd } template - inline T get(batch_bool const& self, ::xsimd::index, requires_arch) noexcept + XSIMD_INLINE T get(batch_bool const& self, ::xsimd::index, requires_arch) noexcept { alignas(A::alignment()) T buffer[batch_bool::size]; self.store_aligned(&buffer[0]); @@ -201,7 +202,7 @@ namespace xsimd } template - inline auto get(batch, A> const& self, ::xsimd::index, requires_arch) noexcept -> typename batch, A>::value_type + XSIMD_INLINE auto get(batch, A> const& self, ::xsimd::index, requires_arch) noexcept -> typename batch, A>::value_type { alignas(A::alignment()) T buffer[batch, A>::size]; self.store_aligned(&buffer[0]); @@ -209,7 +210,7 @@ namespace xsimd } template - inline T get(batch const& self, std::size_t i, requires_arch) noexcept + XSIMD_INLINE T get(batch const& self, std::size_t i, requires_arch) noexcept { alignas(A::alignment()) T buffer[batch::size]; self.store_aligned(&buffer[0]); @@ -217,7 +218,7 @@ namespace xsimd } template - inline T get(batch_bool const& self, std::size_t i, requires_arch) noexcept + XSIMD_INLINE T get(batch_bool
const& self, std::size_t i, requires_arch) noexcept { alignas(A::alignment()) bool buffer[batch_bool::size]; self.store_aligned(&buffer[0]); @@ -225,7 +226,7 @@ namespace xsimd } template - inline auto get(batch, A> const& self, std::size_t i, requires_arch) noexcept -> typename batch, A>::value_type + XSIMD_INLINE auto get(batch, A> const& self, std::size_t i, requires_arch) noexcept -> typename batch, A>::value_type { using T2 = typename batch, A>::value_type; alignas(A::alignment()) T2 buffer[batch, A>::size]; @@ -237,14 +238,14 @@ namespace xsimd namespace detail { template - inline batch load_aligned(T_in const* mem, convert, requires_arch, with_fast_conversion) noexcept + XSIMD_INLINE batch load_aligned(T_in const* mem, convert, requires_arch, with_fast_conversion) noexcept { using batch_type_in = batch; using batch_type_out = batch; return fast_cast(batch_type_in::load_aligned(mem), batch_type_out(), A {}); } template - inline batch load_aligned(T_in const* mem, convert, requires_arch, with_slow_conversion) noexcept + XSIMD_INLINE batch load_aligned(T_in const* mem, convert, requires_arch, with_slow_conversion) noexcept { static_assert(!std::is_same::value, "there should be a direct load for this type combination"); using batch_type_out = batch; @@ -254,7 +255,7 @@ namespace xsimd } } template - inline batch load_aligned(T_in const* mem, convert cvt, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T_in const* mem, convert cvt, requires_arch) noexcept { return detail::load_aligned(mem, cvt, A {}, detail::conversion_type {}); } @@ -263,7 +264,7 @@ namespace xsimd namespace detail { template - inline batch load_unaligned(T_in const* mem, convert, requires_arch, with_fast_conversion) noexcept + XSIMD_INLINE batch load_unaligned(T_in const* mem, convert, requires_arch, with_fast_conversion) noexcept { using batch_type_in = batch; using batch_type_out = batch; @@ -271,21 +272,21 @@ namespace xsimd } template - inline batch load_unaligned(T_in const* mem, convert cvt, requires_arch, with_slow_conversion) noexcept + XSIMD_INLINE batch load_unaligned(T_in const* mem, convert cvt, requires_arch, with_slow_conversion) noexcept { static_assert(!std::is_same::value, "there should be a direct load for this type combination"); return load_aligned(mem, cvt, generic {}, with_slow_conversion {}); } } template - inline batch load_unaligned(T_in const* mem, convert cvt, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T_in const* mem, convert cvt, requires_arch) noexcept { return detail::load_unaligned(mem, cvt, generic {}, detail::conversion_type {}); } // rotate_left template - inline batch rotate_left(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch rotate_left(batch const& self, requires_arch) noexcept { struct rotate_generator { @@ -299,14 +300,14 @@ namespace xsimd } template - inline batch, A> rotate_left(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch, A> rotate_left(batch, A> const& self, requires_arch) noexcept { return { rotate_left(self.real()), rotate_left(self.imag()) }; } // rotate_right template - inline batch rotate_right(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch rotate_right(batch const& self, requires_arch) noexcept { struct rotate_generator { @@ -320,7 +321,7 @@ namespace xsimd } template - inline batch, A> rotate_right(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch, A> rotate_right(batch, A> const& self, requires_arch) noexcept { return { rotate_right(self.real()), 
rotate_right(self.imag()) }; } @@ -329,15 +330,15 @@ namespace xsimd namespace detail { template ::type = 0> - inline void scatter(batch const& src, U* dst, - batch const& index, - ::xsimd::index I) noexcept + XSIMD_INLINE void scatter(batch const& src, U* dst, + batch const& index, + ::xsimd::index I) noexcept { dst[index.get(I)] = static_cast(src.get(I)); } template ::type = 0> - inline void + XSIMD_INLINE void scatter(batch const& src, U* dst, batch const& index, ::xsimd::index I) noexcept { @@ -350,7 +351,7 @@ namespace xsimd } // namespace detail template - inline void + XSIMD_INLINE void scatter(batch const& src, T* dst, batch const& index, kernel::requires_arch) noexcept @@ -362,7 +363,7 @@ namespace xsimd } template - inline detail::sizes_mismatch_t + XSIMD_INLINE detail::sizes_mismatch_t scatter(batch const& src, U* dst, batch const& index, kernel::requires_arch) noexcept @@ -374,7 +375,7 @@ namespace xsimd } template - inline detail::stride_match_t + XSIMD_INLINE detail::stride_match_t scatter(batch const& src, U* dst, batch const& index, kernel::requires_arch) noexcept @@ -455,7 +456,7 @@ namespace xsimd } template - inline batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept { constexpr size_t bsize = sizeof...(Indices); @@ -512,7 +513,7 @@ namespace xsimd // store template - inline void store(batch_bool const& self, bool* mem, requires_arch) noexcept + XSIMD_INLINE void store(batch_bool const& self, bool* mem, requires_arch) noexcept { using batch_type = batch; constexpr auto size = batch_bool::size; @@ -524,7 +525,7 @@ namespace xsimd // store_aligned template - inline void store_aligned(T_out* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T_out* mem, batch const& self, requires_arch) noexcept { static_assert(!std::is_same::value, "there should be a direct store for this type combination"); alignas(A::alignment()) T_in buffer[batch::size]; @@ -534,7 +535,7 @@ namespace xsimd // store_unaligned template - inline void store_unaligned(T_out* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(T_out* mem, batch const& self, requires_arch) noexcept { static_assert(!std::is_same::value, "there should be a direct store for this type combination"); return store_aligned(mem, self, generic {}); @@ -542,13 +543,13 @@ namespace xsimd // swizzle template - inline batch, A> swizzle(batch, A> const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch, A> swizzle(batch, A> const& self, batch_constant mask, requires_arch) noexcept { return { swizzle(self.real(), mask), swizzle(self.imag(), mask) }; } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { constexpr size_t size = batch::size; alignas(A::alignment()) T self_buffer[size]; @@ -564,7 +565,7 @@ namespace xsimd } template - inline batch, A> swizzle(batch, A> const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch, A> swizzle(batch, A> const& self, batch mask, requires_arch) noexcept { return { swizzle(self.real(), mask), swizzle(self.imag(), mask) }; } @@ -573,26 +574,26 @@ namespace xsimd namespace detail { template - inline batch, A> load_complex(batch const& /*hi*/, batch const& /*lo*/, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& /*hi*/, batch const& 
/*lo*/, requires_arch) noexcept { static_assert(std::is_same::value, "load_complex not implemented for the required architecture"); } template - inline batch complex_high(batch, A> const& /*src*/, requires_arch) noexcept + XSIMD_INLINE batch complex_high(batch, A> const& /*src*/, requires_arch) noexcept { static_assert(std::is_same::value, "complex_high not implemented for the required architecture"); } template - inline batch complex_low(batch, A> const& /*src*/, requires_arch) noexcept + XSIMD_INLINE batch complex_low(batch, A> const& /*src*/, requires_arch) noexcept { static_assert(std::is_same::value, "complex_low not implemented for the required architecture"); } } template - inline batch, A> load_complex_aligned(std::complex const* mem, convert>, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex_aligned(std::complex const* mem, convert>, requires_arch) noexcept { using real_batch = batch; T_in const* buffer = reinterpret_cast(mem); @@ -603,7 +604,7 @@ namespace xsimd // load_complex_unaligned template - inline batch, A> load_complex_unaligned(std::complex const* mem, convert>, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex_unaligned(std::complex const* mem, convert>, requires_arch) noexcept { using real_batch = batch; T_in const* buffer = reinterpret_cast(mem); @@ -614,7 +615,7 @@ namespace xsimd // store_complex_aligned template - inline void store_complex_aligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept + XSIMD_INLINE void store_complex_aligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { using real_batch = batch; real_batch hi = detail::complex_high(src, A {}); @@ -626,7 +627,7 @@ namespace xsimd // store_compelx_unaligned template - inline void store_complex_unaligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept + XSIMD_INLINE void store_complex_unaligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { using real_batch = batch; real_batch hi = detail::complex_high(src, A {}); diff --git a/include/xsimd/arch/generic/xsimd_generic_rounding.hpp b/include/xsimd/arch/generic/xsimd_generic_rounding.hpp index b6a79a451..daf7b58ea 100644 --- a/include/xsimd/arch/generic/xsimd_generic_rounding.hpp +++ b/include/xsimd/arch/generic/xsimd_generic_rounding.hpp @@ -24,7 +24,7 @@ namespace xsimd // ceil template - inline batch ceil(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch ceil(batch const& self, requires_arch) noexcept { batch truncated_self = trunc(self); return select(truncated_self < self, truncated_self + 1, truncated_self); @@ -32,7 +32,7 @@ namespace xsimd // floor template - inline batch floor(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch floor(batch const& self, requires_arch) noexcept { batch truncated_self = trunc(self); return select(truncated_self > self, truncated_self - 1, truncated_self); @@ -40,7 +40,7 @@ namespace xsimd // round template - inline batch round(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch round(batch const& self, requires_arch) noexcept { auto v = abs(self); auto c = ceil(v); @@ -50,17 +50,17 @@ namespace xsimd // trunc template ::value, void>::type> - inline batch trunc(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch trunc(batch const& self, requires_arch) noexcept { return self; } template - inline batch trunc(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch trunc(batch const& self, requires_arch) noexcept { return select(abs(self) < constants::maxflint>(), 
to_float(to_int(self)), self); } template - inline batch trunc(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch trunc(batch const& self, requires_arch) noexcept { return select(abs(self) < constants::maxflint>(), to_float(to_int(self)), self); } diff --git a/include/xsimd/arch/generic/xsimd_generic_trigo.hpp b/include/xsimd/arch/generic/xsimd_generic_trigo.hpp index 2568a7253..b1bb68f25 100644 --- a/include/xsimd/arch/generic/xsimd_generic_trigo.hpp +++ b/include/xsimd/arch/generic/xsimd_generic_trigo.hpp @@ -35,7 +35,7 @@ namespace xsimd // acos template - inline batch acos(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch acos(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type x = abs(self); @@ -47,7 +47,7 @@ namespace xsimd return select(x_larger_05, x, constants::pio2() - x); } template - inline batch, A> acos(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> acos(const batch, A>& z, requires_arch) noexcept { using batch_type = batch, A>; using real_batch = typename batch_type::real_batch; @@ -66,7 +66,7 @@ namespace xsimd * ==================================================== */ template - inline batch acosh(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch acosh(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type x = self - batch_type(1.); @@ -76,7 +76,7 @@ namespace xsimd return select(test, l1pz + constants::log_2(), l1pz); } template - inline batch, A> acosh(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> acosh(const batch, A>& z, requires_arch) noexcept { using batch_type = batch, A>; batch_type w = acos(z); @@ -86,7 +86,7 @@ namespace xsimd // asin template - inline batch asin(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch asin(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type x = abs(self); @@ -105,7 +105,7 @@ namespace xsimd return z ^ sign; } template - inline batch asin(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch asin(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type x = abs(self); @@ -127,7 +127,7 @@ namespace xsimd ^ bitofsign(self)); } template - inline batch, A> asin(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> asin(const batch, A>& z, requires_arch) noexcept { using batch_type = batch, A>; using real_batch = typename batch_type::real_batch; @@ -159,32 +159,32 @@ namespace xsimd namespace detail { template ::value, void>::type> - inline batch + XSIMD_INLINE batch average(const batch& x1, const batch& x2) noexcept { return (x1 & x2) + ((x1 ^ x2) >> 1); } template - inline batch + XSIMD_INLINE batch averagef(const batch& x1, const batch& x2) noexcept { using batch_type = batch; return fma(x1, batch_type(0.5), x2 * batch_type(0.5)); } template - inline batch average(batch const& x1, batch const& x2) noexcept + XSIMD_INLINE batch average(batch const& x1, batch const& x2) noexcept { return averagef(x1, x2); } template - inline batch average(batch const& x1, batch const& x2) noexcept + XSIMD_INLINE batch average(batch const& x1, batch const& x2) noexcept { return averagef(x1, x2); } } template - inline batch asinh(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch asinh(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type x = abs(self); @@ -212,7 +212,7 @@ namespace xsimd #endif } template - inline batch asinh(batch const& self, requires_arch) noexcept + XSIMD_INLINE 
batch asinh(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type x = abs(self); @@ -226,7 +226,7 @@ namespace xsimd return bitofsign(self) ^ z; } template - inline batch, A> asinh(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> asinh(const batch, A>& z, requires_arch) noexcept { using batch_type = batch, A>; batch_type w = asin(batch_type(-z.imag(), z.real())); @@ -238,7 +238,7 @@ namespace xsimd namespace detail { template - static inline batch kernel_atan(const batch& x, const batch& recx) noexcept + static XSIMD_INLINE batch kernel_atan(const batch& x, const batch& recx) noexcept { using batch_type = batch; const auto flag1 = x < constants::tan3pio8(); @@ -259,7 +259,7 @@ namespace xsimd return yy + z1; } template - static inline batch kernel_atan(const batch& x, const batch& recx) noexcept + static XSIMD_INLINE batch kernel_atan(const batch& x, const batch& recx) noexcept { using batch_type = batch; const auto flag1 = x < constants::tan3pio8(); @@ -288,7 +288,7 @@ namespace xsimd } } template - inline batch atan(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch atan(batch const& self, requires_arch) noexcept { using batch_type = batch; const batch_type absa = abs(self); @@ -296,7 +296,7 @@ namespace xsimd return x ^ bitofsign(self); } template - inline batch, A> atan(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> atan(const batch, A>& z, requires_arch) noexcept { using batch_type = batch, A>; using real_batch = typename batch_type::real_batch; @@ -327,7 +327,7 @@ namespace xsimd * ==================================================== */ template - inline batch atanh(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch atanh(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type x = abs(self); @@ -338,7 +338,7 @@ namespace xsimd return bitofsign(self) ^ (batch_type(0.5) * log1p(select(test, fma(t, tmp, t), tmp))); } template - inline batch, A> atanh(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> atanh(const batch, A>& z, requires_arch) noexcept { using batch_type = batch, A>; batch_type w = atan(batch_type(-z.imag(), z.real())); @@ -348,7 +348,7 @@ namespace xsimd // atan2 template - inline batch atan2(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch atan2(batch const& self, batch const& other, requires_arch) noexcept { using batch_type = batch; const batch_type q = abs(self / other); @@ -360,19 +360,19 @@ namespace xsimd namespace detail { template - inline batch quadrant(const batch& x) noexcept + XSIMD_INLINE batch quadrant(const batch& x) noexcept { return x & batch(3); } template - inline batch quadrant(const batch& x) noexcept + XSIMD_INLINE batch quadrant(const batch& x) noexcept { return to_float(quadrant(to_int(x))); } template - inline batch quadrant(const batch& x) noexcept + XSIMD_INLINE batch quadrant(const batch& x) noexcept { using batch_type = batch; batch_type a = x * batch_type(0.25); @@ -389,7 +389,7 @@ namespace xsimd */ template - inline batch cos_eval(const batch& z) noexcept + XSIMD_INLINE batch cos_eval(const batch& z) noexcept { using batch_type = batch; batch_type y = detail::horner - inline batch sin_eval(const batch& z, const batch& x) noexcept + XSIMD_INLINE batch sin_eval(const batch& z, const batch& x) noexcept { using batch_type = batch; batch_type y = detail::horner - static inline batch base_tancot_eval(const batch& z) noexcept + static XSIMD_INLINE batch 
base_tancot_eval(const batch& z) noexcept { using batch_type = batch; batch_type zz = z * z; @@ -426,7 +426,7 @@ namespace xsimd } template - static inline batch tan_eval(const batch& z, const BB& test) noexcept + static XSIMD_INLINE batch tan_eval(const batch& z, const BB& test) noexcept { using batch_type = batch; batch_type y = base_tancot_eval(z); @@ -434,7 +434,7 @@ namespace xsimd } template - static inline batch cot_eval(const batch& z, const BB& test) noexcept + static XSIMD_INLINE batch cot_eval(const batch& z, const BB& test) noexcept { using batch_type = batch; batch_type y = base_tancot_eval(z); @@ -451,7 +451,7 @@ namespace xsimd * ==================================================== */ template - static inline batch cos_eval(const batch& z) noexcept + static XSIMD_INLINE batch cos_eval(const batch& z) noexcept { using batch_type = batch; batch_type y = detail::horner - static inline batch sin_eval(const batch& z, const batch& x) noexcept + static XSIMD_INLINE batch sin_eval(const batch& z, const batch& x) noexcept { using batch_type = batch; batch_type y = detail::horner - static inline batch base_tancot_eval(const batch& z) noexcept + static XSIMD_INLINE batch base_tancot_eval(const batch& z) noexcept { using batch_type = batch; batch_type zz = z * z; @@ -497,7 +497,7 @@ namespace xsimd } template - static inline batch tan_eval(const batch& z, const BB& test) noexcept + static XSIMD_INLINE batch tan_eval(const batch& z, const BB& test) noexcept { using batch_type = batch; batch_type y = base_tancot_eval(z); @@ -505,7 +505,7 @@ namespace xsimd } template - static inline batch cot_eval(const batch& z, const BB& test) noexcept + static XSIMD_INLINE batch cot_eval(const batch& z, const BB& test) noexcept { using batch_type = batch; batch_type y = base_tancot_eval(z); @@ -531,7 +531,7 @@ namespace xsimd template struct trigo_reducer { - static inline B reduce(const B& x, B& xr) noexcept + static XSIMD_INLINE B reduce(const B& x, B& xr) noexcept { if (all(x <= constants::pio4())) { @@ -606,7 +606,7 @@ namespace xsimd template struct trigo_reducer { - static inline B reduce(const B& x, B& xr) noexcept + static XSIMD_INLINE B reduce(const B& x, B& xr) noexcept { B xi = nearbyint(x * B(2.)); B x2 = x - xi * B(0.5); @@ -617,7 +617,7 @@ namespace xsimd } template - inline batch cos(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch cos(batch const& self, requires_arch) noexcept { using batch_type = batch; const batch_type x = abs(self); @@ -634,7 +634,7 @@ namespace xsimd } template - inline batch, A> cos(batch, A> const& z, requires_arch) noexcept + XSIMD_INLINE batch, A> cos(batch, A> const& z, requires_arch) noexcept { return { cos(z.real()) * cosh(z.imag()), -sin(z.real()) * sinh(z.imag()) }; } @@ -652,7 +652,7 @@ namespace xsimd */ template - inline batch cosh(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch cosh(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type x = abs(self); @@ -663,7 +663,7 @@ namespace xsimd return select(test1, tmp1 * tmp, detail::average(tmp, batch_type(1.) 
/ tmp)); } template - inline batch, A> cosh(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> cosh(const batch, A>& z, requires_arch) noexcept { auto x = z.real(); auto y = z.imag(); @@ -674,7 +674,7 @@ namespace xsimd namespace detail { template - inline batch sin(batch const& self, Tag = Tag()) noexcept + XSIMD_INLINE batch sin(batch const& self, Tag = Tag()) noexcept { using batch_type = batch; const batch_type x = abs(self); @@ -692,20 +692,20 @@ namespace xsimd } template - inline batch sin(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch sin(batch const& self, requires_arch) noexcept { return detail::sin(self); } template - inline batch, A> sin(batch, A> const& z, requires_arch) noexcept + XSIMD_INLINE batch, A> sin(batch, A> const& z, requires_arch) noexcept { return { sin(z.real()) * cosh(z.imag()), cos(z.real()) * sinh(z.imag()) }; } // sincos template - inline std::pair, batch> sincos(batch const& self, requires_arch) noexcept + XSIMD_INLINE std::pair, batch> sincos(batch const& self, requires_arch) noexcept { using batch_type = batch; const batch_type x = abs(self); @@ -724,7 +724,7 @@ namespace xsimd } template - inline std::pair, A>, batch, A>> + XSIMD_INLINE std::pair, A>, batch, A>> sincos(batch, A> const& z, requires_arch) noexcept { using batch_type = batch, A>; @@ -749,7 +749,7 @@ namespace xsimd * ==================================================== */ template - inline batch sinh_kernel(batch const& self) noexcept + XSIMD_INLINE batch sinh_kernel(batch const& self) noexcept { using batch_type = batch; batch_type sqr_self = self * self; @@ -763,7 +763,7 @@ namespace xsimd } template - inline batch sinh_kernel(batch const& self) noexcept + XSIMD_INLINE batch sinh_kernel(batch const& self) noexcept { using batch_type = batch; batch_type sqrself = self * self; @@ -792,7 +792,7 @@ namespace xsimd * ==================================================== */ template - inline batch sinh(batch const& a, requires_arch) noexcept + XSIMD_INLINE batch sinh(batch const& a, requires_arch) noexcept { using batch_type = batch; batch_type half(0.5); @@ -814,7 +814,7 @@ namespace xsimd return select(lt1, z, r) ^ bts; } template - inline batch, A> sinh(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> sinh(const batch, A>& z, requires_arch) noexcept { auto x = z.real(); auto y = z.imag(); @@ -823,7 +823,7 @@ namespace xsimd // tan template - inline batch tan(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch tan(batch const& self, requires_arch) noexcept { using batch_type = batch; const batch_type x = abs(self); @@ -836,7 +836,7 @@ namespace xsimd return y ^ bitofsign(self); } template - inline batch, A> tan(batch, A> const& z, requires_arch) noexcept + XSIMD_INLINE batch, A> tan(batch, A> const& z, requires_arch) noexcept { using batch_type = batch, A>; using real_batch = typename batch_type::real_batch; @@ -867,7 +867,7 @@ namespace xsimd struct tanh_kernel> { using batch_type = batch; - static inline batch_type tanh(const batch_type& x) noexcept + static XSIMD_INLINE batch_type tanh(const batch_type& x) noexcept { batch_type sqrx = x * x; return fma(detail::horner> { using batch_type = batch; - static inline batch_type tanh(const batch_type& x) noexcept + static XSIMD_INLINE batch_type tanh(const batch_type& x) noexcept { batch_type sqrx = x * x; return fma(sqrx * p(sqrx) / q(sqrx), x, x); } - static inline batch_type cotanh(const batch_type& x) noexcept + static XSIMD_INLINE batch_type cotanh(const batch_type& x) noexcept 
{ batch_type sqrx = x * x; batch_type qval = q(sqrx); return qval / (x * fma(p(sqrx), sqrx, qval)); } - static inline batch_type p(const batch_type& x) noexcept + static XSIMD_INLINE batch_type p(const batch_type& x) noexcept { return detail::horner(x); } - static inline batch_type q(const batch_type& x) noexcept + static XSIMD_INLINE batch_type q(const batch_type& x) noexcept { return detail::horner1 - inline batch tanh(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch tanh(batch const& self, requires_arch) noexcept { using batch_type = batch; batch_type one(1.); @@ -952,7 +952,7 @@ namespace xsimd return select(test, z, r) ^ bts; } template - inline batch, A> tanh(const batch, A>& z, requires_arch) noexcept + XSIMD_INLINE batch, A> tanh(const batch, A>& z, requires_arch) noexcept { using real_batch = typename batch, A>::real_batch; auto x = z.real(); diff --git a/include/xsimd/arch/xsimd_avx.hpp b/include/xsimd/arch/xsimd_avx.hpp index 66bcb4502..f41702bab 100644 --- a/include/xsimd/arch/xsimd_avx.hpp +++ b/include/xsimd/arch/xsimd_avx.hpp @@ -27,39 +27,39 @@ namespace xsimd // fwd template - inline batch insert(batch const& self, T val, index, requires_arch) noexcept; + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept; namespace detail { - inline void split_avx(__m256i val, __m128i& low, __m128i& high) noexcept + XSIMD_INLINE void split_avx(__m256i val, __m128i& low, __m128i& high) noexcept { low = _mm256_castsi256_si128(val); high = _mm256_extractf128_si256(val, 1); } - inline void split_avx(__m256 val, __m128& low, __m128& high) noexcept + XSIMD_INLINE void split_avx(__m256 val, __m128& low, __m128& high) noexcept { low = _mm256_castps256_ps128(val); high = _mm256_extractf128_ps(val, 1); } - inline void split_avx(__m256d val, __m128d& low, __m128d& high) noexcept + XSIMD_INLINE void split_avx(__m256d val, __m128d& low, __m128d& high) noexcept { low = _mm256_castpd256_pd128(val); high = _mm256_extractf128_pd(val, 1); } - inline __m256i merge_sse(__m128i low, __m128i high) noexcept + XSIMD_INLINE __m256i merge_sse(__m128i low, __m128i high) noexcept { return _mm256_insertf128_si256(_mm256_castsi128_si256(low), high, 1); } - inline __m256 merge_sse(__m128 low, __m128 high) noexcept + XSIMD_INLINE __m256 merge_sse(__m128 low, __m128 high) noexcept { return _mm256_insertf128_ps(_mm256_castps128_ps256(low), high, 1); } - inline __m256d merge_sse(__m128d low, __m128d high) noexcept + XSIMD_INLINE __m256d merge_sse(__m128d low, __m128d high) noexcept { return _mm256_insertf128_pd(_mm256_castpd128_pd256(low), high, 1); } template - inline __m256i fwd_to_sse(F f, __m256i self) noexcept + XSIMD_INLINE __m256i fwd_to_sse(F f, __m256i self) noexcept { __m128i self_low, self_high; split_avx(self, self_low, self_high); @@ -68,7 +68,7 @@ namespace xsimd return merge_sse(res_low, res_high); } template - inline __m256i fwd_to_sse(F f, __m256i self, __m256i other) noexcept + XSIMD_INLINE __m256i fwd_to_sse(F f, __m256i self, __m256i other) noexcept { __m128i self_low, self_high, other_low, other_high; split_avx(self, self_low, self_high); @@ -78,7 +78,7 @@ namespace xsimd return merge_sse(res_low, res_high); } template - inline __m256i fwd_to_sse(F f, __m256i self, int32_t other) noexcept + XSIMD_INLINE __m256i fwd_to_sse(F f, __m256i self, int32_t other) noexcept { __m128i self_low, self_high; split_avx(self, self_low, self_high); @@ -90,13 +90,13 @@ namespace xsimd // abs template - inline batch abs(batch const& self, requires_arch) noexcept + 
XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { __m256 sign_mask = _mm256_set1_ps(-0.f); // -0.f = 1 << 31 return _mm256_andnot_ps(sign_mask, self); } template - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { __m256d sign_mask = _mm256_set1_pd(-0.f); // -0.f = 1 << 31 return _mm256_andnot_pd(sign_mask, self); @@ -104,96 +104,96 @@ namespace xsimd // add template ::value, void>::type> - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept { return add(batch(s), batch(o)); }, self, other); } template - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_add_ps(self, other); } template - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_add_pd(self, other); } // all template - inline bool all(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& self, requires_arch) noexcept { return _mm256_testc_ps(self, batch_bool(true)) != 0; } template - inline bool all(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& self, requires_arch) noexcept { return _mm256_testc_pd(self, batch_bool(true)) != 0; } template ::value, void>::type> - inline bool all(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& self, requires_arch) noexcept { return _mm256_testc_si256(self, batch_bool(true)) != 0; } // any template - inline bool any(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& self, requires_arch) noexcept { return !_mm256_testz_ps(self, self); } template - inline bool any(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& self, requires_arch) noexcept { return !_mm256_testz_pd(self, self); } template ::value, void>::type> - inline bool any(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& self, requires_arch) noexcept { return !_mm256_testz_si256(self, self); } // batch_bool_cast template - inline batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept + XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept { return { bitwise_cast(batch(self.data)).data }; } // bitwise_and template - inline batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_and_ps(self, other); } template - inline batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_and_pd(self, other); } template - inline batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_and_ps(self, other); } template - inline batch_bool bitwise_and(batch_bool const& self, 
batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_and_pd(self, other); } template ::value, void>::type> - inline batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept { return bitwise_and(batch(s), batch(o)); }, self, other); } template ::value, void>::type> - inline batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept { return bitwise_and(batch(s), batch(o)); }, @@ -202,36 +202,36 @@ namespace xsimd // bitwise_andnot template - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_andnot_ps(other, self); } template - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_andnot_pd(other, self); } template - inline batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_andnot_ps(other, self); } template - inline batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_andnot_pd(other, self); } template ::value, void>::type> - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept { return bitwise_andnot(batch(s), batch(o)); }, self, other); } template ::value, void>::type> - inline batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept { return bitwise_andnot(batch(s), batch(o)); }, @@ -240,7 +240,7 @@ namespace xsimd // bitwise_lshift template ::value, void>::type> - inline batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, int32_t o) noexcept { return bitwise_lshift(batch(s), o, sse4_2 {}); }, @@ -249,14 +249,14 @@ namespace xsimd // bitwise_not template ::value, void>::type> - inline batch bitwise_not(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s) noexcept { return bitwise_not(batch(s), sse4_2 {}); }, self); } template ::value, void>::type> - inline batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool 
bitwise_not(batch_bool const& self, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s) noexcept { return bitwise_not(batch_bool(s), sse4_2 {}); }, @@ -265,34 +265,34 @@ namespace xsimd // bitwise_or template - inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_or_ps(self, other); } template - inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_or_pd(self, other); } template - inline batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_or_ps(self, other); } template - inline batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_or_pd(self, other); } template ::value, void>::type> - inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept { return bitwise_or(batch(s), batch(o)); }, self, other); } template ::value, void>::type> - inline batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept { return bitwise_or(batch_bool(s), batch_bool(o)); }, @@ -301,7 +301,7 @@ namespace xsimd // bitwise_rshift template ::value, void>::type> - inline batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, int32_t o) noexcept { return bitwise_rshift(batch(s), o, sse4_2 {}); }, @@ -310,34 +310,34 @@ namespace xsimd // bitwise_xor template - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_xor_ps(self, other); } template - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_xor_pd(self, other); } template - inline batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_xor_ps(self, other); } template - inline batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_xor_pd(self, other); } template ::value, void>::type> - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return 
detail::fwd_to_sse([](__m128i s, __m128i o) noexcept { return bitwise_xor(batch(s), batch(o), sse4_2 {}); }, self, other); } template ::value, void>::type> - inline batch bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept { return bitwise_xor(batch_bool(s), batch_bool(o), sse4_2 {}); }, @@ -346,66 +346,66 @@ namespace xsimd // bitwise_cast template ::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm256_castsi256_ps(self); } template ::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm256_castsi256_pd(self); } template ::type>::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return batch(self.data); } template - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm256_castps_pd(self); } template ::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm256_castps_si256(self); } template - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm256_castpd_ps(self); } template ::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm256_castpd_si256(self); } // bitwise_not template - inline batch bitwise_not(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return _mm256_xor_ps(self, _mm256_castsi256_ps(_mm256_set1_epi32(-1))); } template - inline batch bitwise_not(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return _mm256_xor_pd(self, _mm256_castsi256_pd(_mm256_set1_epi32(-1))); } template - inline batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept { return _mm256_xor_ps(self, _mm256_castsi256_ps(_mm256_set1_epi32(-1))); } template - inline batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept { return _mm256_xor_pd(self, _mm256_castsi256_pd(_mm256_set1_epi32(-1))); } // broadcast template ::value, void>::type> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -430,24 +430,24 @@ namespace xsimd } } template - inline batch broadcast(float val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(float val, requires_arch) 
noexcept { return _mm256_set1_ps(val); } template - inline batch broadcast(double val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(double val, requires_arch) noexcept { return _mm256_set1_pd(val); } // ceil template - inline batch ceil(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch ceil(batch const& self, requires_arch) noexcept { return _mm256_ceil_ps(self); } template - inline batch ceil(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch ceil(batch const& self, requires_arch) noexcept { return _mm256_ceil_pd(self); } @@ -457,7 +457,7 @@ namespace xsimd // On clang, _mm256_extractf128_ps is built upon build_shufflevector // which require index parameter to be a constant template - inline B get_half_complex_f(const B& real, const B& imag) noexcept + XSIMD_INLINE B get_half_complex_f(const B& real, const B& imag) noexcept { __m128 tmp0 = _mm256_extractf128_ps(real, index); __m128 tmp1 = _mm256_extractf128_ps(imag, index); @@ -469,7 +469,7 @@ namespace xsimd return res; } template - inline B get_half_complex_d(const B& real, const B& imag) noexcept + XSIMD_INLINE B get_half_complex_d(const B& real, const B& imag) noexcept { __m128d tmp0 = _mm256_extractf128_pd(real, index); __m128d tmp1 = _mm256_extractf128_pd(imag, index); @@ -483,24 +483,24 @@ namespace xsimd // complex_low template - inline batch complex_low(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_low(batch, A> const& self, requires_arch) noexcept { return get_half_complex_f<0>(self.real(), self.imag()); } template - inline batch complex_low(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_low(batch, A> const& self, requires_arch) noexcept { return get_half_complex_d<0>(self.real(), self.imag()); } // complex_high template - inline batch complex_high(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_high(batch, A> const& self, requires_arch) noexcept { return get_half_complex_f<1>(self.real(), self.imag()); } template - inline batch complex_high(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_high(batch, A> const& self, requires_arch) noexcept { return get_half_complex_d<1>(self.real(), self.imag()); } @@ -510,13 +510,13 @@ namespace xsimd namespace detail { template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm256_cvtepi32_ps(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm256_cvttps_epi32(self); } @@ -524,46 +524,46 @@ namespace xsimd // decr_if template ::value, void>::type> - inline batch decr_if(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch decr_if(batch const& self, batch_bool const& mask, requires_arch) noexcept { return self + batch(mask.data); } // div template - inline batch div(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_div_ps(self, other); } template - inline batch div(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_div_pd(self, other); } // eq template - inline batch_bool eq(batch const& self, batch const& 
other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_cmp_ps(self, other, _CMP_EQ_OQ); } template - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_cmp_pd(self, other, _CMP_EQ_OQ); } template - inline batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return ~(self != other); } template - inline batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return ~(self != other); } template ::value, void>::type> - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept { return eq(batch(s), batch(o), sse4_2 {}); }, @@ -571,26 +571,26 @@ namespace xsimd } template ::value, void>::type> - inline batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return ~(self != other); } // floor template - inline batch floor(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch floor(batch const& self, requires_arch) noexcept { return _mm256_floor_ps(self); } template - inline batch floor(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch floor(batch const& self, requires_arch) noexcept { return _mm256_floor_pd(self); } // from_mask template - inline batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept + XSIMD_INLINE batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept { alignas(A::alignment()) static const uint64_t lut32[] = { 0x0000000000000000ul, @@ -602,7 +602,7 @@ namespace xsimd return _mm256_castsi256_ps(_mm256_setr_epi64x(lut32[mask & 0x3], lut32[(mask >> 2) & 0x3], lut32[(mask >> 4) & 0x3], lut32[mask >> 6])); } template - inline batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept + XSIMD_INLINE batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept { alignas(A::alignment()) static const uint64_t lut64[][4] = { { 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul }, @@ -626,7 +626,7 @@ namespace xsimd return _mm256_castsi256_pd(_mm256_load_si256((const __m256i*)lut64[mask])); } template ::value, void>::type> - inline batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept + XSIMD_INLINE batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept { alignas(A::alignment()) static const uint32_t lut32[] = { 0x00000000, @@ -689,7 +689,7 @@ namespace xsimd // haddp template - inline batch haddp(batch const* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(batch const* row, requires_arch) noexcept { // row = (a,b,c,d,e,f,g,h) // tmp0 = (a0+a1, a2+a3, b0+b1, b2+b3, a4+a5, a6+a7, b4+b5, b6+b7) @@ -715,7 +715,7 @@ namespace xsimd return _mm256_add_ps(tmp0, tmp1); } template - inline batch haddp(batch const* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(batch const* row, requires_arch) 
noexcept { // row = (a,b,c,d) // tmp0 = (a0+a1, b0+b1, a2+a3, b2+b3) @@ -731,14 +731,14 @@ namespace xsimd // incr_if template ::value, void>::type> - inline batch incr_if(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch incr_if(batch const& self, batch_bool const& mask, requires_arch) noexcept { return self - batch(mask.data); } // insert template ::value, void>::type> - inline batch insert(batch const& self, T val, index pos, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, T val, index pos, requires_arch) noexcept { #if !defined(_MSC_VER) || _MSC_VER > 1900 XSIMD_IF_CONSTEXPR(sizeof(T) == 1) @@ -763,41 +763,41 @@ namespace xsimd // isnan template - inline batch_bool isnan(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const& self, requires_arch) noexcept { return _mm256_cmp_ps(self, self, _CMP_UNORD_Q); } template - inline batch_bool isnan(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const& self, requires_arch) noexcept { return _mm256_cmp_pd(self, self, _CMP_UNORD_Q); } // le template - inline batch_bool le(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_cmp_ps(self, other, _CMP_LE_OQ); } template - inline batch_bool le(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_cmp_pd(self, other, _CMP_LE_OQ); } // load_aligned template ::value, void>::type> - inline batch load_aligned(T const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* mem, convert, requires_arch) noexcept { return _mm256_load_si256((__m256i const*)mem); } template - inline batch load_aligned(float const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(float const* mem, convert, requires_arch) noexcept { return _mm256_load_ps(mem); } template - inline batch load_aligned(double const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(double const* mem, convert, requires_arch) noexcept { return _mm256_load_pd(mem); } @@ -806,7 +806,7 @@ namespace xsimd { // load_complex template - inline batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept { using batch_type = batch; __m128 tmp0 = _mm256_extractf128_ps(hi, 0); @@ -825,7 +825,7 @@ namespace xsimd return { real, imag }; } template - inline batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept { using batch_type = batch; __m128d tmp0 = _mm256_extractf128_pd(hi, 0); @@ -845,35 +845,35 @@ namespace xsimd // load_unaligned template ::value, void>::type> - inline batch load_unaligned(T const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* mem, convert, requires_arch) noexcept { return _mm256_loadu_si256((__m256i const*)mem); } template - inline batch load_unaligned(float const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(float const* mem, convert, requires_arch) noexcept { return _mm256_loadu_ps(mem); } template - inline batch load_unaligned(double const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch 
load_unaligned(double const* mem, convert, requires_arch) noexcept { return _mm256_loadu_pd(mem); } // lt template - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_cmp_ps(self, other, _CMP_LT_OQ); } template - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_cmp_pd(self, other, _CMP_LT_OQ); } template ::value, void>::type> - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept { return lt(batch(s), batch(o)); }, @@ -882,7 +882,7 @@ namespace xsimd // mask template ::value, void>::type> - inline uint64_t mask(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1 || sizeof(T) == 2) { @@ -905,86 +905,86 @@ namespace xsimd } } template - inline uint64_t mask(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept { return _mm256_movemask_ps(self); } template - inline uint64_t mask(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept { return _mm256_movemask_pd(self); } // max template - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_max_ps(self, other); } template - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_max_pd(self, other); } template ::value, void>::type> - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { return select(self > other, self, other); } // min template - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_min_ps(self, other); } template - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_min_pd(self, other); } template ::value, void>::type> - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { return select(self <= other, self, other); } // mul template - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_mul_ps(self, other); } template - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_mul_pd(self, other); } // nearbyint template - inline batch nearbyint(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch nearbyint(batch 
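Review note: the mask() kernels collect one bit per lane; for floating-point batches that bit is the lane's sign bit, which is what _mm256_movemask_ps/pd extract. A scalar sketch of the same packing (hypothetical `mask_ref`, not xsimd API):

#include <cstdint>
#include <cstring>

uint64_t mask_ref(const float* lanes, std::size_t n)
{
    uint64_t m = 0;
    for (std::size_t i = 0; i < n; ++i)
    {
        uint32_t bits;
        std::memcpy(&bits, &lanes[i], sizeof(bits));
        m |= uint64_t(bits >> 31) << i; // sign bit of lane i becomes bit i of the mask
    }
    return m;
}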
const& self, requires_arch) noexcept { return _mm256_round_ps(self, _MM_FROUND_TO_NEAREST_INT); } template - inline batch nearbyint(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch nearbyint(batch const& self, requires_arch) noexcept { return _mm256_round_pd(self, _MM_FROUND_TO_NEAREST_INT); } // nearbyint_as_int template - inline batch nearbyint_as_int(batch const& self, - requires_arch) noexcept + XSIMD_INLINE batch nearbyint_as_int(batch const& self, + requires_arch) noexcept { return _mm256_cvtps_epi32(self); } // neg template ::value, void>::type> - inline batch neg(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& self, requires_arch) noexcept { return 0 - self; } @@ -994,55 +994,55 @@ namespace xsimd return _mm256_xor_ps(self, _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000))); } template - inline batch neg(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& self, requires_arch) noexcept { return _mm256_xor_pd(self, _mm256_castsi256_pd(_mm256_set1_epi64x(0x8000000000000000))); } // neq template - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_cmp_ps(self, other, _CMP_NEQ_UQ); } template - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_cmp_pd(self, other, _CMP_NEQ_UQ); } template ::value, void>::type> - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return ~(self == other); } template - inline batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_xor_ps(self, other); } template - inline batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_xor_pd(self, other); } template ::value, void>::type> - inline batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(self.data), _mm256_castsi256_ps(other.data))); } // reciprocal template - inline batch reciprocal(batch const& self, - kernel::requires_arch) noexcept + XSIMD_INLINE batch reciprocal(batch const& self, + kernel::requires_arch) noexcept { return _mm256_rcp_ps(self); } // reduce_add template - inline float reduce_add(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE float reduce_add(batch const& rhs, requires_arch) noexcept { // Warning about _mm256_hadd_ps: // _mm256_hadd_ps(a,b) gives @@ -1060,7 +1060,7 @@ namespace xsimd return _mm_cvtss_f32(_mm256_extractf128_ps(tmp, 0)); } template - inline double reduce_add(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE double reduce_add(batch const& rhs, requires_arch) noexcept { // rhs = (x0, x1, x2, x3) // tmp = (x2, x3, x0, x1) @@ -1072,7 +1072,7 @@ namespace xsimd return _mm_cvtsd_f64(_mm256_extractf128_pd(tmp, 0)); } template ::value, void>::type> - inline T reduce_add(batch const& 
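Review note: the floating-point neg kernels negate by XOR-ing the sign bit (0x80000000 for float, 0x8000000000000000 for double). A scalar sketch of the same bit trick (hypothetical `neg_ref`, not xsimd API):

#include <cstdint>
#include <cstring>

float neg_ref(float x)
{
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits ^= 0x80000000u;                 // flip only the sign bit
    std::memcpy(&x, &bits, sizeof(bits));
    return x;
}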
self, requires_arch) noexcept + XSIMD_INLINE T reduce_add(batch const& self, requires_arch) noexcept { __m128i low, high; detail::split_avx(self, low, high); @@ -1082,7 +1082,7 @@ namespace xsimd // reduce_max template ::type> - inline T reduce_max(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_max(batch const& self, requires_arch) noexcept { constexpr auto mask = detail::shuffle(1, 0); batch step = _mm256_permute2f128_si256(self, self, mask); @@ -1093,7 +1093,7 @@ namespace xsimd // reduce_min template ::type> - inline T reduce_min(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_min(batch const& self, requires_arch) noexcept { constexpr auto mask = detail::shuffle(1, 0); batch step = _mm256_permute2f128_si256(self, self, mask); @@ -1104,19 +1104,19 @@ namespace xsimd // rsqrt template - inline batch rsqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch rsqrt(batch const& val, requires_arch) noexcept { return _mm256_rsqrt_ps(val); } template - inline batch rsqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch rsqrt(batch const& val, requires_arch) noexcept { return _mm256_cvtps_pd(_mm_rsqrt_ps(_mm256_cvtpd_ps(val))); } // sadd template ::value, void>::type> - inline batch sadd(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -1135,17 +1135,17 @@ namespace xsimd // select template - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return _mm256_blendv_ps(false_br, true_br, cond); } template - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return _mm256_blendv_pd(false_br, true_br, cond); } template ::value, void>::type> - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { __m128i cond_low, cond_hi; detail::split_avx(cond, cond_low, cond_hi); @@ -1161,20 +1161,20 @@ namespace xsimd return detail::merge_sse(res_low, res_hi); } template ::value, void>::type> - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { return select(batch_bool { Values... 
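Review note: reduce_max/reduce_min above swap the two 128-bit halves with permute2f128, combine, then finish on the narrower register. A scalar sketch of that fold-in-halves reduction (hypothetical `reduce_max_ref`, a generic model rather than the exact instruction sequence):

#include <algorithm>
#include <array>

template <std::size_t N>
int reduce_max_ref(std::array<int, N> v)
{
    for (std::size_t half = N / 2; half > 0; half /= 2)
        for (std::size_t i = 0; i < half; ++i)
            v[i] = std::max(v[i], v[i + half]); // fold the upper half onto the lower half
    return v[0];
}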
}, true_br, false_br, avx2 {}); } template - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { constexpr auto mask = batch_bool_constant::mask(); return _mm256_blend_ps(false_br, true_br, mask); } template - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { constexpr auto mask = batch_bool_constant::mask(); return _mm256_blend_pd(false_br, true_br, mask); @@ -1182,55 +1182,55 @@ namespace xsimd // set template - inline batch set(batch const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, Values... values) noexcept { static_assert(sizeof...(Values) == batch::size, "consistent init"); return _mm256_setr_ps(values...); } template - inline batch set(batch const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, Values... values) noexcept { static_assert(sizeof...(Values) == batch::size, "consistent init"); return _mm256_setr_pd(values...); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3) noexcept { return _mm256_set_epi64x(v3, v2, v1, v0); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) noexcept { return _mm256_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) noexcept { return _mm256_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15, - T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, T v24, T v25, T v26, T v27, T v28, T v29, T v30, T v31) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15, + T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, T v24, T v25, T v26, T v27, T v28, T v29, T v30, T v31) noexcept { return _mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31); } template ::value, void>::type> - inline batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept { return set(batch(), A {}, static_cast(values ? -1LL : 0LL)...).data; } template - inline batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Values... 
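Review note: the batch_bool set() overloads above materialise boolean lanes for registers without dedicated mask registers by casting -1LL to the lane type, producing an all-ones lane for true. A tiny sketch (hypothetical `bool_lane_ref`, not xsimd API):

#include <cstdint>

template <class T>
T bool_lane_ref(bool v)
{
    return static_cast<T>(v ? -1LL : 0LL); // all-ones for true, zero for false
}
// e.g. bool_lane_ref<uint8_t>(true) == 0xFF, bool_lane_ref<uint32_t>(true) == 0xFFFFFFFF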
values) noexcept { static_assert(sizeof...(Values) == batch_bool::size, "consistent init"); return _mm256_castsi256_ps(set(batch(), A {}, static_cast(values ? -1LL : 0LL)...).data); } template - inline batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept { static_assert(sizeof...(Values) == batch_bool::size, "consistent init"); return _mm256_castsi256_pd(set(batch(), A {}, static_cast(values ? -1LL : 0LL)...).data); @@ -1238,7 +1238,7 @@ namespace xsimd // shuffle template - inline batch shuffle(batch const& x, batch const& y, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant mask, requires_arch) noexcept { constexpr uint32_t smask = detail::mod_shuffle(I0, I1, I2, I3); // shuffle within lane @@ -1253,7 +1253,7 @@ namespace xsimd } template - inline batch shuffle(batch const& x, batch const& y, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant mask, requires_arch) noexcept { constexpr uint32_t smask = (I0 & 0x1) | ((I1 & 0x1) << 1) | ((I2 & 0x1) << 2) | ((I3 & 0x1) << 3); // shuffle within lane @@ -1269,7 +1269,7 @@ namespace xsimd // slide_left template - inline batch slide_left(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch slide_left(batch const& x, requires_arch) noexcept { constexpr unsigned BitCount = N * 8; if (BitCount == 0) @@ -1310,7 +1310,7 @@ namespace xsimd // slide_right template - inline batch slide_right(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch slide_right(batch const& x, requires_arch) noexcept { constexpr unsigned BitCount = N * 8; if (BitCount == 0) @@ -1350,19 +1350,19 @@ namespace xsimd // sqrt template - inline batch sqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch sqrt(batch const& val, requires_arch) noexcept { return _mm256_sqrt_ps(val); } template - inline batch sqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch sqrt(batch const& val, requires_arch) noexcept { return _mm256_sqrt_pd(val); } // ssub template ::value, void>::type> - inline batch ssub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch ssub(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -1377,70 +1377,70 @@ namespace xsimd // store_aligned template ::value, void>::type> - inline void store_aligned(T* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* mem, batch const& self, requires_arch) noexcept { return _mm256_store_si256((__m256i*)mem, self); } template ::value, void>::type> - inline void store_aligned(T* mem, batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* mem, batch_bool const& self, requires_arch) noexcept { return _mm256_store_si256((__m256i*)mem, self); } template - inline void store_aligned(float* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(float* mem, batch const& self, requires_arch) noexcept { return _mm256_store_ps(mem, self); } template - inline void store_aligned(double* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(double* mem, batch const& self, requires_arch) noexcept { return _mm256_store_pd(mem, self); } // store_unaligned template ::value, void>::type> - inline void store_unaligned(T* mem, batch const& self, requires_arch) noexcept + 
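Review note: slide_left/slide_right operate at byte granularity on the whole register, unlike bitwise_lshift which shifts each lane. A scalar sketch of the assumed semantics (hypothetical `slide_left_ref`; the direction shown matches the zero-filling byte shift used by the SSE/AVX kernels, content moving to higher byte offsets):

#include <array>
#include <cstddef>
#include <cstdint>

template <std::size_t N, std::size_t Bytes>
std::array<uint8_t, Bytes> slide_left_ref(const std::array<uint8_t, Bytes>& x)
{
    std::array<uint8_t, Bytes> r {};     // vacated bytes are zero-filled
    for (std::size_t i = N; i < Bytes; ++i)
        r[i] = x[i - N];                 // bytes move up by N positions
    return r;
}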
XSIMD_INLINE void store_unaligned(T* mem, batch const& self, requires_arch) noexcept { return _mm256_storeu_si256((__m256i*)mem, self); } template ::value, void>::type> - inline void store_unaligned(T* mem, batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(T* mem, batch_bool const& self, requires_arch) noexcept { return _mm256_storeu_si256((__m256i*)mem, self); } template - inline void store_unaligned(float* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(float* mem, batch const& self, requires_arch) noexcept { return _mm256_storeu_ps(mem, self); } template - inline void store_unaligned(double* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(double* mem, batch const& self, requires_arch) noexcept { return _mm256_storeu_pd(mem, self); } // sub template ::value, void>::type> - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept { return sub(batch(s), batch(o)); }, self, other); } template - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_sub_ps(self, other); } template - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_sub_pd(self, other); } // swizzle (dynamic mask) template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { // duplicate low and high part of input __m256 hi = _mm256_castps128_ps256(_mm256_extractf128_ps(self, 1)); @@ -1464,7 +1464,7 @@ namespace xsimd } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { // duplicate low and high part of input __m256d hi = _mm256_castpd128_pd256(_mm256_extractf128_pd(self, 1)); @@ -1488,14 +1488,14 @@ namespace xsimd } template = 0> - inline batch swizzle(batch const& self, batch const& mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch const& mask, requires_arch) noexcept { return bitwise_cast( swizzle(bitwise_cast(self), mask)); } template = 0> - inline batch + XSIMD_INLINE batch swizzle(batch const& self, batch const& mask, requires_arch) noexcept { return bitwise_cast( @@ -1504,7 +1504,7 @@ namespace xsimd // swizzle (constant mask) template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { // duplicate low and high part of input __m256 hi = _mm256_castps128_ps256(_mm256_extractf128_ps(self, 1)); @@ -1529,7 +1529,7 @@ namespace xsimd } template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { // duplicate low and high part of input __m256d hi = _mm256_castpd128_pd256(_mm256_extractf128_pd(self, 1)); @@ -1563,17 +1563,17 @@ namespace xsimd uint32_t V6, uint32_t V7, detail::enable_sized_integral_t = 0> - inline batch swizzle(batch const& self, - batch_constant const& mask, - requires_arch) 
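Review note: several AVX integer kernels above (sub, lt, eq, ...) go through detail::fwd_to_sse / split_avx / merge_sse because AVX1 lacks 256-bit integer ALU operations: the batch is split into two 128-bit halves, the SSE kernel runs on each half, and the results are merged. A sketch of that pattern against plain arrays (hypothetical `fwd_to_halves_ref`, not xsimd API):

#include <array>
#include <cstddef>

template <class T, std::size_t N, class HalfKernel>
std::array<T, N> fwd_to_halves_ref(HalfKernel half_op,
                                   const std::array<T, N>& a,
                                   const std::array<T, N>& b)
{
    std::array<T, N / 2> alo, ahi, blo, bhi;
    for (std::size_t i = 0; i < N / 2; ++i)
    {
        alo[i] = a[i]; ahi[i] = a[i + N / 2];   // split both inputs in halves
        blo[i] = b[i]; bhi[i] = b[i + N / 2];
    }
    auto rlo = half_op(alo, blo);               // run the narrow kernel on each half
    auto rhi = half_op(ahi, bhi);
    std::array<T, N> r {};
    for (std::size_t i = 0; i < N / 2; ++i)
    {
        r[i] = rlo[i];                           // merge the two halves back
        r[i + N / 2] = rhi[i];
    }
    return r;
}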
noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch_constant const& mask, + requires_arch) noexcept { return bitwise_cast( swizzle(bitwise_cast(self), mask)); @@ -1586,7 +1586,7 @@ namespace xsimd uint64_t V2, uint64_t V3, detail::enable_sized_integral_t = 0> - inline batch + XSIMD_INLINE batch swizzle(batch const& self, batch_constant const& mask, requires_arch) noexcept @@ -1597,19 +1597,19 @@ namespace xsimd // trunc template - inline batch trunc(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch trunc(batch const& self, requires_arch) noexcept { return _mm256_round_ps(self, _MM_FROUND_TO_ZERO); } template - inline batch trunc(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch trunc(batch const& self, requires_arch) noexcept { return _mm256_round_pd(self, _MM_FROUND_TO_ZERO); } // zip_hi template ::value, void>::type> - inline batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1 || sizeof(T) == 2) { @@ -1656,14 +1656,14 @@ namespace xsimd } } template - inline batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { auto lo = _mm256_unpacklo_ps(self, other); auto hi = _mm256_unpackhi_ps(self, other); return _mm256_permute2f128_ps(lo, hi, 0x31); } template - inline batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { auto lo = _mm256_unpacklo_pd(self, other); auto hi = _mm256_unpackhi_pd(self, other); @@ -1672,7 +1672,7 @@ namespace xsimd // zip_lo template ::value, void>::type> - inline batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1 || sizeof(T) == 2) { @@ -1720,14 +1720,14 @@ namespace xsimd } template - inline batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { auto lo = _mm256_unpacklo_ps(self, other); auto hi = _mm256_unpackhi_ps(self, other); return _mm256_insertf128_ps(lo, _mm256_castps256_ps128(hi), 1); } template - inline batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { auto lo = _mm256_unpacklo_pd(self, other); auto hi = _mm256_unpackhi_pd(self, other); diff --git a/include/xsimd/arch/xsimd_avx2.hpp b/include/xsimd/arch/xsimd_avx2.hpp index c052e0f26..a7b421d8e 100644 --- a/include/xsimd/arch/xsimd_avx2.hpp +++ b/include/xsimd/arch/xsimd_avx2.hpp @@ -26,7 +26,7 @@ namespace xsimd // abs template ::value, void>::type> - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { if (std::is_signed::value) { @@ -52,7 +52,7 @@ namespace xsimd // add template ::value, void>::type> - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -78,7 +78,7 @@ namespace xsimd // avgr template ::value, void>::type> - inline batch avgr(batch const& self, batch const& other, 
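Review note: zip_lo/zip_hi interleave the low (resp. high) halves of the two inputs element by element; the AVX kernels reach that layout with unpacklo/unpackhi plus a cross-lane permute or insert. A scalar sketch of the semantics (hypothetical `zip_lo_ref`, not xsimd API):

#include <array>
#include <cstddef>

template <class T, std::size_t N>
std::array<T, N> zip_lo_ref(const std::array<T, N>& a, const std::array<T, N>& b)
{
    std::array<T, N> r {};
    for (std::size_t i = 0; i < N / 2; ++i)
    {
        r[2 * i] = a[i];         // a0, b0, a1, b1, ...
        r[2 * i + 1] = b[i];
    }
    return r;
}
// zip_hi_ref is the same loop reading a[i + N/2] and b[i + N/2].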
requires_arch) noexcept + XSIMD_INLINE batch avgr(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -96,7 +96,7 @@ namespace xsimd // avg template ::value, void>::type> - inline batch avg(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch avg(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -116,43 +116,43 @@ namespace xsimd // bitwise_and template ::value, void>::type> - inline batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_and_si256(self, other); } template ::value, void>::type> - inline batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_and_si256(self, other); } // bitwise_andnot template ::value, void>::type> - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_andnot_si256(other, self); } template ::value, void>::type> - inline batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_andnot_si256(other, self); } // bitwise_not template ::value, void>::type> - inline batch bitwise_not(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return _mm256_xor_si256(self, _mm256_set1_epi32(-1)); } template ::value, void>::type> - inline batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept { return _mm256_xor_si256(self, _mm256_set1_epi32(-1)); } // bitwise_lshift template ::value, void>::type> - inline batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 2) { @@ -173,7 +173,7 @@ namespace xsimd } template ::value, void>::type> - inline batch bitwise_lshift(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 4) { @@ -191,19 +191,19 @@ namespace xsimd // bitwise_or template ::value, void>::type> - inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_or_si256(self, other); } template ::value, void>::type> - inline batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_or_si256(self, other); } // bitwise_rshift template ::value, void>::type> - inline batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept { if (std::is_signed::value) { @@ 
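Review note: the avgr and avg kernels above differ only in rounding. A scalar sketch of the assumed per-lane semantics for unsigned 8-bit lanes (hypothetical helpers, computed in a wider type to avoid overflow): avgr rounds the halved sum up, matching the hardware pavg behaviour, while avg truncates.

#include <cstdint>

uint8_t avgr_ref(uint8_t a, uint8_t b) { return uint8_t((uint16_t(a) + b + 1) >> 1); } // round up
uint8_t avg_ref(uint8_t a, uint8_t b)  { return uint8_t((uint16_t(a) + b) >> 1); }     // truncate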
-253,7 +253,7 @@ namespace xsimd } template ::value, void>::type> - inline batch bitwise_rshift(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -285,19 +285,19 @@ namespace xsimd // bitwise_xor template ::value, void>::type> - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return _mm256_xor_si256(self, other); } template ::value, void>::type> - inline batch bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm256_xor_si256(self, other); } // complex_low template - inline batch complex_low(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_low(batch, A> const& self, requires_arch) noexcept { __m256d tmp0 = _mm256_permute4x64_pd(self.real(), _MM_SHUFFLE(3, 1, 1, 0)); __m256d tmp1 = _mm256_permute4x64_pd(self.imag(), _MM_SHUFFLE(1, 2, 0, 0)); @@ -306,7 +306,7 @@ namespace xsimd // complex_high template - inline batch complex_high(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_high(batch, A> const& self, requires_arch) noexcept { __m256d tmp0 = _mm256_permute4x64_pd(self.real(), _MM_SHUFFLE(3, 3, 1, 2)); __m256d tmp1 = _mm256_permute4x64_pd(self.imag(), _MM_SHUFFLE(3, 2, 2, 0)); @@ -318,7 +318,7 @@ namespace xsimd { template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { // from https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx // adapted to avx @@ -332,7 +332,7 @@ namespace xsimd } template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { // from https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx // adapted to avx @@ -349,7 +349,7 @@ namespace xsimd // eq template ::value, void>::type> - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -375,16 +375,16 @@ namespace xsimd // gather template = 0, detail::enable_sized_integral_t = 0> - inline batch gather(batch const&, T const* src, batch const& index, - kernel::requires_arch) noexcept + XSIMD_INLINE batch gather(batch const&, T const* src, batch const& index, + kernel::requires_arch) noexcept { // scatter for this one is AVX512F+AVX512VL return _mm256_i32gather_epi32(reinterpret_cast(src), index, sizeof(T)); } template = 0, detail::enable_sized_integral_t = 0> - inline batch gather(batch const&, T const* src, batch const& index, - kernel::requires_arch) noexcept + XSIMD_INLINE batch gather(batch const&, T const* src, batch const& index, + kernel::requires_arch) noexcept { // scatter for this one is AVX512F+AVX512VL return _mm256_i64gather_epi64(reinterpret_cast(src), index, sizeof(T)); @@ -392,18 +392,18 @@ namespace xsimd template = 0> - inline batch gather(batch const&, float const* src, - batch const& index, - kernel::requires_arch) noexcept + 
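Review note: the gather kernels above map directly onto the _mm256_i32gather_*/_mm256_i64gather_* intrinsics with a scale of sizeof(T). A scalar sketch of what a gather computes (hypothetical `gather_ref`, not xsimd API):

#include <array>
#include <cstddef>

template <class T, class I, std::size_t N>
std::array<T, N> gather_ref(const T* src, const std::array<I, N>& index)
{
    std::array<T, N> r {};
    for (std::size_t i = 0; i < N; ++i)
        r[i] = src[index[i]];   // lane i of the result comes from src[index[i]]
    return r;
}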
XSIMD_INLINE batch gather(batch const&, float const* src, + batch const& index, + kernel::requires_arch) noexcept { // scatter for this one is AVX512F+AVX512VL return _mm256_i32gather_ps(src, index, sizeof(float)); } template = 0> - inline batch gather(batch const&, double const* src, - batch const& index, - requires_arch) noexcept + XSIMD_INLINE batch gather(batch const&, double const* src, + batch const& index, + requires_arch) noexcept { // scatter for this one is AVX512F+AVX512VL return _mm256_i64gather_pd(src, index, sizeof(double)); @@ -411,9 +411,9 @@ namespace xsimd // gather: handmade conversions template = 0> - inline batch gather(batch const&, double const* src, - batch const& index, - requires_arch) noexcept + XSIMD_INLINE batch gather(batch const&, double const* src, + batch const& index, + requires_arch) noexcept { const batch low(_mm256_i32gather_pd(src, _mm256_castsi256_si128(index.data), sizeof(double))); const batch high(_mm256_i32gather_pd(src, _mm256_extractf128_si256(index.data, 1), sizeof(double))); @@ -421,9 +421,9 @@ namespace xsimd } template = 0> - inline batch gather(batch const&, double const* src, - batch const& index, - requires_arch) noexcept + XSIMD_INLINE batch gather(batch const&, double const* src, + batch const& index, + requires_arch) noexcept { const batch low(_mm256_i32gather_pd(src, _mm256_castsi256_si128(index.data), sizeof(double))); const batch high(_mm256_i32gather_pd(src, _mm256_extractf128_si256(index.data, 1), sizeof(double))); @@ -432,7 +432,7 @@ namespace xsimd // lt template ::value, void>::type> - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -465,7 +465,7 @@ namespace xsimd // load_complex template - inline batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept { using batch_type = batch; batch_type real = _mm256_castpd_ps( @@ -479,7 +479,7 @@ namespace xsimd return { real, imag }; } template - inline batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept { using batch_type = batch; batch_type real = _mm256_permute4x64_pd(_mm256_unpacklo_pd(hi, lo), _MM_SHUFFLE(3, 1, 2, 0)); @@ -488,7 +488,7 @@ namespace xsimd } // mask template ::value, void>::type> - inline uint64_t mask(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -507,7 +507,7 @@ namespace xsimd // max template ::value, void>::type> - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -551,7 +551,7 @@ namespace xsimd // min template ::value, void>::type> - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -595,7 +595,7 @@ namespace xsimd // mul template ::value, void>::type> - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { 
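Review note: load_complex de-interleaves memory laid out as (real, imag) pairs into separate real and imaginary vectors; the AVX2 kernels do this with unpacklo/unpackhi and permute4x64 instead of a loop. A scalar sketch of the overall effect (hypothetical `load_complex_ref`, not the exact two-register kernel signature):

#include <array>
#include <cstddef>

template <class T, std::size_t N>
void load_complex_ref(const T* mem, std::array<T, N>& real, std::array<T, N>& imag)
{
    for (std::size_t i = 0; i < N; ++i)
    {
        real[i] = mem[2 * i];       // even slots hold real parts
        imag[i] = mem[2 * i + 1];   // odd slots hold imaginary parts
    }
}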
XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -623,7 +623,7 @@ namespace xsimd // reduce_add template ::value, void>::type> - inline T reduce_add(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_add(batch const& self, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 4) { @@ -657,19 +657,19 @@ namespace xsimd // rotate_right template - inline batch rotate_right(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch rotate_right(batch const& self, requires_arch) noexcept { return _mm256_alignr_epi8(self, self, N); } template - inline batch rotate_right(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch rotate_right(batch const& self, requires_arch) noexcept { return bitwise_cast(rotate_right(bitwise_cast(self), avx2 {})); } // sadd template ::value, void>::type> - inline batch sadd(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -705,7 +705,7 @@ namespace xsimd // select template ::value, void>::type> - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -729,7 +729,7 @@ namespace xsimd } } template ::value, void>::type> - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { constexpr int mask = batch_bool_constant::mask(); // FIXME: for some reason mask here is not considered as an immediate, @@ -752,7 +752,7 @@ namespace xsimd // slide_left template - inline batch slide_left(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch slide_left(batch const& x, requires_arch) noexcept { constexpr unsigned BitCount = N * 8; if (BitCount == 0) @@ -783,7 +783,7 @@ namespace xsimd // slide_right template - inline batch slide_right(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch slide_right(batch const& x, requires_arch) noexcept { constexpr unsigned BitCount = N * 8; if (BitCount == 0) @@ -814,7 +814,7 @@ namespace xsimd // ssub template ::value, void>::type> - inline batch ssub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch ssub(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -850,7 +850,7 @@ namespace xsimd // sub template ::value, void>::type> - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -876,13 +876,13 @@ namespace xsimd // swizzle (dynamic mask) template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return _mm256_permutevar8x32_ps(self, mask); } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { batch broadcaster = { 0, 1, 0, 1, 0, 1, 0, 1 }; constexpr uint64_t comb = 0x0000000100000001ul * 2; @@ -890,65 +890,65 @@ namespace xsimd } template - inline batch swizzle(batch const& self, batch mask, 
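Review note: sadd/ssub are saturating: the exact sum or difference is clamped to the lane's representable range instead of wrapping. A scalar sketch for signed 8-bit lanes (hypothetical `sadd_ref`, not xsimd API):

#include <algorithm>
#include <cstdint>
#include <limits>

int8_t sadd_ref(int8_t a, int8_t b)
{
    int sum = int(a) + int(b);                                         // exact in a wider type
    sum = std::max<int>(std::numeric_limits<int8_t>::min(),
                        std::min<int>(std::numeric_limits<int8_t>::max(), sum)); // clamp
    return int8_t(sum);
}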
requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, avx2 {})); } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, avx2 {})); } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return _mm256_permutevar8x32_epi32(self, mask); } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, avx2 {})); } // swizzle (constant mask) template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return _mm256_permutevar8x32_ps(self, mask.as_batch()); } template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { constexpr auto mask = detail::shuffle(V0, V1, V2, V3); return _mm256_permute4x64_pd(self, mask); } template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { constexpr auto mask = detail::shuffle(V0, V1, V2, V3); return _mm256_permute4x64_epi64(self, mask); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, avx2 {})); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return _mm256_permutevar8x32_epi32(self, mask.as_batch()); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, avx2 {})); } // zip_hi template ::value, void>::type> - inline batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -983,7 +983,7 @@ namespace xsimd // zip_lo template ::value, void>::type> - inline batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { diff --git a/include/xsimd/arch/xsimd_avx512bw.hpp b/include/xsimd/arch/xsimd_avx512bw.hpp index db72b73ca..8b381986c 100644 --- a/include/xsimd/arch/xsimd_avx512bw.hpp +++ b/include/xsimd/arch/xsimd_avx512bw.hpp @@ -27,7 +27,7 @@ namespace xsimd namespace detail { template - inline batch_bool compare_int_avx512bw(batch const& self, batch const& other) noexcept + XSIMD_INLINE batch_bool compare_int_avx512bw(batch const& self, batch const& other) noexcept { using register_type = typename batch_bool::register_type; 
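Review note: the dynamic swizzle kernels above implement a lane permutation driven by a runtime index vector, which is exactly what _mm256_permutevar8x32_ps/epi32 do for 8 x 32-bit lanes. A scalar sketch of the semantics (hypothetical `swizzle_ref`; the modulo is only there to keep the sketch in bounds):

#include <array>
#include <cstddef>
#include <cstdint>

template <class T, std::size_t N>
std::array<T, N> swizzle_ref(const std::array<T, N>& self, const std::array<uint32_t, N>& mask)
{
    std::array<T, N> r {};
    for (std::size_t i = 0; i < N; ++i)
        r[i] = self[mask[i] % N];   // lane i of the result is self[mask[i]]
    return r;
}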
if (std::is_signed::value) @@ -73,7 +73,7 @@ namespace xsimd // abs template ::value, void>::type> - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { if (std::is_unsigned::value) { @@ -96,7 +96,7 @@ namespace xsimd // add template ::value, void>::type> - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -114,7 +114,7 @@ namespace xsimd // avgr template ::value, void>::type> - inline batch avgr(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch avgr(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -132,7 +132,7 @@ namespace xsimd // avg template ::value, void>::type> - inline batch avg(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch avg(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -152,7 +152,7 @@ namespace xsimd // bitwise_lshift template ::value, void>::type> - inline batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept { #if defined(XSIMD_AVX512_SHIFT_INTRINSICS_IMM_ONLY) XSIMD_IF_CONSTEXPR(sizeof(T) == 2) @@ -172,7 +172,7 @@ namespace xsimd // bitwise_rshift template ::value, void>::type> - inline batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept { if (std::is_signed::value) { @@ -226,42 +226,42 @@ namespace xsimd // eq template ::value, void>::type> - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { return detail::compare_int_avx512bw(self, other); } // ge template ::value, void>::type> - inline batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept { return detail::compare_int_avx512bw(self, other); } // gt template ::value, void>::type> - inline batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept { return detail::compare_int_avx512bw(self, other); } // le template ::value, void>::type> - inline batch_bool le(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& self, batch const& other, requires_arch) noexcept { return detail::compare_int_avx512bw(self, other); } // lt template ::value, void>::type> - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { return detail::compare_int_avx512bw(self, other); } // max template ::value, void>::type> - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -297,7 +297,7 @@ namespace xsimd // min template ::value, void>::type> - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + 
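Review note: on AVX-512, batch_bool is backed by a k-mask register with one bit per lane, so comparisons such as compare_int_avx512bw return a plain integer mask rather than a full-width vector. A tiny sketch of how a lane is read back (hypothetical `mask_lane_ref`, not xsimd API):

#include <cstdint>

bool mask_lane_ref(uint64_t kmask, unsigned i)
{
    return ((kmask >> i) & 1u) != 0;   // lane i of the boolean batch is bit i of the mask
}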
XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -333,7 +333,7 @@ namespace xsimd // mul template ::value, void>::type> - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -353,26 +353,26 @@ namespace xsimd // neq template ::value, void>::type> - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return detail::compare_int_avx512bw(self, other); } // rotate_right template - inline batch rotate_right(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch rotate_right(batch const& self, requires_arch) noexcept { return _mm512_alignr_epi8(self, self, N); } template - inline batch rotate_right(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch rotate_right(batch const& self, requires_arch) noexcept { return bitwise_cast(rotate_right(bitwise_cast(self), avx2 {})); } // sadd template ::value, void>::type> - inline batch sadd(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -408,7 +408,7 @@ namespace xsimd // select template ::value, void>::type> - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -446,7 +446,7 @@ namespace xsimd } template - inline batch slide_left(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch slide_left(batch const& x, requires_arch) noexcept { constexpr unsigned BitCount = N * 8; if (BitCount == 0) @@ -505,7 +505,7 @@ namespace xsimd } } template - inline batch slide_right(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch slide_right(batch const& x, requires_arch) noexcept { constexpr unsigned BitCount = N * 8; if (BitCount == 0) @@ -538,7 +538,7 @@ namespace xsimd // ssub template ::value, void>::type> - inline batch ssub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch ssub(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -574,7 +574,7 @@ namespace xsimd // sub template ::value, void>::type> - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -592,57 +592,57 @@ namespace xsimd // swizzle (dynamic version) template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return _mm512_permutexvar_epi16(mask, self); } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, avx512bw {})); } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return _mm512_shuffle_epi8(self, mask); } template 
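Review note: with k-mask conditions, select is a masked blend: lane i comes from the true branch when bit i of the condition mask is set, otherwise from the false branch. A scalar sketch (hypothetical `select_ref`, not xsimd API):

#include <array>
#include <cstddef>
#include <cstdint>

template <class T, std::size_t N>
std::array<T, N> select_ref(uint64_t cond, const std::array<T, N>& t, const std::array<T, N>& f)
{
    std::array<T, N> r {};
    for (std::size_t i = 0; i < N; ++i)
        r[i] = ((cond >> i) & 1u) ? t[i] : f[i];
    return r;
}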
- inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, avx512bw {})); } // swizzle (static version) template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return swizzle(self, mask.as_batch(), avx512bw {}); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return swizzle(self, mask.as_batch(), avx512bw {}); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return swizzle(self, mask.as_batch(), avx512bw {}); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return swizzle(self, mask.as_batch(), avx512bw {}); } // zip_hi template ::value, void>::type> - inline batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { __m512i lo, hi; XSIMD_IF_CONSTEXPR(sizeof(T) == 1) @@ -670,7 +670,7 @@ namespace xsimd // zip_lo template ::value, void>::type> - inline batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { __m512i lo, hi; XSIMD_IF_CONSTEXPR(sizeof(T) == 1) diff --git a/include/xsimd/arch/xsimd_avx512dq.hpp b/include/xsimd/arch/xsimd_avx512dq.hpp index 7840ea8fc..4788d19e9 100644 --- a/include/xsimd/arch/xsimd_avx512dq.hpp +++ b/include/xsimd/arch/xsimd_avx512dq.hpp @@ -23,54 +23,54 @@ namespace xsimd // bitwise_and template - inline batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_and_ps(self, other); } template - inline batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_and_pd(self, other); } // bitwise_andnot template - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_andnot_ps(other, self); } template - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_andnot_pd(other, self); } // bitwise_not template - inline batch bitwise_not(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return _mm512_xor_ps(self, _mm512_castsi512_ps(_mm512_set1_epi32(-1))); } template - inline batch bitwise_not(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return _mm512_xor_pd(self, _mm512_castsi512_pd(_mm512_set1_epi32(-1))); } // bitwise_or template 
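Review note: the bitwise_andnot kernels pass their operands to the intrinsic swapped because xsimd's bitwise_andnot(self, other) means self & ~other, while the hardware andnot instruction complements its first operand. A tiny sketch of the two conventions (hypothetical helpers):

#include <cstdint>

uint32_t bitwise_andnot_ref(uint32_t self, uint32_t other) { return self & ~other; } // xsimd semantics
uint32_t andnot_intrinsic_ref(uint32_t a, uint32_t b)      { return ~a & b; }        // _mm*_andnot_* semantics
// bitwise_andnot_ref(x, y) == andnot_intrinsic_ref(y, x), hence the swapped call sites above.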
- inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_or_ps(self, other); } template - inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_or_pd(self, other); } template - inline batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return register_type(self.data | other.data); @@ -78,19 +78,19 @@ namespace xsimd // bitwise_xor template - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_xor_ps(self, other); } template - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_xor_pd(self, other); } // haddp template - inline batch haddp(batch const* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(batch const* row, requires_arch) noexcept { // The following folds over the vector once: // tmp1 = [a0..8, b0..8] @@ -152,35 +152,35 @@ namespace xsimd // ldexp template - inline batch ldexp(const batch& self, const batch, A>& other, requires_arch) noexcept + XSIMD_INLINE batch ldexp(const batch& self, const batch, A>& other, requires_arch) noexcept { return _mm512_scalef_pd(self, _mm512_cvtepi64_pd(other)); } // mul template - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_mullo_epi64(self, other); } template - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_mullo_epi64(self, other); } // nearbyint_as_int template - inline batch nearbyint_as_int(batch const& self, - requires_arch) noexcept + XSIMD_INLINE batch nearbyint_as_int(batch const& self, + requires_arch) noexcept { return _mm512_cvtpd_epi64(self); } // reduce_add template - inline float reduce_add(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE float reduce_add(batch const& rhs, requires_arch) noexcept { __m256 tmp1 = _mm512_extractf32x8_ps(rhs, 1); __m256 tmp2 = _mm512_extractf32x8_ps(rhs, 0); @@ -192,13 +192,13 @@ namespace xsimd namespace detail { template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { return _mm512_cvtepi64_pd(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm512_cvttpd_epi64(self); } diff --git a/include/xsimd/arch/xsimd_avx512f.hpp b/include/xsimd/arch/xsimd_avx512f.hpp index d94c68101..c2b485a30 100644 --- a/include/xsimd/arch/xsimd_avx512f.hpp +++ b/include/xsimd/arch/xsimd_avx512f.hpp @@ -27,30 +27,30 @@ namespace xsimd namespace detail { - inline void split_avx512(__m512 val, 
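Review note: ldexp scales each lane by a power of two taken from an integer batch; _mm512_scalef_pd performs that scaling once the exponents are converted to double. A scalar sketch of the per-lane semantics (hypothetical `ldexp_ref`, not xsimd API):

#include <cmath>

double ldexp_ref(double self, long long exponent)
{
    return std::ldexp(self, static_cast<int>(exponent));   // self * 2^exponent
}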
__m256& low, __m256& high) noexcept + XSIMD_INLINE void split_avx512(__m512 val, __m256& low, __m256& high) noexcept { low = _mm512_castps512_ps256(val); high = _mm512_extractf32x8_ps(val, 1); } - inline void split_avx512(__m512d val, __m256d& low, __m256d& high) noexcept + XSIMD_INLINE void split_avx512(__m512d val, __m256d& low, __m256d& high) noexcept { low = _mm512_castpd512_pd256(val); high = _mm512_extractf64x4_pd(val, 1); } - inline void split_avx512(__m512i val, __m256i& low, __m256i& high) noexcept + XSIMD_INLINE void split_avx512(__m512i val, __m256i& low, __m256i& high) noexcept { low = _mm512_castsi512_si256(val); high = _mm512_extracti64x4_epi64(val, 1); } - inline __m512i merge_avx(__m256i low, __m256i high) noexcept + XSIMD_INLINE __m512i merge_avx(__m256i low, __m256i high) noexcept { return _mm512_inserti64x4(_mm512_castsi256_si512(low), high, 1); } - inline __m512 merge_avx(__m256 low, __m256 high) noexcept + XSIMD_INLINE __m512 merge_avx(__m256 low, __m256 high) noexcept { return _mm512_castpd_ps(_mm512_insertf64x4(_mm512_castpd256_pd512(_mm256_castps_pd(low)), _mm256_castps_pd(high), 1)); } - inline __m512d merge_avx(__m256d low, __m256d high) noexcept + XSIMD_INLINE __m512d merge_avx(__m256d low, __m256d high) noexcept { return _mm512_insertf64x4(_mm512_castpd256_pd512(low), high, 1); } @@ -86,7 +86,7 @@ namespace xsimd namespace detail { - inline uint32_t morton(uint16_t x, uint16_t y) noexcept + XSIMD_INLINE uint32_t morton(uint16_t x, uint16_t y) noexcept { static const unsigned short MortonTable256[256] = { @@ -129,7 +129,7 @@ namespace xsimd } template - inline batch_bool compare_int_avx512f(batch const& self, batch const& other) noexcept + XSIMD_INLINE batch_bool compare_int_avx512f(batch const& self, batch const& other) noexcept { using register_type = typename batch_bool::register_type; if (std::is_signed::value) @@ -217,7 +217,7 @@ namespace xsimd // abs template - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { __m512 self_asf = (__m512)self; __m512i self_asi = *reinterpret_cast<__m512i*>(&self_asf); @@ -225,7 +225,7 @@ namespace xsimd return *reinterpret_cast<__m512*>(&res_asi); } template - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { __m512d self_asd = (__m512d)self; __m512i self_asi = *reinterpret_cast<__m512i*>(&self_asd); @@ -234,7 +234,7 @@ namespace xsimd return *reinterpret_cast<__m512d*>(&res_asi); } template ::value, void>::type> - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { if (std::is_unsigned::value) { @@ -270,7 +270,7 @@ namespace xsimd // add template ::value, void>::type> - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -299,19 +299,19 @@ namespace xsimd } } template - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_add_ps(self, other); } template - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_add_pd(self, other); } // all template - inline bool all(batch_bool 
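Review note: detail::morton interleaves the bits of two 16-bit inputs into a 32-bit Morton code; MortonTable256 is the classic byte-at-a-time lookup speedup for this. A plain-loop sketch of the assumed bit layout (hypothetical `morton_ref`; x in the even bits, y in the odd bits, following the standard table-based formulation):

#include <cstdint>

uint32_t morton_ref(uint16_t x, uint16_t y)
{
    uint32_t z = 0;
    for (unsigned i = 0; i < 16; ++i)
    {
        z |= uint32_t((x >> i) & 1u) << (2 * i);       // bit i of x -> bit 2i
        z |= uint32_t((y >> i) & 1u) << (2 * i + 1);   // bit i of y -> bit 2i+1
    }
    return z;
}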
const& self, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& self, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return self.data == register_type(-1); @@ -319,7 +319,7 @@ namespace xsimd // any template - inline bool any(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& self, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return self.data != register_type(0); @@ -327,14 +327,14 @@ namespace xsimd // batch_bool_cast template - inline batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept + XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept { return self.data; } // bitwise_and template - inline batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { #if defined(_MSC_VER) return _mm512_and_ps(self, other); @@ -343,19 +343,19 @@ namespace xsimd #endif } template - inline batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(self), _mm512_castpd_si512(other))); } template ::value, void>::type> - inline batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_and_si512(self, other); } template - inline batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return register_type(self.data & other.data); @@ -363,24 +363,24 @@ namespace xsimd // bitwise_andnot template - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_castsi512_ps(_mm512_andnot_si512(_mm512_castps_si512(other), _mm512_castps_si512(self))); } template - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_castsi512_pd(_mm512_andnot_si512(_mm512_castpd_si512(other), _mm512_castpd_si512(self))); } template ::value, void>::type> - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_andnot_si512(other, self); } template - inline batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return register_type(self.data & ~other.data); @@ -388,7 +388,7 @@ namespace xsimd // bitwise_lshift template ::value, void>::type> - inline batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept { 
XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -433,56 +433,56 @@ namespace xsimd // bitwise_not template ::value, void>::type> - inline batch bitwise_not(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return _mm512_xor_si512(self, _mm512_set1_epi32(-1)); } template - inline batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return register_type(~self.data); } template - inline batch bitwise_not(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return _mm512_castsi512_ps(_mm512_xor_si512(_mm512_castps_si512(self), _mm512_set1_epi32(-1))); } template - inline batch bitwise_not(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return _mm512_castsi512_pd(_mm512_xor_si512(_mm512_castpd_si512(self), _mm512_set1_epi32(-1))); } // bitwise_or template - inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_castsi512_ps(_mm512_or_si512(_mm512_castps_si512(self), _mm512_castps_si512(other))); } template - inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_castsi512_pd(_mm512_or_si512(_mm512_castpd_si512(self), _mm512_castpd_si512(other))); } template - inline batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return register_type(self.data | other.data); } template ::value, void>::type> - inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_or_si512(self, other); } // bitwise_rshift template ::value, void>::type> - inline batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept { if (std::is_signed::value) { @@ -552,69 +552,69 @@ namespace xsimd // bitwise_xor template - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_castsi512_ps(_mm512_xor_si512(_mm512_castps_si512(self), _mm512_castps_si512(other))); } template - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_castsi512_pd(_mm512_xor_si512(_mm512_castpd_si512(self), _mm512_castpd_si512(other))); } template - inline batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return register_type(self.data | other.data); } 
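Note (illustration only, not part of the patch): the float/double bitwise kernels above only swap the inline specifier; the underlying trick is unchanged. Plain AVX-512F has no _ps/_pd logical intrinsics (those arrive with AVX-512DQ), so the kernels cast through the integer domain. A minimal standalone sketch of that pattern, with a hypothetical function name of my own:

    #include <immintrin.h>

    // Bitwise XOR on packed floats via the integer domain, mirroring the
    // _mm512_castps_si512 / _mm512_xor_si512 / _mm512_castsi512_ps chain
    // used by the AVX-512F kernels above.
    inline __m512 xor_ps_via_si512(__m512 a, __m512 b) noexcept
    {
        return _mm512_castsi512_ps(
            _mm512_xor_si512(_mm512_castps_si512(a), _mm512_castps_si512(b)));
    }
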
template ::value, void>::type> - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_xor_si512(self, other); } // bitwise_cast template ::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm512_castsi512_ps(self); } template ::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm512_castsi512_pd(self); } template ::type>::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return batch(self.data); } template - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm512_castps_pd(self); } template ::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm512_castps_si512(self); } template - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm512_castpd_ps(self); } template ::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm512_castpd_si512(self); } // broadcast template ::value, void>::type> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -639,56 +639,56 @@ namespace xsimd } } template - inline batch broadcast(float val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(float val, requires_arch) noexcept { return _mm512_set1_ps(val); } template - batch inline broadcast(double val, requires_arch) noexcept + batch XSIMD_INLINE broadcast(double val, requires_arch) noexcept { return _mm512_set1_pd(val); } // ceil template - inline batch ceil(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch ceil(batch const& self, requires_arch) noexcept { return _mm512_roundscale_ps(self, _MM_FROUND_TO_POS_INF); } template - inline batch ceil(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch ceil(batch const& self, requires_arch) noexcept { return _mm512_roundscale_pd(self, _MM_FROUND_TO_POS_INF); } // compress template - inline batch compress(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch compress(batch const& self, batch_bool const& mask, requires_arch) noexcept { return _mm512_maskz_compress_ps(mask.mask(), self); } template - inline batch compress(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch compress(batch const& self, batch_bool const& mask, requires_arch) noexcept { return _mm512_maskz_compress_pd(mask.mask(), self); } template - inline batch compress(batch const& self, batch_bool const& mask, requires_arch) noexcept + 
XSIMD_INLINE batch compress(batch const& self, batch_bool const& mask, requires_arch) noexcept { return _mm512_maskz_compress_epi32(mask.mask(), self); } template - inline batch compress(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch compress(batch const& self, batch_bool const& mask, requires_arch) noexcept { return _mm512_maskz_compress_epi32(mask.mask(), self); } template - inline batch compress(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch compress(batch const& self, batch_bool const& mask, requires_arch) noexcept { return _mm512_maskz_compress_epi64(mask.mask(), self); } template - inline batch compress(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch compress(batch const& self, batch_bool const& mask, requires_arch) noexcept { return _mm512_maskz_compress_epi64(mask.mask(), self); } @@ -697,19 +697,19 @@ namespace xsimd namespace detail { template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm512_cvtepi32_ps(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm512_cvttps_epi32(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm512_cvtepu32_ps(self); } @@ -725,13 +725,13 @@ namespace xsimd { // complex_low template - inline batch complex_low(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_low(batch, A> const& self, requires_arch) noexcept { __m512i idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); return _mm512_permutex2var_ps(self.real(), idx, self.imag()); } template - inline batch complex_low(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_low(batch, A> const& self, requires_arch) noexcept { __m512i idx = _mm512_setr_epi64(0, 8, 1, 9, 2, 10, 3, 11); return _mm512_permutex2var_pd(self.real(), idx, self.imag()); @@ -739,13 +739,13 @@ namespace xsimd // complex_high template - inline batch complex_high(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_high(batch, A> const& self, requires_arch) noexcept { __m512i idx = _mm512_setr_epi32(8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); return _mm512_permutex2var_ps(self.real(), idx, self.imag()); } template - inline batch complex_high(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_high(batch, A> const& self, requires_arch) noexcept { __m512i idx = _mm512_setr_epi64(4, 12, 5, 13, 6, 14, 7, 15); return _mm512_permutex2var_pd(self.real(), idx, self.imag()); @@ -754,35 +754,35 @@ namespace xsimd // div template - inline batch div(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_div_ps(self, other); } template - inline batch div(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_div_pd(self, other); } // eq template - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE 
batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_cmp_ps_mask(self, other, _CMP_EQ_OQ); } template - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_cmp_pd_mask(self, other, _CMP_EQ_OQ); } template ::value, void>::type> - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { return detail::compare_int_avx512f(self, other); } template - inline batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return register_type(~self.data ^ other.data); @@ -790,126 +790,126 @@ namespace xsimd // expand template - inline batch expand(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch expand(batch const& self, batch_bool const& mask, requires_arch) noexcept { return _mm512_maskz_expand_ps(mask.mask(), self); } template - inline batch expand(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch expand(batch const& self, batch_bool const& mask, requires_arch) noexcept { return _mm512_maskz_expand_pd(mask.mask(), self); } template - inline batch expand(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch expand(batch const& self, batch_bool const& mask, requires_arch) noexcept { return _mm512_maskz_expand_epi32(mask.mask(), self); } template - inline batch expand(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch expand(batch const& self, batch_bool const& mask, requires_arch) noexcept { return _mm512_maskz_expand_epi32(mask.mask(), self); } template - inline batch expand(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch expand(batch const& self, batch_bool const& mask, requires_arch) noexcept { return _mm512_maskz_expand_epi64(mask.mask(), self); } template - inline batch expand(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch expand(batch const& self, batch_bool const& mask, requires_arch) noexcept { return _mm512_maskz_expand_epi64(mask.mask(), self); } // floor template - inline batch floor(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch floor(batch const& self, requires_arch) noexcept { return _mm512_roundscale_ps(self, _MM_FROUND_TO_NEG_INF); } template - inline batch floor(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch floor(batch const& self, requires_arch) noexcept { return _mm512_roundscale_pd(self, _MM_FROUND_TO_NEG_INF); } // fnma template - inline batch fnma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fnma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return _mm512_fnmadd_ps(x, y, z); } template - inline batch fnma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fnma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return _mm512_fnmadd_pd(x, y, z); } // fma template - inline batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fma(batch const& x, batch 
const& y, batch const& z, requires_arch) noexcept { return _mm512_fmadd_ps(x, y, z); } template - inline batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return _mm512_fmadd_pd(x, y, z); } // fms template - inline batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return _mm512_fmsub_ps(x, y, z); } template - inline batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return _mm512_fmsub_pd(x, y, z); } // from bool template - inline batch from_bool(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& self, requires_arch) noexcept { return select(self, batch(1), batch(0)); } // from_mask template - inline batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept + XSIMD_INLINE batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept { return static_cast::register_type>(mask); } // gather template = 0, detail::enable_sized_integral_t = 0> - inline batch gather(batch const&, T const* src, batch const& index, - kernel::requires_arch) noexcept + XSIMD_INLINE batch gather(batch const&, T const* src, batch const& index, + kernel::requires_arch) noexcept { return _mm512_i32gather_epi32(index, static_cast(src), sizeof(T)); } template = 0, detail::enable_sized_integral_t = 0> - inline batch gather(batch const&, T const* src, batch const& index, - kernel::requires_arch) noexcept + XSIMD_INLINE batch gather(batch const&, T const* src, batch const& index, + kernel::requires_arch) noexcept { return _mm512_i64gather_epi64(index, static_cast(src), sizeof(T)); } template = 0> - inline batch gather(batch const&, float const* src, - batch const& index, - kernel::requires_arch) noexcept + XSIMD_INLINE batch gather(batch const&, float const* src, + batch const& index, + kernel::requires_arch) noexcept { return _mm512_i32gather_ps(index, src, sizeof(float)); } template = 0> - inline batch + XSIMD_INLINE batch gather(batch const&, double const* src, batch const& index, kernel::requires_arch) noexcept { @@ -918,9 +918,9 @@ namespace xsimd // gather: handmade conversions template = 0> - inline batch gather(batch const&, double const* src, - batch const& index, - requires_arch) noexcept + XSIMD_INLINE batch gather(batch const&, double const* src, + batch const& index, + requires_arch) noexcept { const batch low(_mm512_i32gather_pd(_mm512_castsi512_si256(index.data), src, sizeof(double))); const batch high(_mm512_i32gather_pd(_mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castsi512_pd(index.data), 1)), src, sizeof(double))); @@ -928,9 +928,9 @@ namespace xsimd } template = 0> - inline batch gather(batch const&, double const* src, - batch const& index, - requires_arch) noexcept + XSIMD_INLINE batch gather(batch const&, double const* src, + batch const& index, + requires_arch) noexcept { const batch low(_mm512_i32gather_pd(_mm512_castsi512_si256(index.data), src, sizeof(double))); const batch high(_mm512_i32gather_pd(_mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castsi512_pd(index.data), 1)), src, sizeof(double))); @@ -939,41 +939,41 @@ namespace xsimd // ge template - inline batch_bool ge(batch const& self, batch const& other, requires_arch) 
noexcept + XSIMD_INLINE batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_cmp_ps_mask(self, other, _CMP_GE_OQ); } template - inline batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_cmp_pd_mask(self, other, _CMP_GE_OQ); } template ::value, void>::type> - inline batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept { return detail::compare_int_avx512f(self, other); } // gt template - inline batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_cmp_ps_mask(self, other, _CMP_GT_OQ); } template - inline batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_cmp_pd_mask(self, other, _CMP_GT_OQ); } template ::value, void>::type> - inline batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept { return detail::compare_int_avx512f(self, other); } // haddp template - inline batch haddp(batch const* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(batch const* row, requires_arch) noexcept { // The following folds over the vector once: // tmp1 = [a0..8, b0..8] @@ -1034,7 +1034,7 @@ namespace xsimd } template - inline batch haddp(batch const* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(batch const* row, requires_arch) noexcept { #define step1(I, a, b) \ batch res##I; \ @@ -1069,25 +1069,25 @@ namespace xsimd // isnan template - inline batch_bool isnan(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const& self, requires_arch) noexcept { return _mm512_cmp_ps_mask(self, self, _CMP_UNORD_Q); } template - inline batch_bool isnan(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const& self, requires_arch) noexcept { return _mm512_cmp_pd_mask(self, self, _CMP_UNORD_Q); } // ldexp template - inline batch ldexp(const batch& self, const batch, A>& other, requires_arch) noexcept + XSIMD_INLINE batch ldexp(const batch& self, const batch, A>& other, requires_arch) noexcept { return _mm512_scalef_ps(self, _mm512_cvtepi32_ps(other)); } template - inline batch ldexp(const batch& self, const batch, A>& other, requires_arch) noexcept + XSIMD_INLINE batch ldexp(const batch& self, const batch, A>& other, requires_arch) noexcept { // FIXME: potential data loss here when converting other elements to // int32 before converting them back to double. 
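Note (illustration only, not part of the patch): the ldexp kernels keep their computation and only change the inline macro; the exponent batch is converted to floating point and handed to the scalef instruction, which evaluates self * 2^other per lane. A minimal sketch of the float flavour under that assumption, with a made-up free-function name:

    #include <immintrin.h>

    // Standalone equivalent of the float ldexp kernel above: scalef computes
    // x * 2^floor(e) lane-wise, so the int32 exponents are converted first.
    inline __m512 ldexp_ps_sketch(__m512 x, __m512i exponents) noexcept
    {
        return _mm512_scalef_ps(x, _mm512_cvtepi32_ps(exponents));
    }
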
@@ -1097,34 +1097,34 @@ namespace xsimd // le template - inline batch_bool le(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_cmp_ps_mask(self, other, _CMP_LE_OQ); } template - inline batch_bool le(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_cmp_pd_mask(self, other, _CMP_LE_OQ); } template ::value, void>::type> - inline batch_bool le(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& self, batch const& other, requires_arch) noexcept { return detail::compare_int_avx512f(self, other); } // load_aligned template ::value, void>::type> - inline batch load_aligned(T const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* mem, convert, requires_arch) noexcept { return _mm512_load_si512((__m512i const*)mem); } template - inline batch load_aligned(float const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(float const* mem, convert, requires_arch) noexcept { return _mm512_load_ps(mem); } template - inline batch load_aligned(double const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(double const* mem, convert, requires_arch) noexcept { return _mm512_load_pd(mem); } @@ -1133,7 +1133,7 @@ namespace xsimd namespace detail { template - inline batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept { __m512i real_idx = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); __m512i imag_idx = _mm512_setr_epi32(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); @@ -1142,7 +1142,7 @@ namespace xsimd return { real, imag }; } template - inline batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept { __m512i real_idx = _mm512_setr_epi64(0, 2, 4, 6, 8, 10, 12, 14); __m512i imag_idx = _mm512_setr_epi64(1, 3, 5, 7, 9, 11, 13, 15); @@ -1154,59 +1154,59 @@ namespace xsimd // load_unaligned template ::value, void>::type> - inline batch load_unaligned(T const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* mem, convert, requires_arch) noexcept { return _mm512_loadu_si512((__m512i const*)mem); } template - inline batch load_unaligned(float const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(float const* mem, convert, requires_arch) noexcept { return _mm512_loadu_ps(mem); } template - inline batch load_unaligned(double const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(double const* mem, convert, requires_arch) noexcept { return _mm512_loadu_pd(mem); } // lt template - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_cmp_ps_mask(self, other, _CMP_LT_OQ); } template - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_cmp_pd_mask(self, other, _CMP_LT_OQ); } template ::value, void>::type> - inline 
batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { return detail::compare_int_avx512f(self, other); } // mask template - inline uint64_t mask(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept { return self.data; } // max template - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_max_ps(self, other); } template - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_max_pd(self, other); } template ::value, void>::type> - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -1246,17 +1246,17 @@ namespace xsimd // min template - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_min_ps(self, other); } template - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_min_pd(self, other); } template ::value, void>::type> - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -1296,17 +1296,17 @@ namespace xsimd // mul template - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_mul_ps(self, other); } template - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_mul_pd(self, other); } template ::value, void>::type> - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 4) { @@ -1322,50 +1322,50 @@ namespace xsimd // nearbyint template - inline batch nearbyint(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch nearbyint(batch const& self, requires_arch) noexcept { return _mm512_roundscale_round_ps(self, _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_CUR_DIRECTION); } template - inline batch nearbyint(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch nearbyint(batch const& self, requires_arch) noexcept { return _mm512_roundscale_round_pd(self, _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_CUR_DIRECTION); } // nearbyint_as_int template - inline batch nearbyint_as_int(batch const& self, - requires_arch) noexcept + XSIMD_INLINE batch nearbyint_as_int(batch const& self, + requires_arch) noexcept { return _mm512_cvtps_epi32(self); } // neg template - inline batch neg(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& self, requires_arch) noexcept { return 0 - self; } // neq template - inline batch_bool neq(batch const& self, batch const& other, 
requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_cmp_ps_mask(self, other, _CMP_NEQ_UQ); } template - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_cmp_pd_mask(self, other, _CMP_NEQ_UQ); } template ::value, void>::type> - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return ~(self == other); } template - inline batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return register_type(self.data ^ other.data); @@ -1373,7 +1373,7 @@ namespace xsimd // reciprocal template - inline batch + XSIMD_INLINE batch reciprocal(batch const& self, kernel::requires_arch) noexcept { @@ -1381,7 +1381,7 @@ namespace xsimd } template - inline batch + XSIMD_INLINE batch reciprocal(batch const& self, kernel::requires_arch) noexcept { @@ -1390,7 +1390,7 @@ namespace xsimd // reduce_add template - inline float reduce_add(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE float reduce_add(batch const& rhs, requires_arch) noexcept { __m128 tmp1 = _mm512_extractf32x4_ps(rhs, 0); __m128 tmp2 = _mm512_extractf32x4_ps(rhs, 1); @@ -1402,7 +1402,7 @@ namespace xsimd return reduce_add(batch(res3), sse4_2 {}); } template - inline double reduce_add(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE double reduce_add(batch const& rhs, requires_arch) noexcept { __m256d tmp1 = _mm512_extractf64x4_pd(rhs, 1); __m256d tmp2 = _mm512_extractf64x4_pd(rhs, 0); @@ -1410,7 +1410,7 @@ namespace xsimd return reduce_add(batch(res1), avx2 {}); } template ::value, void>::type> - inline T reduce_add(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_add(batch const& self, requires_arch) noexcept { __m256i low, high; detail::split_avx512(self, low, high); @@ -1420,7 +1420,7 @@ namespace xsimd // reduce_max template ::type> - inline T reduce_max(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_max(batch const& self, requires_arch) noexcept { constexpr batch_constant mask; batch step = _mm512_permutexvar_epi64(mask.as_batch(), self); @@ -1431,7 +1431,7 @@ namespace xsimd // reduce_min template ::type> - inline T reduce_min(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_min(batch const& self, requires_arch) noexcept { constexpr batch_constant mask; batch step = _mm512_permutexvar_epi64(mask.as_batch(), self); @@ -1442,19 +1442,19 @@ namespace xsimd // rsqrt template - inline batch rsqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch rsqrt(batch const& val, requires_arch) noexcept { return _mm512_rsqrt14_ps(val); } template - inline batch rsqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch rsqrt(batch const& val, requires_arch) noexcept { return _mm512_rsqrt14_pd(val); } // sadd template ::value, void>::type> - inline batch sadd(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -1474,52 +1474,52 @@ namespace xsimd // scatter template ::value || 
std::is_same::value, void>::type> - inline void scatter(batch const& src, T* dst, - batch const& index, - kernel::requires_arch) noexcept + XSIMD_INLINE void scatter(batch const& src, T* dst, + batch const& index, + kernel::requires_arch) noexcept { _mm512_i32scatter_epi32(dst, index, src, sizeof(T)); } template ::value || std::is_same::value, void>::type> - inline void scatter(batch const& src, T* dst, - batch const& index, - kernel::requires_arch) noexcept + XSIMD_INLINE void scatter(batch const& src, T* dst, + batch const& index, + kernel::requires_arch) noexcept { _mm512_i64scatter_epi64(dst, index, src, sizeof(T)); } template - inline void scatter(batch const& src, float* dst, - batch const& index, - kernel::requires_arch) noexcept + XSIMD_INLINE void scatter(batch const& src, float* dst, + batch const& index, + kernel::requires_arch) noexcept { _mm512_i32scatter_ps(dst, index, src, sizeof(float)); } template - inline void scatter(batch const& src, double* dst, - batch const& index, - kernel::requires_arch) noexcept + XSIMD_INLINE void scatter(batch const& src, double* dst, + batch const& index, + kernel::requires_arch) noexcept { _mm512_i64scatter_pd(dst, index, src, sizeof(double)); } // select template - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return _mm512_mask_blend_ps(cond, false_br, true_br); } template - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return _mm512_mask_blend_pd(cond, false_br, true_br); } template ::value, void>::type> - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -1571,7 +1571,7 @@ namespace xsimd } template ::value, void>::type> - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { return select(batch_bool { Values... 
}, true_br, false_br, avx512f {}); } @@ -1589,32 +1589,32 @@ namespace xsimd // set template - inline batch set(batch const&, requires_arch, float v0, float v1, float v2, float v3, float v4, float v5, float v6, float v7, float v8, float v9, float v10, float v11, float v12, float v13, float v14, float v15) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, float v0, float v1, float v2, float v3, float v4, float v5, float v6, float v7, float v8, float v9, float v10, float v11, float v12, float v13, float v14, float v15) noexcept { return _mm512_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); } template - inline batch set(batch const&, requires_arch, double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7) noexcept { return _mm512_setr_pd(v0, v1, v2, v3, v4, v5, v6, v7); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) noexcept { return _mm512_set_epi64(v7, v6, v5, v4, v3, v2, v1, v0); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) noexcept { return _mm512_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); } template = 0> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15, - T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, - T v24, T v25, T v26, T v27, T v28, T v29, T v30, T v31) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15, + T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, + T v24, T v25, T v26, T v27, T v28, T v29, T v30, T v31) noexcept { #if defined(__clang__) || __GNUC__ return __extension__(__m512i)(__v32hi) { @@ -1628,10 +1628,10 @@ namespace xsimd } template = 0> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15, - T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, - T v24, T v25, T v26, T v27, T v28, T v29, T v30, T v31) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15, + T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, + T v24, T v25, T v26, T v27, T v28, T v29, T v30, T v31) noexcept { #if defined(__clang__) || __GNUC__ return __extension__(__m512i)(__v32hu) { @@ -1645,14 +1645,14 @@ namespace xsimd } template = 0> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15, - T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, - T v24, T v25, T v26, T v27, T v28, T v29, T v30, T v31, - T v32, T v33, T v34, T v35, T v36, T v37, T v38, T v39, - T v40, T v41, T v42, T v43, T v44, T v45, T v46, T 
v47, - T v48, T v49, T v50, T v51, T v52, T v53, T v54, T v55, - T v56, T v57, T v58, T v59, T v60, T v61, T v62, T v63) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15, + T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, + T v24, T v25, T v26, T v27, T v28, T v29, T v30, T v31, + T v32, T v33, T v34, T v35, T v36, T v37, T v38, T v39, + T v40, T v41, T v42, T v43, T v44, T v45, T v46, T v47, + T v48, T v49, T v50, T v51, T v52, T v53, T v54, T v55, + T v56, T v57, T v58, T v59, T v60, T v61, T v62, T v63) noexcept { #if defined(__clang__) || __GNUC__ @@ -1670,14 +1670,14 @@ namespace xsimd #endif } template = 0> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15, - T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, - T v24, T v25, T v26, T v27, T v28, T v29, T v30, T v31, - T v32, T v33, T v34, T v35, T v36, T v37, T v38, T v39, - T v40, T v41, T v42, T v43, T v44, T v45, T v46, T v47, - T v48, T v49, T v50, T v51, T v52, T v53, T v54, T v55, - T v56, T v57, T v58, T v59, T v60, T v61, T v62, T v63) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15, + T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, + T v24, T v25, T v26, T v27, T v28, T v29, T v30, T v31, + T v32, T v33, T v34, T v35, T v36, T v37, T v38, T v39, + T v40, T v41, T v42, T v43, T v44, T v45, T v46, T v47, + T v48, T v49, T v50, T v51, T v52, T v53, T v54, T v55, + T v56, T v57, T v58, T v59, T v60, T v61, T v62, T v63) noexcept { #if defined(__clang__) || __GNUC__ @@ -1696,7 +1696,7 @@ namespace xsimd } template - inline batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Values... 
values) noexcept { static_assert(sizeof...(Values) == batch_bool::size, "consistent init"); using register_type = typename batch_bool::register_type; @@ -1708,9 +1708,9 @@ namespace xsimd // shuffle template - inline batch shuffle(batch const& x, batch const& y, - batch_constant mask, - requires_arch) noexcept + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, + batch_constant mask, + requires_arch) noexcept { constexpr uint32_t smask = (I0 & 0x3) | ((I1 & 0x3) << 2) | ((I2 & 0x3) << 4) | ((I3 & 0x3) << 6); @@ -1726,7 +1726,7 @@ namespace xsimd } template - inline batch shuffle(batch const& x, batch const& y, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant mask, requires_arch) noexcept { constexpr uint32_t smask = (I0 & 0x1) | ((I1 & 0x1) << 1) | ((I2 & 0x1) << 2) | ((I3 & 0x1) << 3) | ((I4 & 0x1) << 4) | ((I5 & 0x1) << 5) | ((I6 & 0x1) << 6) | ((I7 & 0x1) << 7); // shuffle within lane @@ -1742,7 +1742,7 @@ namespace xsimd // slide_left template - inline batch slide_left(batch const&, requires_arch) noexcept + XSIMD_INLINE batch slide_left(batch const&, requires_arch) noexcept { static_assert(N == 0xDEAD, "not implemented yet"); return {}; @@ -1750,7 +1750,7 @@ namespace xsimd // slide_right template - inline batch slide_right(batch const&, requires_arch) noexcept + XSIMD_INLINE batch slide_right(batch const&, requires_arch) noexcept { static_assert(N == 0xDEAD, "not implemented yet"); return {}; @@ -1758,19 +1758,19 @@ namespace xsimd // sqrt template - inline batch sqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch sqrt(batch const& val, requires_arch) noexcept { return _mm512_sqrt_ps(val); } template - inline batch sqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch sqrt(batch const& val, requires_arch) noexcept { return _mm512_sqrt_pd(val); } // ssub template ::value, void>::type> - inline batch ssub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch ssub(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -1785,7 +1785,7 @@ namespace xsimd // store template - inline void store(batch_bool const& self, bool* mem, requires_arch) noexcept + XSIMD_INLINE void store(batch_bool const& self, bool* mem, requires_arch) noexcept { using register_type = typename batch_bool::register_type; constexpr auto size = batch_bool::size; @@ -1795,51 +1795,51 @@ namespace xsimd // store_aligned template ::value, void>::type> - inline void store_aligned(T* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* mem, batch const& self, requires_arch) noexcept { return _mm512_store_si512((__m512i*)mem, self); } template ::value, void>::type> - inline void store_aligned(T* mem, batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* mem, batch_bool const& self, requires_arch) noexcept { return _mm512_store_si512((__m512i*)mem, self); } template - inline void store_aligned(float* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(float* mem, batch const& self, requires_arch) noexcept { return _mm512_store_ps(mem, self); } template - inline void store_aligned(double* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(double* mem, batch const& self, requires_arch) noexcept { return _mm512_store_pd(mem, self); } // store_unaligned template ::value, void>::type> - inline void store_unaligned(T* mem, batch 
const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(T* mem, batch const& self, requires_arch) noexcept { return _mm512_storeu_si512((__m512i*)mem, self); } template ::value, void>::type> - inline void store_unaligned(T* mem, batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(T* mem, batch_bool const& self, requires_arch) noexcept { return _mm512_storeu_si512((__m512i*)mem, self); } template - inline void store_unaligned(float* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(float* mem, batch const& self, requires_arch) noexcept { return _mm512_storeu_ps(mem, self); } template - inline void store_unaligned(double* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(double* mem, batch const& self, requires_arch) noexcept { return _mm512_storeu_pd(mem, self); } // sub template ::value, void>::type> - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -1868,86 +1868,86 @@ namespace xsimd } } template - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_sub_ps(self, other); } template - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { return _mm512_sub_pd(self, other); } // swizzle (dynamic version) template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return _mm512_permutexvar_ps(mask, self); } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return _mm512_permutexvar_pd(mask, self); } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return _mm512_permutexvar_epi64(mask, self); } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, avx512f {})); } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return _mm512_permutexvar_epi32(mask, self); } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, avx512f {})); } // swizzle (constant version) template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return swizzle(self, mask.as_batch(), avx512f {}); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return swizzle(self, mask.as_batch(), avx512f {}); } template - inline batch 
swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return swizzle(self, mask.as_batch(), avx512f {}); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return swizzle(self, mask.as_batch(), avx512f {}); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return swizzle(self, mask.as_batch(), avx512f {}); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return swizzle(self, mask.as_batch(), avx512f {}); } @@ -1980,14 +1980,14 @@ namespace xsimd } template ::value, void>::type> - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { constexpr typename detail::fold_batch_constant::type mask32; return _mm512_permutexvar_epi32(static_cast>(mask32), self); } template - inline batch + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { // FIXME: this sequence is very inefficient, but it's here to catch @@ -2004,7 +2004,7 @@ namespace xsimd } template - inline batch + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, avx512f {})); @@ -2012,13 +2012,13 @@ namespace xsimd // trunc template - inline batch + XSIMD_INLINE batch trunc(batch const& self, requires_arch) noexcept { return _mm512_roundscale_round_ps(self, _MM_FROUND_TO_ZERO, _MM_FROUND_CUR_DIRECTION); } template - inline batch + XSIMD_INLINE batch trunc(batch const& self, requires_arch) noexcept { return _mm512_roundscale_round_pd(self, _MM_FROUND_TO_ZERO, _MM_FROUND_CUR_DIRECTION); @@ -2026,7 +2026,7 @@ namespace xsimd // zip_hi template ::value, void>::type> - inline batch + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { __m512i lo, hi; @@ -2064,7 +2064,7 @@ namespace xsimd 1); } template - inline batch + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { auto lo = _mm512_unpacklo_ps(self, other); @@ -2078,7 +2078,7 @@ namespace xsimd 1); } template - inline batch + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { auto lo = _mm512_castpd_ps(_mm512_unpacklo_pd(self, other)); @@ -2094,7 +2094,7 @@ namespace xsimd // zip_lo template ::value, void>::type> - inline batch + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { __m512i lo, hi; @@ -2132,7 +2132,7 @@ namespace xsimd 2); } template - inline batch + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { auto lo = _mm512_unpacklo_ps(self, other); @@ -2146,7 +2146,7 @@ namespace xsimd 2); } template - inline batch + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { auto lo = _mm512_castpd_ps(_mm512_unpacklo_pd(self, other)); diff --git a/include/xsimd/arch/xsimd_constants.hpp b/include/xsimd/arch/xsimd_constants.hpp index 22dd5d3e3..51411d287 100644 --- 
a/include/xsimd/arch/xsimd_constants.hpp +++ b/include/xsimd/arch/xsimd_constants.hpp @@ -24,34 +24,34 @@ namespace xsimd #define XSIMD_DEFINE_CONSTANT(NAME, SINGLE, DOUBLE) \ template \ - inline T NAME() noexcept \ + XSIMD_INLINE T NAME() noexcept \ { \ return T(NAME()); \ } \ template <> \ - inline float NAME() noexcept \ + XSIMD_INLINE float NAME() noexcept \ { \ return SINGLE; \ } \ template <> \ - inline double NAME() noexcept \ + XSIMD_INLINE double NAME() noexcept \ { \ return DOUBLE; \ } #define XSIMD_DEFINE_CONSTANT_HEX(NAME, SINGLE, DOUBLE) \ template \ - inline T NAME() noexcept \ + XSIMD_INLINE T NAME() noexcept \ { \ return T(NAME()); \ } \ template <> \ - inline float NAME() noexcept \ + XSIMD_INLINE float NAME() noexcept \ { \ return bit_cast((uint32_t)SINGLE); \ } \ template <> \ - inline double NAME() noexcept \ + XSIMD_INLINE double NAME() noexcept \ { \ return bit_cast((uint64_t)DOUBLE); \ } @@ -168,7 +168,7 @@ namespace xsimd } template - inline constexpr T allbits() noexcept + XSIMD_INLINE constexpr T allbits() noexcept { return T(detail::allbits_impl::get_value()); } @@ -178,19 +178,19 @@ namespace xsimd *****************************/ template - inline constexpr as_integer_t mask1frexp() noexcept + XSIMD_INLINE constexpr as_integer_t mask1frexp() noexcept { return as_integer_t(mask1frexp()); } template <> - inline constexpr int32_t mask1frexp() noexcept + XSIMD_INLINE constexpr int32_t mask1frexp() noexcept { return 0x7f800000; } template <> - inline constexpr int64_t mask1frexp() noexcept + XSIMD_INLINE constexpr int64_t mask1frexp() noexcept { return 0x7ff0000000000000; } @@ -200,19 +200,19 @@ namespace xsimd *****************************/ template - inline constexpr as_integer_t mask2frexp() noexcept + XSIMD_INLINE constexpr as_integer_t mask2frexp() noexcept { return as_integer_t(mask2frexp()); } template <> - inline constexpr int32_t mask2frexp() noexcept + XSIMD_INLINE constexpr int32_t mask2frexp() noexcept { return 0x3f000000; } template <> - inline constexpr int64_t mask2frexp() noexcept + XSIMD_INLINE constexpr int64_t mask2frexp() noexcept { return 0x3fe0000000000000; } @@ -222,19 +222,19 @@ namespace xsimd ******************************/ template - inline constexpr as_integer_t maxexponent() noexcept + XSIMD_INLINE constexpr as_integer_t maxexponent() noexcept { return as_integer_t(maxexponent()); } template <> - inline constexpr int32_t maxexponent() noexcept + XSIMD_INLINE constexpr int32_t maxexponent() noexcept { return 127; } template <> - inline constexpr int64_t maxexponent() noexcept + XSIMD_INLINE constexpr int64_t maxexponent() noexcept { return 1023; } @@ -244,19 +244,19 @@ namespace xsimd ******************************/ template - inline constexpr as_integer_t maxexponentm1() noexcept + XSIMD_INLINE constexpr as_integer_t maxexponentm1() noexcept { return as_integer_t(maxexponentm1()); } template <> - inline constexpr int32_t maxexponentm1() noexcept + XSIMD_INLINE constexpr int32_t maxexponentm1() noexcept { return 126; } template <> - inline constexpr int64_t maxexponentm1() noexcept + XSIMD_INLINE constexpr int64_t maxexponentm1() noexcept { return 1022; } @@ -266,19 +266,19 @@ namespace xsimd **********************/ template - inline constexpr int32_t nmb() noexcept + XSIMD_INLINE constexpr int32_t nmb() noexcept { return nmb(); } template <> - inline constexpr int32_t nmb() noexcept + XSIMD_INLINE constexpr int32_t nmb() noexcept { return 23; } template <> - inline constexpr int32_t nmb() noexcept + XSIMD_INLINE constexpr int32_t nmb() 
noexcept { return 52; } @@ -288,7 +288,7 @@ namespace xsimd ***********************/ template - inline constexpr T zero() noexcept + XSIMD_INLINE constexpr T zero() noexcept { return T(typename T::value_type(0)); } @@ -353,7 +353,7 @@ namespace xsimd template <> struct minvalue_impl { - inline static float get_value() noexcept + XSIMD_INLINE static float get_value() noexcept { return bit_cast((uint32_t)0xff7fffff); } @@ -362,7 +362,7 @@ namespace xsimd template <> struct minvalue_impl { - inline static double get_value() noexcept + XSIMD_INLINE static double get_value() noexcept { return bit_cast((uint64_t)0xffefffffffffffff); } diff --git a/include/xsimd/arch/xsimd_emulated.hpp b/include/xsimd/arch/xsimd_emulated.hpp index ac3dd4fef..ef7fd0191 100644 --- a/include/xsimd/arch/xsimd_emulated.hpp +++ b/include/xsimd/arch/xsimd_emulated.hpp @@ -28,7 +28,7 @@ namespace xsimd struct batch_bool_constant; template - inline batch bitwise_cast(batch const& x) noexcept; + XSIMD_INLINE batch bitwise_cast(batch const& x) noexcept; template struct batch_constant; @@ -39,9 +39,9 @@ namespace xsimd // fwd template - inline batch insert(batch const& self, T val, index, requires_arch) noexcept; + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept; template - inline batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept; + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept; namespace detail { @@ -66,7 +66,7 @@ namespace xsimd // abs template ::size> - inline batch abs(batch const& self, requires_arch>) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch>) noexcept { return detail::emulated_apply([](T v) { return xsimd::abs(v); }, @@ -75,7 +75,7 @@ namespace xsimd // add template ::size> - inline batch add(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::add(v0, v1); }, @@ -84,7 +84,7 @@ namespace xsimd // all template ::size> - inline bool all(batch_bool const& self, requires_arch>) noexcept + XSIMD_INLINE bool all(batch_bool const& self, requires_arch>) noexcept { return std::all_of(self.data.begin(), self.data.end(), [](T v) { return bool(v); }); @@ -92,7 +92,7 @@ namespace xsimd // any template ::size> - inline bool any(batch_bool const& self, requires_arch>) noexcept + XSIMD_INLINE bool any(batch_bool const& self, requires_arch>) noexcept { return std::any_of(self.data.begin(), self.data.end(), [](T v) { return bool(v); }); @@ -100,14 +100,14 @@ namespace xsimd // batch_bool_cast template ::size> - inline batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch>) noexcept + XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch>) noexcept { return { self.data }; } // bitwise_and template ::size> - inline batch bitwise_and(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::bitwise_and(v0, v1); }, @@ -115,7 +115,7 @@ namespace xsimd } template ::size> - inline batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch>) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch>) noexcept { return 
detail::emulated_apply([](bool v0, bool v1) { return xsimd::bitwise_and(v0, v1); }, @@ -124,7 +124,7 @@ namespace xsimd // bitwise_andnot template ::size> - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::bitwise_andnot(v0, v1); }, @@ -132,7 +132,7 @@ namespace xsimd } template ::size> - inline batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch>) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch>) noexcept { return detail::emulated_apply([](bool v0, bool v1) { return xsimd::bitwise_andnot(v0, v1); }, @@ -141,7 +141,7 @@ namespace xsimd // bitwise_lshift template ::size> - inline batch bitwise_lshift(batch const& self, int32_t other, requires_arch>) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch>) noexcept { return detail::emulated_apply([other](T v) { return xsimd::bitwise_lshift(v, other); }, @@ -150,7 +150,7 @@ namespace xsimd // bitwise_not template ::size> - inline batch bitwise_not(batch const& self, requires_arch>) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch>) noexcept { return detail::emulated_apply([](T v) { return xsimd::bitwise_not(v); }, @@ -158,7 +158,7 @@ namespace xsimd } template ::size> - inline batch_bool bitwise_not(batch_bool const& self, requires_arch>) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& self, requires_arch>) noexcept { return detail::emulated_apply([](bool v) { return xsimd::bitwise_not(v); }, @@ -167,7 +167,7 @@ namespace xsimd // bitwise_or template ::size> - inline batch bitwise_or(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::bitwise_or(v0, v1); }, @@ -175,7 +175,7 @@ namespace xsimd } template ::size> - inline batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch>) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch>) noexcept { return detail::emulated_apply([](bool v0, bool v1) { return xsimd::bitwise_or(v0, v1); }, @@ -184,7 +184,7 @@ namespace xsimd // bitwise_rshift template ::size> - inline batch bitwise_rshift(batch const& self, int32_t other, requires_arch>) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& self, int32_t other, requires_arch>) noexcept { return detail::emulated_apply([other](T v) { return xsimd::bitwise_rshift(v, other); }, @@ -193,7 +193,7 @@ namespace xsimd // bitwise_xor template ::size> - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::bitwise_xor(v0, v1); }, @@ -201,7 +201,7 @@ namespace xsimd } template ::size> - inline batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch>) noexcept + XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch>) noexcept { return detail::emulated_apply([](bool v0, bool v1) { return xsimd::bitwise_xor(v0, v1); }, @@ -210,7 +210,7 @@ namespace xsimd // 
bitwise_cast template ::size> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch>) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch>) noexcept { constexpr size_t size = batch::size; std::array result; @@ -222,7 +222,7 @@ namespace xsimd // broadcast template ::size> - batch inline broadcast(T val, requires_arch>) noexcept + batch XSIMD_INLINE broadcast(T val, requires_arch>) noexcept { constexpr size_t size = batch::size; std::array r; @@ -235,7 +235,7 @@ namespace xsimd { // complex_low template ::size> - inline batch complex_low(batch, A> const& self, requires_arch>) noexcept + XSIMD_INLINE batch complex_low(batch, A> const& self, requires_arch>) noexcept { constexpr size_t size = batch::size; std::array result; @@ -248,7 +248,7 @@ namespace xsimd } // complex_high template ::size> - inline batch complex_high(batch, A> const& self, requires_arch>) noexcept + XSIMD_INLINE batch complex_high(batch, A> const& self, requires_arch>) noexcept { constexpr size_t size = batch::size; std::array result; @@ -263,14 +263,14 @@ namespace xsimd // decr_if template ::size> - inline batch decr_if(batch const& self, batch_bool const& mask, requires_arch>) noexcept + XSIMD_INLINE batch decr_if(batch const& self, batch_bool const& mask, requires_arch>) noexcept { return self - batch(mask.data); } // div template ::size> - inline batch div(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch div(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::div(v0, v1); }, @@ -281,7 +281,7 @@ namespace xsimd namespace detail { template ::size> - inline batch fast_cast(batch const& self, batch const&, requires_arch>) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch>) noexcept { return detail::emulated_apply([](int32_t v) { return float(v); }, @@ -289,7 +289,7 @@ namespace xsimd } template ::size> - inline batch fast_cast(batch const& self, batch const&, requires_arch>) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch>) noexcept { return detail::emulated_apply([](uint32_t v) { return float(v); }, @@ -297,7 +297,7 @@ namespace xsimd } template ::size> - inline batch fast_cast(batch const& self, batch const&, requires_arch>) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch>) noexcept { return detail::emulated_apply([](int64_t v) { return double(v); }, @@ -305,7 +305,7 @@ namespace xsimd } template ::size> - inline batch fast_cast(batch const& self, batch const&, requires_arch>) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch>) noexcept { return detail::emulated_apply([](uint64_t v) { return double(v); }, @@ -313,7 +313,7 @@ namespace xsimd } template ::size> - inline batch fast_cast(batch const& self, batch const&, requires_arch>) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch>) noexcept { return detail::emulated_apply([](float v) { return int32_t(v); }, @@ -321,7 +321,7 @@ namespace xsimd } template ::size> - inline batch fast_cast(batch const& self, batch const&, requires_arch>) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch>) noexcept { return detail::emulated_apply([](double v) { return int64_t(v); }, @@ -331,7 +331,7 @@ namespace xsimd // eq template ::size> - inline batch_bool> eq(batch> const& self, batch> const& 
other, requires_arch>) noexcept + XSIMD_INLINE batch_bool> eq(batch> const& self, batch> const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::eq(v0, v1); }, @@ -339,7 +339,7 @@ namespace xsimd } template ::size> - inline batch_bool> eq(batch_bool> const& self, batch_bool> const& other, requires_arch>) noexcept + XSIMD_INLINE batch_bool> eq(batch_bool> const& self, batch_bool> const& other, requires_arch>) noexcept { return detail::emulated_apply([](bool v0, bool v1) { return xsimd::eq(v0, v1); }, @@ -348,7 +348,7 @@ namespace xsimd // from_bool template ::size> - inline batch from_bool(batch_bool const& self, requires_arch>) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& self, requires_arch>) noexcept { return detail::emulated_apply([](bool v) { return T(v); }, @@ -357,7 +357,7 @@ namespace xsimd // from_mask template ::size> - inline batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch>) noexcept + XSIMD_INLINE batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch>) noexcept { constexpr size_t size = batch::size; std::array vmask; @@ -368,7 +368,7 @@ namespace xsimd // ge template ::size> - inline batch_bool> ge(batch> const& self, batch> const& other, requires_arch>) noexcept + XSIMD_INLINE batch_bool> ge(batch> const& self, batch> const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::ge(v0, v1); }, @@ -377,7 +377,7 @@ namespace xsimd // gt template ::size> - inline batch_bool> gt(batch> const& self, batch> const& other, requires_arch>) noexcept + XSIMD_INLINE batch_bool> gt(batch> const& self, batch> const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::gt(v0, v1); }, @@ -386,7 +386,7 @@ namespace xsimd // haddp template ::size> - inline batch haddp(batch const* row, requires_arch>) noexcept + XSIMD_INLINE batch haddp(batch const* row, requires_arch>) noexcept { constexpr size_t size = batch::size; std::array r; @@ -397,14 +397,14 @@ namespace xsimd // incr_if template ::size> - inline batch incr_if(batch const& self, batch_bool const& mask, requires_arch>) noexcept + XSIMD_INLINE batch incr_if(batch const& self, batch_bool const& mask, requires_arch>) noexcept { return self + batch(mask.data); } // insert template ::size> - inline batch insert(batch const& self, T val, index, requires_arch>) noexcept + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch>) noexcept { batch other = self; other.data[I] = val; @@ -413,7 +413,7 @@ namespace xsimd // isnan template ::size, class = typename std::enable_if::value, void>::type> - inline batch_bool isnan(batch const& self, requires_arch>) noexcept + XSIMD_INLINE batch_bool isnan(batch const& self, requires_arch>) noexcept { return detail::emulated_apply([](T v) { return xsimd::isnan(v); }, @@ -422,7 +422,7 @@ namespace xsimd // load_aligned template ::size> - inline batch load_aligned(T const* mem, convert, requires_arch>) noexcept + XSIMD_INLINE batch load_aligned(T const* mem, convert, requires_arch>) noexcept { constexpr size_t size = batch::size; std::array res; @@ -432,7 +432,7 @@ namespace xsimd // load_unaligned template ::size> - inline batch load_unaligned(T const* mem, convert, requires_arch>) noexcept + XSIMD_INLINE batch load_unaligned(T const* mem, convert, requires_arch>) noexcept { constexpr size_t size = batch::size; std::array res; @@ -444,7 +444,7 @@ namespace xsimd namespace detail { template ::size> - inline batch, A> 
load_complex(batch const& hi, batch const& lo, requires_arch>) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch>) noexcept { constexpr size_t size = batch::size; std::array real, imag; @@ -464,7 +464,7 @@ namespace xsimd // le template ::size> - inline batch_bool> le(batch> const& self, batch> const& other, requires_arch>) noexcept + XSIMD_INLINE batch_bool> le(batch> const& self, batch> const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::le(v0, v1); }, @@ -473,7 +473,7 @@ namespace xsimd // lt template ::size> - inline batch_bool> lt(batch> const& self, batch> const& other, requires_arch>) noexcept + XSIMD_INLINE batch_bool> lt(batch> const& self, batch> const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::lt(v0, v1); }, @@ -482,7 +482,7 @@ namespace xsimd // mask template ::size> - inline uint64_t mask(batch_bool const& self, requires_arch>) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch>) noexcept { constexpr size_t size = batch::size; uint64_t res = 0; @@ -493,7 +493,7 @@ namespace xsimd // max template ::size> - inline batch max(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::max(v0, v1); }, @@ -502,7 +502,7 @@ namespace xsimd // min template ::size> - inline batch min(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::min(v0, v1); }, @@ -511,7 +511,7 @@ namespace xsimd // mul template ::size> - inline batch mul(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::mul(v0, v1); }, @@ -520,8 +520,8 @@ namespace xsimd // nearbyint_as_int template ::size> - inline batch, A> nearbyint_as_int(batch const& self, - requires_arch>) noexcept + XSIMD_INLINE batch, A> nearbyint_as_int(batch const& self, + requires_arch>) noexcept { return detail::emulated_apply([](T v) { return xsimd::nearbyint_as_int(v); }, @@ -530,7 +530,7 @@ namespace xsimd // neg template ::size> - inline batch neg(batch const& self, requires_arch>) noexcept + XSIMD_INLINE batch neg(batch const& self, requires_arch>) noexcept { return detail::emulated_apply([](T v) { return xsimd::neg(v); }, @@ -539,7 +539,7 @@ namespace xsimd // neq template ::size> - inline batch_bool neq(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::neq(v0, v1); }, @@ -547,7 +547,7 @@ namespace xsimd } template ::size> - inline batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch>) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch>) noexcept { return detail::emulated_apply([](bool v0, bool v1) { return xsimd::neq(v0, v1); }, @@ -556,7 +556,7 @@ namespace xsimd // reduce_add template ::size> - inline T reduce_add(batch const& self, requires_arch>) noexcept + XSIMD_INLINE T reduce_add(batch const& self, requires_arch>) noexcept { constexpr size_t size = 
batch::size; std::array buffer; @@ -566,7 +566,7 @@ namespace xsimd // reduce_max template ::size> - inline T reduce_max(batch const& self, requires_arch>) noexcept + XSIMD_INLINE T reduce_max(batch const& self, requires_arch>) noexcept { return std::accumulate(self.data.begin() + 1, self.data.end(), *self.data.begin(), [](T const& x, T const& y) { return xsimd::max(x, y); }); @@ -574,7 +574,7 @@ namespace xsimd // reduce_min template ::size> - inline T reduce_min(batch const& self, requires_arch>) noexcept + XSIMD_INLINE T reduce_min(batch const& self, requires_arch>) noexcept { return std::accumulate(self.data.begin() + 1, self.data.end(), *self.data.begin(), [](T const& x, T const& y) { return xsimd::min(x, y); }); @@ -582,7 +582,7 @@ namespace xsimd // rsqrt template ::size> - inline batch rsqrt(batch const& self, requires_arch>) noexcept + XSIMD_INLINE batch rsqrt(batch const& self, requires_arch>) noexcept { return detail::emulated_apply([](T v) { return xsimd::rsqrt(v); }, @@ -591,7 +591,7 @@ namespace xsimd // select template ::size> - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch>) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch>) noexcept { return detail::emulated_apply([](bool c, T t, T f) { return xsimd::select(c, t, f); }, @@ -599,7 +599,7 @@ namespace xsimd } template - inline batch select(batch_bool_constant const& cond, batch const& true_br, batch const& false_br, requires_arch::size>>) noexcept + XSIMD_INLINE batch select(batch_bool_constant const& cond, batch const& true_br, batch const& false_br, requires_arch::size>>) noexcept { constexpr size_t size = batch::size; static_assert(sizeof...(Values) == size, "consistent init"); @@ -608,7 +608,7 @@ namespace xsimd // shuffle template - inline batch shuffle(batch const& x, batch const& y, batch_constant mask, requires_arch::size>>) noexcept + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant mask, requires_arch::size>>) noexcept { constexpr size_t size = batch::size; batch bmask = mask; @@ -620,7 +620,7 @@ namespace xsimd // sqrt template ::size> - inline batch sqrt(batch const& self, requires_arch>) noexcept + XSIMD_INLINE batch sqrt(batch const& self, requires_arch>) noexcept { return detail::emulated_apply([](T v) { return xsimd::sqrt(v); }, @@ -629,7 +629,7 @@ namespace xsimd // slide_left template ::size> - inline batch slide_left(batch const& x, requires_arch>) noexcept + XSIMD_INLINE batch slide_left(batch const& x, requires_arch>) noexcept { constexpr size_t size = batch::size; std::array result; @@ -641,7 +641,7 @@ namespace xsimd // slide_right template ::size> - inline batch slide_right(batch const& x, requires_arch>) noexcept + XSIMD_INLINE batch slide_right(batch const& x, requires_arch>) noexcept { constexpr size_t size = batch::size; std::array result; @@ -653,7 +653,7 @@ namespace xsimd // sadd template ::size> - inline batch sadd(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::sadd(v0, v1); }, @@ -662,14 +662,14 @@ namespace xsimd // set template - inline batch> set(batch> const&, requires_arch>, Values... values) noexcept + XSIMD_INLINE batch> set(batch> const&, requires_arch>, Values... 
values) noexcept { static_assert(sizeof...(Values) == batch>::size, "consistent init"); return { typename batch>::register_type { static_cast(values)... } }; } template - inline batch_bool> set(batch_bool> const&, requires_arch>, Values... values) noexcept + XSIMD_INLINE batch_bool> set(batch_bool> const&, requires_arch>, Values... values) noexcept { static_assert(sizeof...(Values) == batch>::size, "consistent init"); return { std::array { static_cast(values)... } }; @@ -677,7 +677,7 @@ namespace xsimd // ssub template ::size> - inline batch ssub(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch ssub(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::ssub(v0, v1); }, @@ -686,21 +686,21 @@ namespace xsimd // store_aligned template - inline void store_aligned(T* mem, batch> const& self, requires_arch>) noexcept + XSIMD_INLINE void store_aligned(T* mem, batch> const& self, requires_arch>) noexcept { std::copy(self.data.begin(), self.data.end(), mem); } // store_unaligned template - inline void store_unaligned(T* mem, batch> const& self, requires_arch>) noexcept + XSIMD_INLINE void store_unaligned(T* mem, batch> const& self, requires_arch>) noexcept { std::copy(self.data.begin(), self.data.end(), mem); } // sub template ::size> - inline batch sub(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch>) noexcept { return detail::emulated_apply([](T v0, T v1) { return xsimd::sub(v0, v1); }, @@ -710,7 +710,7 @@ namespace xsimd // swizzle template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch::size>>) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch::size>>) noexcept { constexpr size_t size = batch::size; batch bmask = mask; @@ -722,7 +722,7 @@ namespace xsimd // zip_hi template ::size> - inline batch zip_hi(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch>) noexcept { constexpr size_t size = batch::size; // Note: irregular behavior for odd numbers. @@ -742,7 +742,7 @@ namespace xsimd // zip_lo template ::size> - inline batch zip_lo(batch const& self, batch const& other, requires_arch>) noexcept + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch>) noexcept { constexpr size_t size = batch::size; // Note: irregular behavior for odd numbers. 
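The emulated backend patched above lowers every batch operation to a scalar loop through detail::emulated_apply plus a one-line lambda. A minimal, self-contained sketch of that element-wise pattern, with hypothetical names (scalar_apply is illustrative, not part of xsimd), looks like this:

#include <array>
#include <cstddef>
#include <utility>

// Hypothetical stand-in for detail::emulated_apply: apply a scalar functor
// lane by lane over fixed-size arrays. Each operator of the emulated
// architecture reduces to this pattern plus a scalar lambda.
template <class F, class T, std::size_t N, std::size_t... Is>
std::array<T, N> scalar_apply_impl(F&& f, std::array<T, N> const& a,
                                   std::array<T, N> const& b,
                                   std::index_sequence<Is...>)
{
    return { f(a[Is], b[Is])... };
}

template <class F, class T, std::size_t N>
std::array<T, N> scalar_apply(F&& f, std::array<T, N> const& a,
                              std::array<T, N> const& b)
{
    return scalar_apply_impl(std::forward<F>(f), a, b,
                             std::make_index_sequence<N> {});
}

int main()
{
    std::array<int, 4> x { 1, 2, 3, 4 };
    std::array<int, 4> y { 10, 20, 30, 40 };
    auto sum = scalar_apply([](int u, int v) { return u + v; }, x, y); // {11, 22, 33, 44}
    return sum[0] == 11 ? 0 : 1;
}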
diff --git a/include/xsimd/arch/xsimd_fma3_avx.hpp b/include/xsimd/arch/xsimd_fma3_avx.hpp index 64e9ed65d..992625314 100644 --- a/include/xsimd/arch/xsimd_fma3_avx.hpp +++ b/include/xsimd/arch/xsimd_fma3_avx.hpp @@ -23,52 +23,52 @@ namespace xsimd // fnma template - inline batch fnma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fnma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm256_fnmadd_ps(x, y, z); } template - inline batch fnma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fnma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm256_fnmadd_pd(x, y, z); } // fnms template - inline batch fnms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fnms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm256_fnmsub_ps(x, y, z); } template - inline batch fnms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fnms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm256_fnmsub_pd(x, y, z); } // fma template - inline batch fma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm256_fmadd_ps(x, y, z); } template - inline batch fma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm256_fmadd_pd(x, y, z); } // fms template - inline batch fms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm256_fmsub_ps(x, y, z); } template - inline batch fms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm256_fmsub_pd(x, y, z); } diff --git a/include/xsimd/arch/xsimd_fma3_sse.hpp b/include/xsimd/arch/xsimd_fma3_sse.hpp index 55c38f13a..9b126166a 100644 --- a/include/xsimd/arch/xsimd_fma3_sse.hpp +++ b/include/xsimd/arch/xsimd_fma3_sse.hpp @@ -22,52 +22,52 @@ namespace xsimd using namespace types; // fnma template - inline batch fnma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fnma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm_fnmadd_ps(x, y, z); } template - inline batch fnma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fnma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm_fnmadd_pd(x, y, z); } // fnms template - inline batch fnms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fnms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm_fnmsub_ps(x, y, z); } template - inline batch fnms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fnms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm_fnmsub_pd(x, y, z); } // fma template - inline batch fma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fma(batch 
const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm_fmadd_ps(x, y, z); } template - inline batch fma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm_fmadd_pd(x, y, z); } // fms template - inline batch fms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm_fmsub_ps(x, y, z); } template - inline batch fms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch>) noexcept { return _mm_fmsub_pd(x, y, z); } diff --git a/include/xsimd/arch/xsimd_fma4.hpp b/include/xsimd/arch/xsimd_fma4.hpp index 6a97d711e..e51c7c52a 100644 --- a/include/xsimd/arch/xsimd_fma4.hpp +++ b/include/xsimd/arch/xsimd_fma4.hpp @@ -23,52 +23,52 @@ namespace xsimd // fnma template - inline batch fnma(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept + XSIMD_INLINE batch fnma(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept { return _mm_nmacc_ps(x, y, z); } template - inline batch fnma(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept + XSIMD_INLINE batch fnma(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept { return _mm_nmacc_pd(x, y, z); } // fnms template - inline batch fnms(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept + XSIMD_INLINE batch fnms(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept { return _mm_nmsub_ps(x, y, z); } template - inline batch fnms(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept + XSIMD_INLINE batch fnms(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept { return _mm_nmsub_pd(x, y, z); } // fma template - inline batch fma(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept + XSIMD_INLINE batch fma(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept { return _mm_macc_ps(x, y, z); } template - inline batch fma(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept + XSIMD_INLINE batch fma(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept { return _mm_macc_pd(x, y, z); } // fms template - inline batch fms(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept + XSIMD_INLINE batch fms(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept { return _mm_msub_ps(x, y, z); } template - inline batch fms(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept + XSIMD_INLINE batch fms(simd_register const& x, simd_register const& y, simd_register const& z, requires_arch) noexcept { return _mm_msub_pd(x, y, z); } diff --git a/include/xsimd/arch/xsimd_generic_fwd.hpp b/include/xsimd/arch/xsimd_generic_fwd.hpp index 87dcaa886..02708d60f 100644 --- a/include/xsimd/arch/xsimd_generic_fwd.hpp +++ b/include/xsimd/arch/xsimd_generic_fwd.hpp @@ -22,21 +22,21 @@ 
namespace xsimd { // forward declaration template ::value, void>::type> - inline batch abs(batch const& self, requires_arch) noexcept; + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept; template ::value, void>::type> - inline batch bitwise_lshift(batch const& self, batch const& other, requires_arch) noexcept; + XSIMD_INLINE batch bitwise_lshift(batch const& self, batch const& other, requires_arch) noexcept; template ::value, void>::type> - inline batch bitwise_rshift(batch const& self, batch const& other, requires_arch) noexcept; + XSIMD_INLINE batch bitwise_rshift(batch const& self, batch const& other, requires_arch) noexcept; template - inline batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept; + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept; template ::value, void>::type> - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept; + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept; template ::value, void>::type> - inline batch sadd(batch const& self, batch const& other, requires_arch) noexcept; + XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch) noexcept; template ::value, void>::type> - inline batch ssub(batch const& self, batch const& other, requires_arch) noexcept; + XSIMD_INLINE batch ssub(batch const& self, batch const& other, requires_arch) noexcept; template ::value, void>::type> - inline T hadd(batch const& self, requires_arch) noexcept; + XSIMD_INLINE T hadd(batch const& self, requires_arch) noexcept; } } diff --git a/include/xsimd/arch/xsimd_neon.hpp b/include/xsimd/arch/xsimd_neon.hpp index b0edae863..cd161305f 100644 --- a/include/xsimd/arch/xsimd_neon.hpp +++ b/include/xsimd/arch/xsimd_neon.hpp @@ -23,126 +23,126 @@ // Wrap intrinsics so we can pass them as function pointers // - OP: intrinsics name prefix, e.g., vorrq // - RT: type traits to deduce intrinsics return types -#define WRAP_BINARY_UINT_EXCLUDING_64(OP, RT) \ - namespace wrap \ - { \ - inline RT OP##_u8(uint8x16_t a, uint8x16_t b) noexcept \ - { \ - return ::OP##_u8(a, b); \ - } \ - inline RT OP##_u16(uint16x8_t a, uint16x8_t b) noexcept \ - { \ - return ::OP##_u16(a, b); \ - } \ - inline RT OP##_u32(uint32x4_t a, uint32x4_t b) noexcept \ - { \ - return ::OP##_u32(a, b); \ - } \ +#define WRAP_BINARY_UINT_EXCLUDING_64(OP, RT) \ + namespace wrap \ + { \ + XSIMD_INLINE RT OP##_u8(uint8x16_t a, uint8x16_t b) noexcept \ + { \ + return ::OP##_u8(a, b); \ + } \ + XSIMD_INLINE RT OP##_u16(uint16x8_t a, uint16x8_t b) noexcept \ + { \ + return ::OP##_u16(a, b); \ + } \ + XSIMD_INLINE RT OP##_u32(uint32x4_t a, uint32x4_t b) noexcept \ + { \ + return ::OP##_u32(a, b); \ + } \ } -#define WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \ - WRAP_BINARY_UINT_EXCLUDING_64(OP, RT) \ - namespace wrap \ - { \ - inline RT OP##_s8(int8x16_t a, int8x16_t b) noexcept \ - { \ - return ::OP##_s8(a, b); \ - } \ - inline RT OP##_s16(int16x8_t a, int16x8_t b) noexcept \ - { \ - return ::OP##_s16(a, b); \ - } \ - inline RT OP##_s32(int32x4_t a, int32x4_t b) noexcept \ - { \ - return ::OP##_s32(a, b); \ - } \ - } - -#define WRAP_BINARY_INT(OP, RT) \ - WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \ - namespace wrap \ - { \ - inline RT OP##_u64(uint64x2_t a, uint64x2_t b) noexcept \ - { \ - return ::OP##_u64(a, b); \ - } \ - inline RT OP##_s64(int64x2_t a, int64x2_t b) noexcept \ - { \ - return ::OP##_s64(a, b); \ - } \ - } - -#define WRAP_BINARY_FLOAT(OP, RT) \ +#define 
WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \ + WRAP_BINARY_UINT_EXCLUDING_64(OP, RT) \ namespace wrap \ { \ - inline RT OP##_f32(float32x4_t a, float32x4_t b) noexcept \ + XSIMD_INLINE RT OP##_s8(int8x16_t a, int8x16_t b) noexcept \ + { \ + return ::OP##_s8(a, b); \ + } \ + XSIMD_INLINE RT OP##_s16(int16x8_t a, int16x8_t b) noexcept \ + { \ + return ::OP##_s16(a, b); \ + } \ + XSIMD_INLINE RT OP##_s32(int32x4_t a, int32x4_t b) noexcept \ { \ - return ::OP##_f32(a, b); \ + return ::OP##_s32(a, b); \ } \ } -#define WRAP_UNARY_INT_EXCLUDING_64(OP) \ - namespace wrap \ - { \ - inline uint8x16_t OP##_u8(uint8x16_t a) noexcept \ - { \ - return ::OP##_u8(a); \ - } \ - inline int8x16_t OP##_s8(int8x16_t a) noexcept \ - { \ - return ::OP##_s8(a); \ - } \ - inline uint16x8_t OP##_u16(uint16x8_t a) noexcept \ - { \ - return ::OP##_u16(a); \ - } \ - inline int16x8_t OP##_s16(int16x8_t a) noexcept \ - { \ - return ::OP##_s16(a); \ - } \ - inline uint32x4_t OP##_u32(uint32x4_t a) noexcept \ - { \ - return ::OP##_u32(a); \ - } \ - inline int32x4_t OP##_s32(int32x4_t a) noexcept \ - { \ - return ::OP##_s32(a); \ - } \ +#define WRAP_BINARY_INT(OP, RT) \ + WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \ + namespace wrap \ + { \ + XSIMD_INLINE RT OP##_u64(uint64x2_t a, uint64x2_t b) noexcept \ + { \ + return ::OP##_u64(a, b); \ + } \ + XSIMD_INLINE RT OP##_s64(int64x2_t a, int64x2_t b) noexcept \ + { \ + return ::OP##_s64(a, b); \ + } \ } -#define WRAP_UNARY_INT(OP) \ - WRAP_UNARY_INT_EXCLUDING_64(OP) \ - namespace wrap \ - { \ - inline uint64x2_t OP##_u64(uint64x2_t a) noexcept \ - { \ - return ::OP##_u64(a); \ - } \ - inline int64x2_t OP##_s64(int64x2_t a) noexcept \ - { \ - return ::OP##_s64(a); \ - } \ +#define WRAP_BINARY_FLOAT(OP, RT) \ + namespace wrap \ + { \ + XSIMD_INLINE RT OP##_f32(float32x4_t a, float32x4_t b) noexcept \ + { \ + return ::OP##_f32(a, b); \ + } \ } -#define WRAP_UNARY_FLOAT(OP) \ - namespace wrap \ - { \ - inline float32x4_t OP##_f32(float32x4_t a) noexcept \ - { \ - return ::OP##_f32(a); \ - } \ +#define WRAP_UNARY_INT_EXCLUDING_64(OP) \ + namespace wrap \ + { \ + XSIMD_INLINE uint8x16_t OP##_u8(uint8x16_t a) noexcept \ + { \ + return ::OP##_u8(a); \ + } \ + XSIMD_INLINE int8x16_t OP##_s8(int8x16_t a) noexcept \ + { \ + return ::OP##_s8(a); \ + } \ + XSIMD_INLINE uint16x8_t OP##_u16(uint16x8_t a) noexcept \ + { \ + return ::OP##_u16(a); \ + } \ + XSIMD_INLINE int16x8_t OP##_s16(int16x8_t a) noexcept \ + { \ + return ::OP##_s16(a); \ + } \ + XSIMD_INLINE uint32x4_t OP##_u32(uint32x4_t a) noexcept \ + { \ + return ::OP##_u32(a); \ + } \ + XSIMD_INLINE int32x4_t OP##_s32(int32x4_t a) noexcept \ + { \ + return ::OP##_s32(a); \ + } \ + } + +#define WRAP_UNARY_INT(OP) \ + WRAP_UNARY_INT_EXCLUDING_64(OP) \ + namespace wrap \ + { \ + XSIMD_INLINE uint64x2_t OP##_u64(uint64x2_t a) noexcept \ + { \ + return ::OP##_u64(a); \ + } \ + XSIMD_INLINE int64x2_t OP##_s64(int64x2_t a) noexcept \ + { \ + return ::OP##_s64(a); \ + } \ + } + +#define WRAP_UNARY_FLOAT(OP) \ + namespace wrap \ + { \ + XSIMD_INLINE float32x4_t OP##_f32(float32x4_t a) noexcept \ + { \ + return ::OP##_f32(a); \ + } \ } // Dummy identity caster to ease coding -inline uint8x16_t vreinterpretq_u8_u8(uint8x16_t arg) noexcept { return arg; } -inline int8x16_t vreinterpretq_s8_s8(int8x16_t arg) noexcept { return arg; } -inline uint16x8_t vreinterpretq_u16_u16(uint16x8_t arg) noexcept { return arg; } -inline int16x8_t vreinterpretq_s16_s16(int16x8_t arg) noexcept { return arg; } -inline uint32x4_t vreinterpretq_u32_u32(uint32x4_t arg) noexcept 
{ return arg; } -inline int32x4_t vreinterpretq_s32_s32(int32x4_t arg) noexcept { return arg; } -inline uint64x2_t vreinterpretq_u64_u64(uint64x2_t arg) noexcept { return arg; } -inline int64x2_t vreinterpretq_s64_s64(int64x2_t arg) noexcept { return arg; } -inline float32x4_t vreinterpretq_f32_f32(float32x4_t arg) noexcept { return arg; } +XSIMD_INLINE uint8x16_t vreinterpretq_u8_u8(uint8x16_t arg) noexcept { return arg; } +XSIMD_INLINE int8x16_t vreinterpretq_s8_s8(int8x16_t arg) noexcept { return arg; } +XSIMD_INLINE uint16x8_t vreinterpretq_u16_u16(uint16x8_t arg) noexcept { return arg; } +XSIMD_INLINE int16x8_t vreinterpretq_s16_s16(int16x8_t arg) noexcept { return arg; } +XSIMD_INLINE uint32x4_t vreinterpretq_u32_u32(uint32x4_t arg) noexcept { return arg; } +XSIMD_INLINE int32x4_t vreinterpretq_s32_s32(int32x4_t arg) noexcept { return arg; } +XSIMD_INLINE uint64x2_t vreinterpretq_u64_u64(uint64x2_t arg) noexcept { return arg; } +XSIMD_INLINE int64x2_t vreinterpretq_s64_s64(int64x2_t arg) noexcept { return arg; } +XSIMD_INLINE float32x4_t vreinterpretq_f32_f32(float32x4_t arg) noexcept { return arg; } namespace xsimd { @@ -306,55 +306,55 @@ namespace xsimd *************/ template = 0> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { return vdupq_n_u8(uint8_t(val)); } template = 0> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { return vdupq_n_s8(int8_t(val)); } template = 0> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { return vdupq_n_u16(uint16_t(val)); } template = 0> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { return vdupq_n_s16(int16_t(val)); } template = 0> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { return vdupq_n_u32(uint32_t(val)); } template = 0> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { return vdupq_n_s32(int32_t(val)); } template = 0> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { return vdupq_n_u64(uint64_t(val)); } template = 0> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { return vdupq_n_s64(int64_t(val)); } template - inline batch broadcast(float val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(float val, requires_arch) noexcept { return vdupq_n_f32(val); } @@ -364,13 +364,13 @@ namespace xsimd *******/ template = 0> - inline batch set(batch const&, requires_arch, Args... args) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, Args... args) noexcept { return xsimd::types::detail::neon_vector_type { args... }; } template = 0> - inline batch_bool set(batch_bool const&, requires_arch, Args... args) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Args... 
args) noexcept { using register_type = typename batch_bool::register_type; using unsigned_type = as_unsigned_integer_t; @@ -378,22 +378,22 @@ namespace xsimd } template - inline batch set(batch const&, requires_arch, float f0, float f1, float f2, float f3) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, float f0, float f1, float f2, float f3) noexcept { return float32x4_t { f0, f1, f2, f3 }; } template - inline batch, A> set(batch, A> const&, requires_arch, - std::complex c0, std::complex c1, - std::complex c2, std::complex c3) noexcept + XSIMD_INLINE batch, A> set(batch, A> const&, requires_arch, + std::complex c0, std::complex c1, + std::complex c2, std::complex c3) noexcept { return batch, A>(float32x4_t { c0.real(), c1.real(), c2.real(), c3.real() }, float32x4_t { c0.imag(), c1.imag(), c2.imag(), c3.imag() }); } template - inline batch_bool set(batch_bool const&, requires_arch, Args... args) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Args... args) noexcept { using register_type = typename batch_bool::register_type; using unsigned_type = as_unsigned_integer_t; @@ -405,55 +405,55 @@ namespace xsimd *************/ template = 0> - inline batch from_bool(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { return vandq_u8(arg, vdupq_n_u8(1)); } template = 0> - inline batch from_bool(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { return vandq_s8(reinterpret_cast(arg.data), vdupq_n_s8(1)); } template = 0> - inline batch from_bool(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { return vandq_u16(arg, vdupq_n_u16(1)); } template = 0> - inline batch from_bool(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { return vandq_s16(reinterpret_cast(arg.data), vdupq_n_s16(1)); } template = 0> - inline batch from_bool(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { return vandq_u32(arg, vdupq_n_u32(1)); } template = 0> - inline batch from_bool(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { return vandq_s32(reinterpret_cast(arg.data), vdupq_n_s32(1)); } template = 0> - inline batch from_bool(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { return vandq_u64(arg, vdupq_n_u64(1)); } template = 0> - inline batch from_bool(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { return vandq_s64(reinterpret_cast(arg.data), vdupq_n_s64(1)); } template - inline batch from_bool(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { return vreinterpretq_f32_u32(vandq_u32(arg, vreinterpretq_u32_f32(vdupq_n_f32(1.f)))); } @@ -473,50 +473,50 @@ namespace xsimd #endif template = 0> - inline batch load_aligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* src, convert, requires_arch) noexcept { return xsimd_aligned_load(vld1q_u8, uint8_t*, src); } template = 0> - inline batch load_aligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch 
load_aligned(T const* src, convert, requires_arch) noexcept { return xsimd_aligned_load(vld1q_s8, int8_t*, src); } template = 0> - inline batch load_aligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* src, convert, requires_arch) noexcept { return xsimd_aligned_load(vld1q_u16, uint16_t*, src); } template = 0> - inline batch load_aligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* src, convert, requires_arch) noexcept { return xsimd_aligned_load(vld1q_s16, int16_t*, src); } template = 0> - inline batch load_aligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* src, convert, requires_arch) noexcept { return xsimd_aligned_load(vld1q_u32, uint32_t*, src); } template = 0> - inline batch load_aligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* src, convert, requires_arch) noexcept { return xsimd_aligned_load(vld1q_s32, int32_t*, src); } template = 0> - inline batch load_aligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* src, convert, requires_arch) noexcept { return xsimd_aligned_load(vld1q_u64, uint64_t*, src); } template = 0> - inline batch load_aligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* src, convert, requires_arch) noexcept { return xsimd_aligned_load(vld1q_s64, int64_t*, src); } template - inline batch load_aligned(float const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(float const* src, convert, requires_arch) noexcept { return xsimd_aligned_load(vld1q_f32, float*, src); } @@ -524,50 +524,50 @@ namespace xsimd #undef xsimd_aligned_load template = 0> - inline batch load_unaligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* src, convert, requires_arch) noexcept { return vld1q_u8((uint8_t*)src); } template = 0> - inline batch load_unaligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* src, convert, requires_arch) noexcept { return vld1q_s8((int8_t*)src); } template = 0> - inline batch load_unaligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* src, convert, requires_arch) noexcept { return vld1q_u16((uint16_t*)src); } template = 0> - inline batch load_unaligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* src, convert, requires_arch) noexcept { return vld1q_s16((int16_t*)src); } template = 0> - inline batch load_unaligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* src, convert, requires_arch) noexcept { return vld1q_u32((uint32_t*)src); } template = 0> - inline batch load_unaligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* src, convert, requires_arch) noexcept { return vld1q_s32((int32_t*)src); } template = 0> - inline batch load_unaligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* src, convert, requires_arch) noexcept { return vld1q_u64((uint64_t*)src); } template = 0> - inline batch load_unaligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* src, convert, requires_arch) noexcept { return vld1q_s64((int64_t*)src); } template - inline batch load_unaligned(float const* src, convert, requires_arch) noexcept + 
XSIMD_INLINE batch load_unaligned(float const* src, convert, requires_arch) noexcept { return vld1q_f32(src); } @@ -577,61 +577,61 @@ namespace xsimd *********/ template = 0> - inline void store_aligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* dst, batch const& src, requires_arch) noexcept { vst1q_u8((uint8_t*)dst, src); } template = 0> - inline void store_aligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* dst, batch const& src, requires_arch) noexcept { vst1q_s8((int8_t*)dst, src); } template = 0> - inline void store_aligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* dst, batch const& src, requires_arch) noexcept { vst1q_u16((uint16_t*)dst, src); } template = 0> - inline void store_aligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* dst, batch const& src, requires_arch) noexcept { vst1q_s16((int16_t*)dst, src); } template = 0> - inline void store_aligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* dst, batch const& src, requires_arch) noexcept { vst1q_u32((uint32_t*)dst, src); } template = 0> - inline void store_aligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* dst, batch const& src, requires_arch) noexcept { vst1q_s32((int32_t*)dst, src); } template = 0> - inline void store_aligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* dst, batch const& src, requires_arch) noexcept { vst1q_u64((uint64_t*)dst, src); } template = 0> - inline void store_aligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* dst, batch const& src, requires_arch) noexcept { vst1q_s64((int64_t*)dst, src); } template - inline void store_aligned(float* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_aligned(float* dst, batch const& src, requires_arch) noexcept { vst1q_f32(dst, src); } template - inline void store_unaligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(T* dst, batch const& src, requires_arch) noexcept { store_aligned(dst, src, A {}); } @@ -641,7 +641,7 @@ namespace xsimd ****************/ template - inline batch, A> load_complex_aligned(std::complex const* mem, convert>, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex_aligned(std::complex const* mem, convert>, requires_arch) noexcept { using real_batch = batch; const float* buf = reinterpret_cast(mem); @@ -652,7 +652,7 @@ namespace xsimd } template - inline batch, A> load_complex_unaligned(std::complex const* mem, convert> cvt, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex_unaligned(std::complex const* mem, convert> cvt, requires_arch) noexcept { return load_complex_aligned(mem, cvt, A {}); } @@ -662,7 +662,7 @@ namespace xsimd *****************/ template - inline void store_complex_aligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept + XSIMD_INLINE void store_complex_aligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { float32x4x2_t tmp; tmp.val[0] = src.real(); @@ -672,7 +672,7 @@ namespace xsimd } template - inline void store_complex_unaligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept + XSIMD_INLINE void store_complex_unaligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { store_complex_aligned(dst, src, A {}); } @@ -682,55 +682,55 @@ 
namespace xsimd *******/ template = 0> - inline batch neg(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& rhs, requires_arch) noexcept { return vreinterpretq_u8_s8(vnegq_s8(vreinterpretq_s8_u8(rhs))); } template = 0> - inline batch neg(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& rhs, requires_arch) noexcept { return vnegq_s8(rhs); } template = 0> - inline batch neg(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& rhs, requires_arch) noexcept { return vreinterpretq_u16_s16(vnegq_s16(vreinterpretq_s16_u16(rhs))); } template = 0> - inline batch neg(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& rhs, requires_arch) noexcept { return vnegq_s16(rhs); } template = 0> - inline batch neg(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& rhs, requires_arch) noexcept { return vreinterpretq_u32_s32(vnegq_s32(vreinterpretq_s32_u32(rhs))); } template = 0> - inline batch neg(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& rhs, requires_arch) noexcept { return vnegq_s32(rhs); } template = 0> - inline batch neg(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& rhs, requires_arch) noexcept { return batch { -rhs.get(0), -rhs.get(1) }; } template = 0> - inline batch neg(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& rhs, requires_arch) noexcept { return batch { -rhs.get(0), -rhs.get(1) }; } template - inline batch neg(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& rhs, requires_arch) noexcept { return vnegq_f32(rhs); } @@ -743,7 +743,7 @@ namespace xsimd WRAP_BINARY_FLOAT(vaddq, detail::identity_return_type) template = 0> - inline batch add(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::neon_dispatcher::binary dispatcher = { @@ -761,7 +761,7 @@ namespace xsimd WRAP_BINARY_UINT_EXCLUDING_64(vhaddq, detail::identity_return_type) template ::value && sizeof(T) != 8), void>::type> - inline batch avg(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch avg(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::neon_dispatcher_impl::binary dispatcher = { @@ -777,7 +777,7 @@ namespace xsimd WRAP_BINARY_UINT_EXCLUDING_64(vrhaddq, detail::identity_return_type) template ::value && sizeof(T) != 8), void>::type> - inline batch avgr(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch avgr(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::neon_dispatcher_impl::binary dispatcher = { @@ -793,7 +793,7 @@ namespace xsimd WRAP_BINARY_INT(vqaddq, detail::identity_return_type) template = 0> - inline batch sadd(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::neon_dispatcher::binary dispatcher = { @@ -812,7 +812,7 @@ namespace xsimd WRAP_BINARY_FLOAT(vsubq, detail::identity_return_type) template = 0> - inline batch sub(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& 
lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::neon_dispatcher::binary dispatcher = { @@ -830,7 +830,7 @@ namespace xsimd WRAP_BINARY_INT(vqsubq, detail::identity_return_type) template = 0> - inline batch ssub(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch ssub(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::neon_dispatcher::binary dispatcher = { @@ -849,7 +849,7 @@ namespace xsimd WRAP_BINARY_FLOAT(vmulq, detail::identity_return_type) template = 0> - inline batch mul(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::excluding_int64_dispatcher::binary dispatcher = { @@ -865,20 +865,20 @@ namespace xsimd #if defined(XSIMD_FAST_INTEGER_DIVISION) template = 0> - inline batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcvtq_s32_f32(vcvtq_f32_s32(lhs) / vcvtq_f32_s32(rhs)); } template = 0> - inline batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcvtq_u32_f32(vcvtq_f32_u32(lhs) / vcvtq_f32_u32(rhs)); } #endif template - inline batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept { // from stackoverflow & https://projectne10.github.io/Ne10/doc/NE10__divc_8neon_8c_source.html // get an initial estimate of 1/b. 
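The comment just above ("get an initial estimate of 1/b") refers to the usual NEON trick for dividing floats without a hardware divide: start from the vrecpeq_f32 reciprocal estimate and sharpen it with Newton-Raphson steps using vrecpsq_f32. A minimal sketch of that technique (not the literal xsimd function body) is:

#include <arm_neon.h>

// Sketch only: divide a by b on NEON without a hardware float divide.
// vrecpeq_f32 gives a rough estimate of 1/b; each vrecpsq_f32/vmulq_f32
// pair is one Newton-Raphson refinement, computing est * (2 - b * est).
float32x4_t div_f32_sketch(float32x4_t a, float32x4_t b)
{
    float32x4_t reciprocal = vrecpeq_f32(b);
    reciprocal = vmulq_f32(vrecpsq_f32(b, reciprocal), reciprocal); // first refinement
    reciprocal = vmulq_f32(vrecpsq_f32(b, reciprocal), reciprocal); // second refinement
    return vmulq_f32(a, reciprocal);                                // a * (1/b)
}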
@@ -902,7 +902,7 @@ namespace xsimd WRAP_BINARY_FLOAT(vceqq, detail::comp_return_type) template = 0> - inline batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::excluding_int64_comp_dispatcher::binary dispatcher = { @@ -913,7 +913,7 @@ namespace xsimd } template = 0> - inline batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { using register_type = typename batch_bool::register_type; using dispatcher_type = detail::neon_comp_dispatcher_impl::binary; @@ -924,13 +924,13 @@ namespace xsimd } template = 0> - inline batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept { return batch_bool({ lhs.get(0) == rhs.get(0), lhs.get(1) == rhs.get(1) }); } template = 0> - inline batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return batch_bool({ lhs.get(0) == rhs.get(0), lhs.get(1) == rhs.get(1) }); } @@ -942,25 +942,25 @@ namespace xsimd namespace detail { template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return vcvtq_f32_s32(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return vcvtq_f32_u32(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return vcvtq_s32_f32(self); } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return vcvtq_u32_f32(self); } @@ -975,7 +975,7 @@ namespace xsimd WRAP_BINARY_FLOAT(vcltq, detail::comp_return_type) template = 0> - inline batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::excluding_int64_comp_dispatcher::binary dispatcher = { @@ -986,7 +986,7 @@ namespace xsimd } template = 0> - inline batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return batch_bool({ lhs.get(0) < rhs.get(0), lhs.get(1) < rhs.get(1) }); } @@ -999,7 +999,7 @@ namespace xsimd WRAP_BINARY_FLOAT(vcleq, detail::comp_return_type) template = 0> - inline batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::excluding_int64_comp_dispatcher::binary dispatcher = { @@ -1010,7 +1010,7 @@ namespace xsimd } template = 0> - inline batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& lhs, batch const& 
rhs, requires_arch) noexcept { return batch_bool({ lhs.get(0) <= rhs.get(0), lhs.get(1) <= rhs.get(1) }); } @@ -1023,7 +1023,7 @@ namespace xsimd WRAP_BINARY_FLOAT(vcgtq, detail::comp_return_type) template = 0> - inline batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::excluding_int64_comp_dispatcher::binary dispatcher = { @@ -1034,7 +1034,7 @@ namespace xsimd } template = 0> - inline batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return batch_bool({ lhs.get(0) > rhs.get(0), lhs.get(1) > rhs.get(1) }); } @@ -1047,7 +1047,7 @@ namespace xsimd WRAP_BINARY_FLOAT(vcgeq, detail::comp_return_type) template = 0> - inline batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::excluding_int64_comp_dispatcher::binary dispatcher = { @@ -1058,7 +1058,7 @@ namespace xsimd } template = 0> - inline batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept { return batch_bool({ lhs.get(0) >= rhs.get(0), lhs.get(1) >= rhs.get(1) }); } @@ -1068,7 +1068,7 @@ namespace xsimd *******************/ template - inline batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept + XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return register_type(self); @@ -1082,7 +1082,7 @@ namespace xsimd namespace detail { - inline float32x4_t bitwise_and_f32(float32x4_t lhs, float32x4_t rhs) noexcept + XSIMD_INLINE float32x4_t bitwise_and_f32(float32x4_t lhs, float32x4_t rhs) noexcept { return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(lhs), vreinterpretq_u32_f32(rhs))); @@ -1101,14 +1101,14 @@ namespace xsimd } template = 0> - inline batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; return detail::bitwise_and_neon(register_type(lhs), register_type(rhs)); } template = 0> - inline batch_bool bitwise_and(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return detail::bitwise_and_neon(register_type(lhs), register_type(rhs)); @@ -1122,14 +1122,14 @@ namespace xsimd namespace detail { - inline float32x4_t bitwise_or_f32(float32x4_t lhs, float32x4_t rhs) noexcept + XSIMD_INLINE float32x4_t bitwise_or_f32(float32x4_t lhs, float32x4_t rhs) noexcept { return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(lhs), vreinterpretq_u32_f32(rhs))); } template - inline V bitwise_or_neon(V const& lhs, V const& rhs) noexcept + XSIMD_INLINE V bitwise_or_neon(V const& lhs, V const& rhs) noexcept { const neon_dispatcher::binary dispatcher = { std::make_tuple(wrap::vorrq_u8, wrap::vorrq_s8, wrap::vorrq_u16, wrap::vorrq_s16, @@ -1141,14 +1141,14 @@ 
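[editor's note] The bitwise_and_f32 helper above illustrates how every float bitwise kernel in this file works; a self-contained sketch of the pattern:

#include <arm_neon.h>

// NEON has no bitwise instructions on float registers, so the operands are
// reinterpreted as uint32x4_t, combined with the integer instruction, and
// reinterpreted back. The or/xor/andnot helpers below follow the same pattern
// with vorrq_u32 / veorq_u32 / vbicq_u32.
float32x4_t and_f32(float32x4_t a, float32x4_t b)
{
    return vreinterpretq_f32_u32(
        vandq_u32(vreinterpretq_u32_f32(a), vreinterpretq_u32_f32(b)));
}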
namespace xsimd } template = 0> - inline batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; return detail::bitwise_or_neon(register_type(lhs), register_type(rhs)); } template = 0> - inline batch_bool bitwise_or(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return detail::bitwise_or_neon(register_type(lhs), register_type(rhs)); @@ -1162,14 +1162,14 @@ namespace xsimd namespace detail { - inline float32x4_t bitwise_xor_f32(float32x4_t lhs, float32x4_t rhs) noexcept + XSIMD_INLINE float32x4_t bitwise_xor_f32(float32x4_t lhs, float32x4_t rhs) noexcept { return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(lhs), vreinterpretq_u32_f32(rhs))); } template - inline V bitwise_xor_neon(V const& lhs, V const& rhs) noexcept + XSIMD_INLINE V bitwise_xor_neon(V const& lhs, V const& rhs) noexcept { const neon_dispatcher::binary dispatcher = { std::make_tuple(wrap::veorq_u8, wrap::veorq_s8, wrap::veorq_u16, wrap::veorq_s16, @@ -1181,14 +1181,14 @@ namespace xsimd } template = 0> - inline batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; return detail::bitwise_xor_neon(register_type(lhs), register_type(rhs)); } template = 0> - inline batch_bool bitwise_xor(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return detail::bitwise_xor_neon(register_type(lhs), register_type(rhs)); @@ -1199,7 +1199,7 @@ namespace xsimd *******/ template - inline batch_bool neq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return bitwise_xor(lhs, rhs, A {}); } @@ -1212,23 +1212,23 @@ namespace xsimd namespace detail { - inline int64x2_t bitwise_not_s64(int64x2_t arg) noexcept + XSIMD_INLINE int64x2_t bitwise_not_s64(int64x2_t arg) noexcept { return vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(arg))); } - inline uint64x2_t bitwise_not_u64(uint64x2_t arg) noexcept + XSIMD_INLINE uint64x2_t bitwise_not_u64(uint64x2_t arg) noexcept { return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(arg))); } - inline float32x4_t bitwise_not_f32(float32x4_t arg) noexcept + XSIMD_INLINE float32x4_t bitwise_not_f32(float32x4_t arg) noexcept { return vreinterpretq_f32_u32(vmvnq_u32(vreinterpretq_u32_f32(arg))); } template - inline V bitwise_not_neon(V const& arg) noexcept + XSIMD_INLINE V bitwise_not_neon(V const& arg) noexcept { const neon_dispatcher::unary dispatcher = { std::make_tuple(wrap::vmvnq_u8, wrap::vmvnq_s8, wrap::vmvnq_u16, wrap::vmvnq_s16, @@ -1241,14 +1241,14 @@ namespace xsimd } template = 0> - inline batch bitwise_not(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& arg, requires_arch) noexcept { using register_type = typename batch::register_type; return detail::bitwise_not_neon(register_type(arg)); } template = 0> - inline 
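[editor's note] The bitwise_not_s64/u64 helpers above exist because there is no 64-bit vmvnq; a minimal sketch of that workaround:

#include <arm_neon.h>

// A bitwise NOT does not depend on lane width, so the 64-bit vector is viewed
// as 32-bit lanes, inverted with vmvnq_u32, and viewed as 64-bit lanes again.
uint64x2_t not_u64(uint64x2_t v)
{
    return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(v)));
}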
batch_bool bitwise_not(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& arg, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return detail::bitwise_not_neon(register_type(arg)); @@ -1262,13 +1262,13 @@ namespace xsimd namespace detail { - inline float32x4_t bitwise_andnot_f32(float32x4_t lhs, float32x4_t rhs) noexcept + XSIMD_INLINE float32x4_t bitwise_andnot_f32(float32x4_t lhs, float32x4_t rhs) noexcept { return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(lhs), vreinterpretq_u32_f32(rhs))); } template - inline V bitwise_andnot_neon(V const& lhs, V const& rhs) noexcept + XSIMD_INLINE V bitwise_andnot_neon(V const& lhs, V const& rhs) noexcept { const detail::neon_dispatcher::binary dispatcher = { std::make_tuple(wrap::vbicq_u8, wrap::vbicq_s8, wrap::vbicq_u16, wrap::vbicq_s16, @@ -1280,14 +1280,14 @@ namespace xsimd } template = 0> - inline batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; return detail::bitwise_andnot_neon(register_type(lhs), register_type(rhs)); } template = 0> - inline batch_bool bitwise_andnot(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return detail::bitwise_andnot_neon(register_type(lhs), register_type(rhs)); @@ -1301,7 +1301,7 @@ namespace xsimd WRAP_BINARY_FLOAT(vminq, detail::identity_return_type) template = 0> - inline batch min(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::excluding_int64_dispatcher::binary dispatcher = { @@ -1312,7 +1312,7 @@ namespace xsimd } template = 0> - inline batch min(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& lhs, batch const& rhs, requires_arch) noexcept { return { std::min(lhs.get(0), rhs.get(0)), std::min(lhs.get(1), rhs.get(1)) }; } @@ -1325,7 +1325,7 @@ namespace xsimd WRAP_BINARY_FLOAT(vmaxq, detail::identity_return_type) template = 0> - inline batch max(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& lhs, batch const& rhs, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::excluding_int64_dispatcher::binary dispatcher = { @@ -1336,7 +1336,7 @@ namespace xsimd } template = 0> - inline batch max(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& lhs, batch const& rhs, requires_arch) noexcept { return { std::max(lhs.get(0), rhs.get(0)), std::max(lhs.get(1), rhs.get(1)) }; } @@ -1347,32 +1347,32 @@ namespace xsimd namespace wrap { - inline int8x16_t vabsq_s8(int8x16_t a) noexcept { return ::vabsq_s8(a); } - inline int16x8_t vabsq_s16(int16x8_t a) noexcept { return ::vabsq_s16(a); } - inline int32x4_t vabsq_s32(int32x4_t a) noexcept { return ::vabsq_s32(a); } + XSIMD_INLINE int8x16_t vabsq_s8(int8x16_t a) noexcept { return ::vabsq_s8(a); } + XSIMD_INLINE int16x8_t vabsq_s16(int16x8_t a) noexcept { return ::vabsq_s16(a); } + XSIMD_INLINE int32x4_t vabsq_s32(int32x4_t a) noexcept { return ::vabsq_s32(a); } } 
WRAP_UNARY_FLOAT(vabsq) namespace detail { - inline uint8x16_t abs_u8(uint8x16_t arg) noexcept + XSIMD_INLINE uint8x16_t abs_u8(uint8x16_t arg) noexcept { return arg; } - inline uint16x8_t abs_u16(uint16x8_t arg) noexcept + XSIMD_INLINE uint16x8_t abs_u16(uint16x8_t arg) noexcept { return arg; } - inline uint32x4_t abs_u32(uint32x4_t arg) noexcept + XSIMD_INLINE uint32x4_t abs_u32(uint32x4_t arg) noexcept { return arg; } } template = 0> - inline batch abs(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& arg, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::excluding_int64_dispatcher::unary dispatcher = { @@ -1387,7 +1387,7 @@ namespace xsimd ********/ template - inline batch rsqrt(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch rsqrt(batch const& arg, requires_arch) noexcept { return vrsqrteq_f32(arg); } @@ -1397,7 +1397,7 @@ namespace xsimd ********/ template - inline batch sqrt(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch sqrt(batch const& arg, requires_arch) noexcept { batch sqrt_reciprocal = vrsqrteq_f32(arg); // one iter @@ -1413,13 +1413,13 @@ namespace xsimd #ifdef __ARM_FEATURE_FMA template - inline batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return vfmaq_f32(z, x, y); } template - inline batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return vfmaq_f32(-z, x, y); } @@ -1430,7 +1430,7 @@ namespace xsimd *********/ template - inline batch haddp(const batch* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(const batch* row, requires_arch) noexcept { // row = (a,b,c,d) float32x2_t tmp1, tmp2, tmp3; @@ -1455,7 +1455,7 @@ namespace xsimd **************/ template - inline batch + XSIMD_INLINE batch reciprocal(const batch& x, kernel::requires_arch) noexcept { @@ -1467,55 +1467,55 @@ namespace xsimd **********/ template = 0> - inline batch insert(batch const& self, T val, index, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept { return vsetq_lane_u8(val, self, I); } template = 0> - inline batch insert(batch const& self, T val, index, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept { return vsetq_lane_s8(val, self, I); } template = 0> - inline batch insert(batch const& self, T val, index, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept { return vsetq_lane_u16(val, self, I); } template = 0> - inline batch insert(batch const& self, int16_t val, index, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, int16_t val, index, requires_arch) noexcept { return vsetq_lane_s16(val, self, I); } template = 0> - inline batch insert(batch const& self, T val, index, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept { return vsetq_lane_u32(val, self, I); } template = 0> - inline batch insert(batch const& self, T val, index, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept { return vsetq_lane_s32(val, self, I); } template = 0> - inline batch insert(batch const& self, T val, index, requires_arch) noexcept + 
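[editor's note] The sqrt kernel above builds on vrsqrteq_f32 because ARMv7 NEON has no vsqrtq_f32. A hedged sketch of that approach, assuming two refinement steps and ignoring the x == 0 case (which would produce NaN here and needs separate handling):

#include <arm_neon.h>

// Refine the reciprocal-square-root estimate with vrsqrtsq_f32 Newton-Raphson
// steps, then use sqrt(x) = x * (1/sqrt(x)).
float32x4_t sqrt_f32(float32x4_t x)
{
    float32x4_t est = vrsqrteq_f32(x);                          // ~1/sqrt(x)
    est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, est), est), est); // refinement
    est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, est), est), est); // refinement
    return vmulq_f32(x, est);
}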
XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept { return vsetq_lane_u64(val, self, I); } template = 0> - inline batch insert(batch const& self, T val, index, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept { return vsetq_lane_s64(val, self, I); } template - inline batch insert(batch const& self, float val, index, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, float val, index, requires_arch) noexcept { return vsetq_lane_f32(val, self, I); } @@ -1525,8 +1525,8 @@ namespace xsimd *******************/ template - inline batch nearbyint_as_int(batch const& self, - requires_arch) noexcept + XSIMD_INLINE batch nearbyint_as_int(batch const& self, + requires_arch) noexcept { /* origin: https://github.com/DLTcollab/sse2neon/blob/cad518a93b326f0f644b7972d488d04eaa2b0475/sse2neon.h#L4028-L4047 */ // Contributors to this work are: @@ -1595,7 +1595,7 @@ namespace xsimd namespace detail { template - inline T sum_batch(V const& arg) noexcept + XSIMD_INLINE T sum_batch(V const& arg) noexcept { T res = T(0); for (std::size_t i = 0; i < batch::size; ++i) @@ -1607,7 +1607,7 @@ namespace xsimd } template = 0> - inline typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept + XSIMD_INLINE typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept { uint8x8_t tmp = vpadd_u8(vget_low_u8(arg), vget_high_u8(arg)); tmp = vpadd_u8(tmp, tmp); @@ -1617,7 +1617,7 @@ namespace xsimd } template = 0> - inline typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept + XSIMD_INLINE typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept { int8x8_t tmp = vpadd_s8(vget_low_s8(arg), vget_high_s8(arg)); tmp = vpadd_s8(tmp, tmp); @@ -1627,7 +1627,7 @@ namespace xsimd } template = 0> - inline typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept + XSIMD_INLINE typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept { uint16x4_t tmp = vpadd_u16(vget_low_u16(arg), vget_high_u16(arg)); tmp = vpadd_u16(tmp, tmp); @@ -1636,7 +1636,7 @@ namespace xsimd } template = 0> - inline typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept + XSIMD_INLINE typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept { int16x4_t tmp = vpadd_s16(vget_low_s16(arg), vget_high_s16(arg)); tmp = vpadd_s16(tmp, tmp); @@ -1645,7 +1645,7 @@ namespace xsimd } template = 0> - inline typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept + XSIMD_INLINE typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept { uint32x2_t tmp = vpadd_u32(vget_low_u32(arg), vget_high_u32(arg)); tmp = vpadd_u32(tmp, tmp); @@ -1653,7 +1653,7 @@ namespace xsimd } template = 0> - inline typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept + XSIMD_INLINE typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept { int32x2_t tmp = vpadd_s32(vget_low_s32(arg), vget_high_s32(arg)); tmp = vpadd_s32(tmp, tmp); @@ -1661,13 +1661,13 @@ namespace xsimd } template = 0> - inline typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept + XSIMD_INLINE typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept { return arg.get(0) + arg.get(1); } template - inline float reduce_add(batch const& arg, requires_arch) noexcept + XSIMD_INLINE float reduce_add(batch 
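[editor's note] The insert() overloads above take the lane position as a compile-time index tag rather than a runtime argument; a small sketch of why:

#include <arm_neon.h>

// vsetq_lane_* replaces a single lane but requires the lane number as an
// immediate, so the wrapping kernel must receive it as a template parameter.
float32x4_t set_lane2(float32x4_t v, float value)
{
    return vsetq_lane_f32(value, v, 2); // write lane 2, keep the other lanes
}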
const& arg, requires_arch) noexcept { float32x2_t tmp = vpadd_f32(vget_low_f32(arg), vget_high_f32(arg)); tmp = vpadd_f32(tmp, tmp); @@ -1694,15 +1694,15 @@ namespace xsimd namespace wrap { - inline uint8x16_t vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) noexcept { return ::vbslq_u8(a, b, c); } - inline int8x16_t vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) noexcept { return ::vbslq_s8(a, b, c); } - inline uint16x8_t vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) noexcept { return ::vbslq_u16(a, b, c); } - inline int16x8_t vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) noexcept { return ::vbslq_s16(a, b, c); } - inline uint32x4_t vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) noexcept { return ::vbslq_u32(a, b, c); } - inline int32x4_t vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) noexcept { return ::vbslq_s32(a, b, c); } - inline uint64x2_t vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) noexcept { return ::vbslq_u64(a, b, c); } - inline int64x2_t vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) noexcept { return ::vbslq_s64(a, b, c); } - inline float32x4_t vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) noexcept { return ::vbslq_f32(a, b, c); } + XSIMD_INLINE uint8x16_t vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) noexcept { return ::vbslq_u8(a, b, c); } + XSIMD_INLINE int8x16_t vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) noexcept { return ::vbslq_s8(a, b, c); } + XSIMD_INLINE uint16x8_t vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) noexcept { return ::vbslq_u16(a, b, c); } + XSIMD_INLINE int16x8_t vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) noexcept { return ::vbslq_s16(a, b, c); } + XSIMD_INLINE uint32x4_t vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) noexcept { return ::vbslq_u32(a, b, c); } + XSIMD_INLINE int32x4_t vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) noexcept { return ::vbslq_s32(a, b, c); } + XSIMD_INLINE uint64x2_t vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) noexcept { return ::vbslq_u64(a, b, c); } + XSIMD_INLINE int64x2_t vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) noexcept { return ::vbslq_s64(a, b, c); } + XSIMD_INLINE float32x4_t vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) noexcept { return ::vbslq_f32(a, b, c); } } namespace detail @@ -1730,7 +1730,7 @@ namespace xsimd } template = 0> - inline batch select(batch_bool const& cond, batch const& a, batch const& b, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& a, batch const& b, requires_arch) noexcept { using bool_register_type = typename batch_bool::register_type; using register_type = typename batch::register_type; @@ -1743,7 +1743,7 @@ namespace xsimd } template = 0> - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { return select(batch_bool { b... 
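[editor's note] The vbslq wrappers above feed the select() kernel; a minimal sketch of what that bit-select does:

#include <arm_neon.h>

// vbslq_* takes each result bit from the second operand where the mask bit is
// set and from the third operand where it is clear. With the all-ones /
// all-zeros lane masks produced by the comparison kernels, this is a per-lane
// cond ? a : b.
float32x4_t select_f32(uint32x4_t cond, float32x4_t a, float32x4_t b)
{
    return vbslq_f32(cond, a, b);
}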
}, true_br, false_br, neon {}); } @@ -1753,61 +1753,61 @@ namespace xsimd **********/ template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { uint8x8x2_t tmp = vzip_u8(vget_low_u8(lhs), vget_low_u8(rhs)); return vcombine_u8(tmp.val[0], tmp.val[1]); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { int8x8x2_t tmp = vzip_s8(vget_low_s8(lhs), vget_low_s8(rhs)); return vcombine_s8(tmp.val[0], tmp.val[1]); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { uint16x4x2_t tmp = vzip_u16(vget_low_u16(lhs), vget_low_u16(rhs)); return vcombine_u16(tmp.val[0], tmp.val[1]); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { int16x4x2_t tmp = vzip_s16(vget_low_s16(lhs), vget_low_s16(rhs)); return vcombine_s16(tmp.val[0], tmp.val[1]); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { uint32x2x2_t tmp = vzip_u32(vget_low_u32(lhs), vget_low_u32(rhs)); return vcombine_u32(tmp.val[0], tmp.val[1]); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { int32x2x2_t tmp = vzip_s32(vget_low_s32(lhs), vget_low_s32(rhs)); return vcombine_s32(tmp.val[0], tmp.val[1]); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcombine_u64(vget_low_u64(lhs), vget_low_u64(rhs)); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcombine_s64(vget_low_s64(lhs), vget_low_s64(rhs)); } template - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { float32x2x2_t tmp = vzip_f32(vget_low_f32(lhs), vget_low_f32(rhs)); return vcombine_f32(tmp.val[0], tmp.val[1]); @@ -1818,61 +1818,61 @@ namespace xsimd **********/ template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { uint8x8x2_t tmp = vzip_u8(vget_high_u8(lhs), vget_high_u8(rhs)); return vcombine_u8(tmp.val[0], tmp.val[1]); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { int8x8x2_t tmp = vzip_s8(vget_high_s8(lhs), vget_high_s8(rhs)); return vcombine_s8(tmp.val[0], tmp.val[1]); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { uint16x4x2_t tmp = vzip_u16(vget_high_u16(lhs), 
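[editor's note] The zip_lo overloads above all share one shape; a standalone sketch for 8-bit lanes (the other widths only swap the intrinsic suffix):

#include <arm_neon.h>

// Interleave the low halves of the two vectors (vzip on the 64-bit halves
// yields both interleaved halves) and recombine them into one 128-bit result.
uint8x16_t zip_lo_u8(uint8x16_t lhs, uint8x16_t rhs)
{
    uint8x8x2_t tmp = vzip_u8(vget_low_u8(lhs), vget_low_u8(rhs));
    return vcombine_u8(tmp.val[0], tmp.val[1]);
}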
vget_high_u16(rhs)); return vcombine_u16(tmp.val[0], tmp.val[1]); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { int16x4x2_t tmp = vzip_s16(vget_high_s16(lhs), vget_high_s16(rhs)); return vcombine_s16(tmp.val[0], tmp.val[1]); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { uint32x2x2_t tmp = vzip_u32(vget_high_u32(lhs), vget_high_u32(rhs)); return vcombine_u32(tmp.val[0], tmp.val[1]); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { int32x2x2_t tmp = vzip_s32(vget_high_s32(lhs), vget_high_s32(rhs)); return vcombine_s32(tmp.val[0], tmp.val[1]); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcombine_u64(vget_high_u64(lhs), vget_high_u64(rhs)); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcombine_s64(vget_high_s64(lhs), vget_high_s64(rhs)); } template - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { float32x2x2_t tmp = vzip_f32(vget_high_f32(lhs), vget_high_f32(rhs)); return vcombine_f32(tmp.val[0], tmp.val[1]); @@ -1885,14 +1885,14 @@ namespace xsimd namespace detail { template - inline batch extract_pair(batch const&, batch const& /*rhs*/, std::size_t, ::xsimd::detail::index_sequence<>) noexcept + XSIMD_INLINE batch extract_pair(batch const&, batch const& /*rhs*/, std::size_t, ::xsimd::detail::index_sequence<>) noexcept { assert(false && "extract_pair out of bounds"); return batch {}; } template = 0> - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept { if (n == I) { @@ -1905,7 +1905,7 @@ namespace xsimd } template = 0> - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept { if (n == I) { @@ -1918,7 +1918,7 @@ namespace xsimd } template = 0> - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept { if (n == I) { @@ -1931,7 +1931,7 @@ namespace xsimd } template = 0> - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept { if (n == I) { @@ -1944,7 +1944,7 @@ namespace xsimd } template = 0> - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) 
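[editor's note] The extract_pair overloads above wrap vextq; a sketch of what a single instantiation computes and why the runtime index has to be matched against compile-time candidates:

#include <arm_neon.h>

// vextq_* returns a window of the concatenation {lhs, rhs} starting N lanes
// into lhs, and N must be an immediate. That is why the kernels above recurse
// over an index_sequence to find the I equal to the runtime n.
uint8x16_t extract_pair_at_3(uint8x16_t lhs, uint8x16_t rhs)
{
    return vextq_u8(lhs, rhs, 3); // lhs[3..15] followed by rhs[0..2]
}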
noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept { if (n == I) { @@ -1957,7 +1957,7 @@ namespace xsimd } template = 0> - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept { if (n == I) { @@ -1970,7 +1970,7 @@ namespace xsimd } template = 0> - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept { if (n == I) { @@ -1983,7 +1983,7 @@ namespace xsimd } template = 0> - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept { if (n == I) { @@ -1996,7 +1996,7 @@ namespace xsimd } template - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept { if (n == I) { @@ -2009,7 +2009,7 @@ namespace xsimd } template - inline batch extract_pair_impl(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence<0, Is...>) noexcept + XSIMD_INLINE batch extract_pair_impl(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence<0, Is...>) noexcept { if (n == 0) { @@ -2023,7 +2023,7 @@ namespace xsimd } template - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, requires_arch) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, requires_arch) noexcept { constexpr std::size_t size = batch::size; assert(n < size && "index in bounds"); @@ -2037,14 +2037,14 @@ namespace xsimd namespace detail { template - inline batch bitwise_lshift(batch const& /*lhs*/, int /*n*/, ::xsimd::detail::int_sequence<>) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& /*lhs*/, int /*n*/, ::xsimd::detail::int_sequence<>) noexcept { assert(false && "bitwise_lshift out of bounds"); return batch {}; } template = 0> - inline batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2057,7 +2057,7 @@ namespace xsimd } template = 0> - inline batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2070,7 +2070,7 @@ namespace xsimd } template = 0> - inline batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2083,7 +2083,7 @@ namespace xsimd } template = 0> - inline batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2096,7 +2096,7 @@ namespace xsimd } template = 0> - inline batch 
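[editor's note] The bitwise_lshift kernels above recurse over an int_sequence for the same reason: vshlq_n_* only accepts an immediate shift count. A hand-written equivalent for a few counts (a hedged illustration of the idea, not the library's generic code) would look like this:

#include <arm_neon.h>
#include <cassert>

// Match the runtime count against every admissible compile-time value and call
// the intrinsic with that immediate.
int32x4_t lshift_s32(int32x4_t v, int n)
{
    switch (n)
    {
    case 0: return v;                 // shifting by zero is the identity
    case 1: return vshlq_n_s32(v, 1);
    case 2: return vshlq_n_s32(v, 2);
    case 3: return vshlq_n_s32(v, 3);
    // ... one case per admissible immediate, up to 31 for 32-bit lanes
    default: assert(false && "shift amount out of bounds"); return v;
    }
}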
bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2109,7 +2109,7 @@ namespace xsimd } template = 0> - inline batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2122,7 +2122,7 @@ namespace xsimd } template = 0> - inline batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2135,7 +2135,7 @@ namespace xsimd } template = 0> - inline batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2148,7 +2148,7 @@ namespace xsimd } template - inline batch bitwise_lshift_impl(batch const& lhs, int n, ::xsimd::detail::int_sequence<0, Is...>) noexcept + XSIMD_INLINE batch bitwise_lshift_impl(batch const& lhs, int n, ::xsimd::detail::int_sequence<0, Is...>) noexcept { if (n == 0) { @@ -2162,7 +2162,7 @@ namespace xsimd } template - inline batch bitwise_lshift(batch const& lhs, int n, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, int n, requires_arch) noexcept { constexpr int size = sizeof(typename batch::value_type) * 8; assert(0 <= n && n < size && "index in bounds"); @@ -2170,49 +2170,49 @@ namespace xsimd } template = 0> - inline batch bitwise_lshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept { return vshlq_u8(lhs, rhs); } template = 0> - inline batch bitwise_lshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vshlq_s8(lhs, rhs); } template = 0> - inline batch bitwise_lshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept { return vshlq_u16(lhs, rhs); } template = 0> - inline batch bitwise_lshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vshlq_s16(lhs, rhs); } template = 0> - inline batch bitwise_lshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept { return vshlq_u32(lhs, rhs); } template = 0> - inline batch bitwise_lshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vshlq_s32(lhs, rhs); } template = 0> - inline batch bitwise_lshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept { return vshlq_u64(lhs, rhs); } template = 0> - inline batch bitwise_lshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vshlq_s64(lhs, rhs); } @@ -2224,14 
+2224,14 @@ namespace xsimd namespace detail { template - inline batch bitwise_rshift(batch const& /*lhs*/, int /*n*/, ::xsimd::detail::int_sequence<>) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& /*lhs*/, int /*n*/, ::xsimd::detail::int_sequence<>) noexcept { assert(false && "bitwise_rshift out of bounds"); return batch {}; } template = 0> - inline batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2244,7 +2244,7 @@ namespace xsimd } template = 0> - inline batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2257,7 +2257,7 @@ namespace xsimd } template = 0> - inline batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2270,7 +2270,7 @@ namespace xsimd } template = 0> - inline batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2283,7 +2283,7 @@ namespace xsimd } template = 0> - inline batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2296,7 +2296,7 @@ namespace xsimd } template = 0> - inline batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2309,7 +2309,7 @@ namespace xsimd } template = 0> - inline batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2322,7 +2322,7 @@ namespace xsimd } template = 0> - inline batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, int n, ::xsimd::detail::int_sequence) noexcept { if (n == I) { @@ -2335,7 +2335,7 @@ namespace xsimd } template - inline batch bitwise_rshift_impl(batch const& lhs, int n, ::xsimd::detail::int_sequence<0, Is...>) noexcept + XSIMD_INLINE batch bitwise_rshift_impl(batch const& lhs, int n, ::xsimd::detail::int_sequence<0, Is...>) noexcept { if (n == 0) { @@ -2349,7 +2349,7 @@ namespace xsimd } template - inline batch bitwise_rshift(batch const& lhs, int n, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, int n, requires_arch) noexcept { constexpr int size = sizeof(typename batch::value_type) * 8; assert(0 <= n && n < size && "index in bounds"); @@ -2357,37 +2357,37 @@ namespace xsimd } template = 0> - inline batch bitwise_rshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept { return vshlq_u8(lhs, vnegq_s8(rhs)); } template = 0> - inline batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return 
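[editor's note] The per-lane bitwise_rshift overloads below reuse vshlq with negated counts; a minimal sketch of that trick:

#include <arm_neon.h>

// NEON has no variable right-shift instruction, but vshlq_* shifts a lane to
// the right when its (signed) per-lane count is negative, so negating the
// count vector turns the left shift into a right shift.
uint32x4_t rshift_per_lane(uint32x4_t v, int32x4_t counts)
{
    return vshlq_u32(v, vnegq_s32(counts));
}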
vshlq_s8(lhs, vnegq_s8(rhs)); } template = 0> - inline batch bitwise_rshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept { return vshlq_u16(lhs, vnegq_s16(rhs)); } template = 0> - inline batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vshlq_s16(lhs, vnegq_s16(rhs)); } template = 0> - inline batch bitwise_rshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept { return vshlq_u32(lhs, vnegq_s32(rhs)); } template = 0> - inline batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vshlq_s32(lhs, vnegq_s32(rhs)); } @@ -2399,26 +2399,26 @@ namespace xsimd *******/ template = 0> - inline bool all(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& arg, requires_arch) noexcept { uint64x1_t tmp = vand_u64(vget_low_u64(arg), vget_high_u64(arg)); return vget_lane_u64(tmp, 0) == ~0ULL; } template = 0> - inline bool all(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& arg, requires_arch) noexcept { return all(batch_bool(vreinterpretq_u64_u8(arg)), neon {}); } template = 0> - inline bool all(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& arg, requires_arch) noexcept { return all(batch_bool(vreinterpretq_u64_u16(arg)), neon {}); } template = 0> - inline bool all(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& arg, requires_arch) noexcept { return all(batch_bool(vreinterpretq_u64_u32(arg)), neon {}); } @@ -2428,26 +2428,26 @@ namespace xsimd *******/ template = 0> - inline bool any(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& arg, requires_arch) noexcept { uint32x2_t tmp = vqmovn_u64(arg); return vget_lane_u64(vreinterpret_u64_u32(tmp), 0) != 0; } template = 0> - inline bool any(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& arg, requires_arch) noexcept { return any(batch_bool(vreinterpretq_u64_u8(arg)), neon {}); } template = 0> - inline bool any(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& arg, requires_arch) noexcept { return any(batch_bool(vreinterpretq_u64_u16(arg)), neon {}); } template = 0> - inline bool any(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& arg, requires_arch) noexcept { return any(batch_bool(vreinterpretq_u64_u32(arg)), neon {}); } @@ -2456,45 +2456,45 @@ namespace xsimd * bitwise_cast * ****************/ -#define WRAP_CAST(SUFFIX, TYPE) \ - namespace wrap \ - { \ - inline TYPE vreinterpretq_##SUFFIX##_u8(uint8x16_t a) noexcept \ - { \ - return ::vreinterpretq_##SUFFIX##_u8(a); \ - } \ - inline TYPE vreinterpretq_##SUFFIX##_s8(int8x16_t a) noexcept \ - { \ - return ::vreinterpretq_##SUFFIX##_s8(a); \ - } \ - inline TYPE vreinterpretq_##SUFFIX##_u16(uint16x8_t a) noexcept \ - { \ - return ::vreinterpretq_##SUFFIX##_u16(a); \ - } \ - inline TYPE vreinterpretq_##SUFFIX##_s16(int16x8_t a) noexcept \ - { \ - return ::vreinterpretq_##SUFFIX##_s16(a); \ - } \ - inline 
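[editor's note] The ARMv7 `all` reduction above works on 64-bit halves; a standalone sketch of the u64 base case (narrower masks are first reinterpreted as uint64x2_t, as the u8/u16/u32 overloads do):

#include <arm_neon.h>

// AND the two 64-bit halves of the lane mask together and check that every bit
// of the result is set.
bool all_u64(uint64x2_t mask)
{
    uint64x1_t tmp = vand_u64(vget_low_u64(mask), vget_high_u64(mask));
    return vget_lane_u64(tmp, 0) == ~0ULL;
}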
TYPE vreinterpretq_##SUFFIX##_u32(uint32x4_t a) noexcept \ - { \ - return ::vreinterpretq_##SUFFIX##_u32(a); \ - } \ - inline TYPE vreinterpretq_##SUFFIX##_s32(int32x4_t a) noexcept \ - { \ - return ::vreinterpretq_##SUFFIX##_s32(a); \ - } \ - inline TYPE vreinterpretq_##SUFFIX##_u64(uint64x2_t a) noexcept \ - { \ - return ::vreinterpretq_##SUFFIX##_u64(a); \ - } \ - inline TYPE vreinterpretq_##SUFFIX##_s64(int64x2_t a) noexcept \ - { \ - return ::vreinterpretq_##SUFFIX##_s64(a); \ - } \ - inline TYPE vreinterpretq_##SUFFIX##_f32(float32x4_t a) noexcept \ - { \ - return ::vreinterpretq_##SUFFIX##_f32(a); \ - } \ +#define WRAP_CAST(SUFFIX, TYPE) \ + namespace wrap \ + { \ + XSIMD_INLINE TYPE vreinterpretq_##SUFFIX##_u8(uint8x16_t a) noexcept \ + { \ + return ::vreinterpretq_##SUFFIX##_u8(a); \ + } \ + XSIMD_INLINE TYPE vreinterpretq_##SUFFIX##_s8(int8x16_t a) noexcept \ + { \ + return ::vreinterpretq_##SUFFIX##_s8(a); \ + } \ + XSIMD_INLINE TYPE vreinterpretq_##SUFFIX##_u16(uint16x8_t a) noexcept \ + { \ + return ::vreinterpretq_##SUFFIX##_u16(a); \ + } \ + XSIMD_INLINE TYPE vreinterpretq_##SUFFIX##_s16(int16x8_t a) noexcept \ + { \ + return ::vreinterpretq_##SUFFIX##_s16(a); \ + } \ + XSIMD_INLINE TYPE vreinterpretq_##SUFFIX##_u32(uint32x4_t a) noexcept \ + { \ + return ::vreinterpretq_##SUFFIX##_u32(a); \ + } \ + XSIMD_INLINE TYPE vreinterpretq_##SUFFIX##_s32(int32x4_t a) noexcept \ + { \ + return ::vreinterpretq_##SUFFIX##_s32(a); \ + } \ + XSIMD_INLINE TYPE vreinterpretq_##SUFFIX##_u64(uint64x2_t a) noexcept \ + { \ + return ::vreinterpretq_##SUFFIX##_u64(a); \ + } \ + XSIMD_INLINE TYPE vreinterpretq_##SUFFIX##_s64(int64x2_t a) noexcept \ + { \ + return ::vreinterpretq_##SUFFIX##_s64(a); \ + } \ + XSIMD_INLINE TYPE vreinterpretq_##SUFFIX##_f32(float32x4_t a) noexcept \ + { \ + return ::vreinterpretq_##SUFFIX##_f32(a); \ + } \ } WRAP_CAST(u8, uint8x16_t) @@ -2527,7 +2527,7 @@ namespace xsimd }; template - inline const bitwise_caster_impl make_bitwise_caster_impl(R (*... arg)(T)) noexcept + XSIMD_INLINE const bitwise_caster_impl make_bitwise_caster_impl(R (*... arg)(T)) noexcept { return { std::make_tuple(arg...) 
}; } @@ -2566,7 +2566,7 @@ namespace xsimd } template - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { const detail::neon_bitwise_caster caster = { std::make_tuple( @@ -2608,7 +2608,7 @@ namespace xsimd *********/ template - inline batch_bool isnan(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const& arg, requires_arch) noexcept { return !(arg == arg); } @@ -2620,7 +2620,7 @@ namespace xsimd struct slider_left { template - inline batch operator()(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch operator()(batch const& x, requires_arch) noexcept { const auto left = vdupq_n_u8(0); const auto right = bitwise_cast(x).data; @@ -2633,7 +2633,7 @@ namespace xsimd struct slider_left<0> { template - inline batch operator()(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch operator()(batch const& x, requires_arch) noexcept { return x; } @@ -2641,7 +2641,7 @@ namespace xsimd } // namespace detail template - inline batch slide_left(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch slide_left(batch const& x, requires_arch) noexcept { return detail::slider_left {}(x, A {}); } @@ -2653,7 +2653,7 @@ namespace xsimd struct slider_right { template - inline batch operator()(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch operator()(batch const& x, requires_arch) noexcept { const auto left = bitwise_cast(x).data; const auto right = vdupq_n_u8(0); @@ -2666,7 +2666,7 @@ namespace xsimd struct slider_right<16> { template - inline batch operator()(batch const&, requires_arch) noexcept + XSIMD_INLINE batch operator()(batch const&, requires_arch) noexcept { return batch {}; } @@ -2674,7 +2674,7 @@ namespace xsimd } // namespace detail template - inline batch slide_right(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch slide_right(batch const& x, requires_arch) noexcept { return detail::slider_right {}(x, A {}); } @@ -2685,27 +2685,27 @@ namespace xsimd namespace wrap { template - inline uint8x16_t rotate_right_u8(uint8x16_t a, uint8x16_t b) noexcept { return vextq_u8(a, b, N); } + XSIMD_INLINE uint8x16_t rotate_right_u8(uint8x16_t a, uint8x16_t b) noexcept { return vextq_u8(a, b, N); } template - inline int8x16_t rotate_right_s8(int8x16_t a, int8x16_t b) noexcept { return vextq_s8(a, b, N); } + XSIMD_INLINE int8x16_t rotate_right_s8(int8x16_t a, int8x16_t b) noexcept { return vextq_s8(a, b, N); } template - inline uint16x8_t rotate_right_u16(uint16x8_t a, uint16x8_t b) noexcept { return vextq_u16(a, b, N); } + XSIMD_INLINE uint16x8_t rotate_right_u16(uint16x8_t a, uint16x8_t b) noexcept { return vextq_u16(a, b, N); } template - inline int16x8_t rotate_right_s16(int16x8_t a, int16x8_t b) noexcept { return vextq_s16(a, b, N); } + XSIMD_INLINE int16x8_t rotate_right_s16(int16x8_t a, int16x8_t b) noexcept { return vextq_s16(a, b, N); } template - inline uint32x4_t rotate_right_u32(uint32x4_t a, uint32x4_t b) noexcept { return vextq_u32(a, b, N); } + XSIMD_INLINE uint32x4_t rotate_right_u32(uint32x4_t a, uint32x4_t b) noexcept { return vextq_u32(a, b, N); } template - inline int32x4_t rotate_right_s32(int32x4_t a, int32x4_t b) noexcept { return vextq_s32(a, b, N); } + XSIMD_INLINE int32x4_t rotate_right_s32(int32x4_t a, int32x4_t b) noexcept { return vextq_s32(a, b, N); } template - inline uint64x2_t rotate_right_u64(uint64x2_t a, uint64x2_t b) noexcept { return vextq_u64(a, b, N); } + XSIMD_INLINE 
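[editor's note] The isnan kernel above relies on a property of IEEE comparisons; a minimal NEON sketch of the same test:

#include <arm_neon.h>

// NaN is the only value that compares unequal to itself, so the lanes where
// vceqq_f32(x, x) is zero are exactly the NaN lanes.
uint32x4_t isnan_f32(float32x4_t x)
{
    return vmvnq_u32(vceqq_f32(x, x)); // all-ones where x is NaN
}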
uint64x2_t rotate_right_u64(uint64x2_t a, uint64x2_t b) noexcept { return vextq_u64(a, b, N); } template - inline int64x2_t rotate_right_s64(int64x2_t a, int64x2_t b) noexcept { return vextq_s64(a, b, N); } + XSIMD_INLINE int64x2_t rotate_right_s64(int64x2_t a, int64x2_t b) noexcept { return vextq_s64(a, b, N); } template - inline float32x4_t rotate_right_f32(float32x4_t a, float32x4_t b) noexcept { return vextq_f32(a, b, N); } + XSIMD_INLINE float32x4_t rotate_right_f32(float32x4_t a, float32x4_t b) noexcept { return vextq_f32(a, b, N); } } template = 0> - inline batch rotate_right(batch const& a, requires_arch) noexcept + XSIMD_INLINE batch rotate_right(batch const& a, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::neon_dispatcher::binary dispatcher = { @@ -2727,9 +2727,9 @@ namespace xsimd ***********/ template - inline batch swizzle(batch const& self, - batch_constant, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch_constant, + requires_arch) noexcept { static_assert(batch::size == sizeof...(idx), "valid swizzle indices"); std::array::size> data; diff --git a/include/xsimd/arch/xsimd_neon64.hpp b/include/xsimd/arch/xsimd_neon64.hpp index 2469b14f3..d09997033 100644 --- a/include/xsimd/arch/xsimd_neon64.hpp +++ b/include/xsimd/arch/xsimd_neon64.hpp @@ -33,25 +33,25 @@ namespace xsimd *******/ template = 0> - inline bool all(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& arg, requires_arch) noexcept { return vminvq_u32(arg) == ~0U; } template = 0> - inline bool all(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& arg, requires_arch) noexcept { return all(batch_bool(vreinterpretq_u32_u8(arg)), neon64 {}); } template = 0> - inline bool all(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& arg, requires_arch) noexcept { return all(batch_bool(vreinterpretq_u32_u16(arg)), neon64 {}); } template = 0> - inline bool all(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& arg, requires_arch) noexcept { return all(batch_bool(vreinterpretq_u32_u64(arg)), neon64 {}); } @@ -61,25 +61,25 @@ namespace xsimd *******/ template = 0> - inline bool any(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& arg, requires_arch) noexcept { return vmaxvq_u32(arg) != 0; } template = 0> - inline bool any(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& arg, requires_arch) noexcept { return any(batch_bool(vreinterpretq_u32_u8(arg)), neon64 {}); } template = 0> - inline bool any(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& arg, requires_arch) noexcept { return any(batch_bool(vreinterpretq_u32_u16(arg)), neon64 {}); } template = 0> - inline bool any(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& arg, requires_arch) noexcept { return any(batch_bool(vreinterpretq_u32_u64(arg)), neon64 {}); } @@ -90,13 +90,13 @@ namespace xsimd // Required to avoid ambiguous call template - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { return broadcast(val, neon {}); } template - inline batch broadcast(double val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(double val, requires_arch) noexcept { return vdupq_n_f64(val); } @@ -106,13 +106,13 @@ namespace xsimd 
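[editor's note] The neon64 `all`/`any` overloads above use the A64 across-vector reductions instead of the pairwise folding needed on ARMv7; a sketch of the core test:

#include <arm_neon.h>

// The horizontal minimum of a lane mask is all-ones only if every lane is set,
// and the horizontal maximum is non-zero if at least one lane is set.
bool all_mask(uint32x4_t mask) { return vminvq_u32(mask) == ~0U; }
bool any_mask(uint32x4_t mask) { return vmaxvq_u32(mask) != 0; }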
*******/ template - inline batch set(batch const&, requires_arch, double d0, double d1) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, double d0, double d1) noexcept { return float64x2_t { d0, d1 }; } template - inline batch_bool set(batch_bool const&, requires_arch, bool b0, bool b1) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, bool b0, bool b1) noexcept { using register_type = typename batch_bool::register_type; using unsigned_type = as_unsigned_integer_t; @@ -125,7 +125,7 @@ namespace xsimd *************/ template - inline batch from_bool(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { return vreinterpretq_f64_u64(vandq_u64(arg, vreinterpretq_u64_f64(vdupq_n_f64(1.)))); } @@ -142,13 +142,13 @@ namespace xsimd #endif template - inline batch load_aligned(double const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(double const* src, convert, requires_arch) noexcept { return xsimd_aligned_load(vld1q_f64, double*, src); } template - inline batch load_unaligned(double const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(double const* src, convert, requires_arch) noexcept { return vld1q_f64(src); } @@ -159,13 +159,13 @@ namespace xsimd *********/ template - inline void store_aligned(double* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_aligned(double* dst, batch const& src, requires_arch) noexcept { vst1q_f64(dst, src); } template - inline void store_unaligned(double* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(double* dst, batch const& src, requires_arch) noexcept { return store_aligned(dst, src, A {}); } @@ -175,7 +175,7 @@ namespace xsimd ****************/ template - inline batch, A> load_complex_aligned(std::complex const* mem, convert>, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex_aligned(std::complex const* mem, convert>, requires_arch) noexcept { using real_batch = batch; const double* buf = reinterpret_cast(mem); @@ -186,7 +186,7 @@ namespace xsimd } template - inline batch, A> load_complex_unaligned(std::complex const* mem, convert> cvt, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex_unaligned(std::complex const* mem, convert> cvt, requires_arch) noexcept { return load_complex_aligned(mem, cvt, A {}); } @@ -196,7 +196,7 @@ namespace xsimd *****************/ template - inline void store_complex_aligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept + XSIMD_INLINE void store_complex_aligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { float64x2x2_t tmp; tmp.val[0] = src.real(); @@ -206,7 +206,7 @@ namespace xsimd } template - inline void store_complex_unaligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept + XSIMD_INLINE void store_complex_unaligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { store_complex_aligned(dst, src, A {}); } @@ -216,19 +216,19 @@ namespace xsimd *******/ template = 0> - inline batch neg(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& rhs, requires_arch) noexcept { return vreinterpretq_u64_s64(vnegq_s64(vreinterpretq_s64_u64(rhs))); } template = 0> - inline batch neg(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& rhs, requires_arch) noexcept { return vnegq_s64(rhs); } template - inline batch neg(batch const& rhs, requires_arch) noexcept + 
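[editor's note] The float64 from_bool kernel above converts a lane mask into numeric 0.0/1.0 values; a standalone sketch of that one-liner:

#include <arm_neon.h>

// AND the all-ones/all-zeros mask with the bit pattern of 1.0 so that true
// lanes become 1.0 and false lanes become 0.0.
float64x2_t from_bool_f64(uint64x2_t mask)
{
    return vreinterpretq_f64_u64(
        vandq_u64(mask, vreinterpretq_u64_f64(vdupq_n_f64(1.0))));
}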
XSIMD_INLINE batch neg(batch const& rhs, requires_arch) noexcept { return vnegq_f64(rhs); } @@ -238,7 +238,7 @@ namespace xsimd *******/ template - inline batch add(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vaddq_f64(lhs, rhs); } @@ -248,7 +248,7 @@ namespace xsimd ********/ template - inline batch sadd(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& lhs, batch const& rhs, requires_arch) noexcept { return add(lhs, rhs, neon64 {}); } @@ -258,7 +258,7 @@ namespace xsimd *******/ template - inline batch sub(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vsubq_f64(lhs, rhs); } @@ -268,7 +268,7 @@ namespace xsimd ********/ template - inline batch ssub(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch ssub(batch const& lhs, batch const& rhs, requires_arch) noexcept { return sub(lhs, rhs, neon64 {}); } @@ -278,7 +278,7 @@ namespace xsimd *******/ template - inline batch mul(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vmulq_f64(lhs, rhs); } @@ -289,19 +289,19 @@ namespace xsimd #if defined(XSIMD_FAST_INTEGER_DIVISION) template = 0> - inline batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcvtq_u64_f64(vcvtq_f64_u64(lhs) / vcvtq_f64_u64(rhs)); } template = 0> - inline batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcvtq_s64_f64(vcvtq_f64_s64(lhs) / vcvtq_f64_s64(rhs)); } #endif template - inline batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vdivq_f64(lhs, rhs); } @@ -311,37 +311,37 @@ namespace xsimd ******/ template = 0> - inline batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vceqq_u64(lhs, rhs); } template = 0> - inline batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vceqq_s64(lhs, rhs); } template - inline batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vceqq_f64(lhs, rhs); } template = 0> - inline batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return vceqq_u64(lhs, rhs); } template = 0> - inline batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return vceqq_u64(lhs, rhs); } template - inline batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return vceqq_u64(lhs, rhs); } @@ -352,25 
+352,25 @@ namespace xsimd namespace detail { template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { return vcvtq_f64_s64(x); } template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { return vcvtq_f64_u64(x); } template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { return vcvtq_s64_f64(x); } template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { return vcvtq_u64_f64(x); } @@ -382,19 +382,19 @@ namespace xsimd ******/ template = 0> - inline batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcltq_u64(lhs, rhs); } template = 0> - inline batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcltq_s64(lhs, rhs); } template - inline batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcltq_f64(lhs, rhs); } @@ -404,19 +404,19 @@ namespace xsimd ******/ template = 0> - inline batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcleq_u64(lhs, rhs); } template = 0> - inline batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcleq_s64(lhs, rhs); } template - inline batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcleq_f64(lhs, rhs); } @@ -426,19 +426,19 @@ namespace xsimd ******/ template = 0> - inline batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcgtq_u64(lhs, rhs); } template = 0> - inline batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcgtq_s64(lhs, rhs); } template - inline batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcgtq_f64(lhs, rhs); } @@ -448,19 +448,19 @@ namespace xsimd ******/ template = 0> - inline batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcgeq_u64(lhs, rhs); } template = 0> - inline batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vcgeq_s64(lhs, rhs); } template - inline batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& 
lhs, batch const& rhs, requires_arch) noexcept { return vcgeq_f64(lhs, rhs); } @@ -470,7 +470,7 @@ namespace xsimd *******************/ template - inline batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept + XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept { using register_type = typename batch_bool::register_type; return register_type(self); @@ -481,14 +481,14 @@ namespace xsimd ***************/ template - inline batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(lhs), vreinterpretq_u64_f64(rhs))); } template - inline batch_bool bitwise_and(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return vandq_u64(lhs, rhs); } @@ -498,14 +498,14 @@ namespace xsimd **************/ template - inline batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(lhs), vreinterpretq_u64_f64(rhs))); } template - inline batch_bool bitwise_or(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return vorrq_u64(lhs, rhs); } @@ -515,14 +515,14 @@ namespace xsimd ***************/ template - inline batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(lhs), vreinterpretq_u64_f64(rhs))); } template - inline batch_bool bitwise_xor(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return veorq_u64(lhs, rhs); } @@ -532,7 +532,7 @@ namespace xsimd *******/ template - inline batch_bool neq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return bitwise_xor(lhs, rhs, A {}); } @@ -542,13 +542,13 @@ namespace xsimd ***************/ template - inline batch bitwise_not(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& rhs, requires_arch) noexcept { return vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(rhs))); } template - inline batch_bool bitwise_not(batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& rhs, requires_arch) noexcept { return detail::bitwise_not_u64(rhs); } @@ -558,14 +558,14 @@ namespace xsimd ******************/ template - inline batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(lhs), vreinterpretq_u64_f64(rhs))); } template - inline batch_bool bitwise_andnot(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& lhs, batch_bool 
const& rhs, requires_arch) noexcept { return vbicq_u64(lhs, rhs); } @@ -575,7 +575,7 @@ namespace xsimd *******/ template - inline batch min(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vminq_f64(lhs, rhs); } @@ -585,7 +585,7 @@ namespace xsimd *******/ template - inline batch max(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vmaxq_f64(lhs, rhs); } @@ -595,34 +595,34 @@ namespace xsimd *******/ template = 0> - inline batch abs(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& rhs, requires_arch) noexcept { return rhs; } template = 0> - inline batch abs(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& rhs, requires_arch) noexcept { return vabsq_s64(rhs); } template - inline batch abs(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& rhs, requires_arch) noexcept { return vabsq_f64(rhs); } template - inline batch nearbyint_as_int(batch const& self, - requires_arch) noexcept + XSIMD_INLINE batch nearbyint_as_int(batch const& self, + requires_arch) noexcept { return vcvtnq_s32_f32(self); } #if !defined(__GNUC__) template - inline batch nearbyint_as_int(batch const& self, - requires_arch) noexcept + XSIMD_INLINE batch nearbyint_as_int(batch const& self, + requires_arch) noexcept { return vcvtnq_s64_f64(self); } @@ -633,7 +633,7 @@ namespace xsimd **************/ template - inline batch + XSIMD_INLINE batch reciprocal(const batch& x, kernel::requires_arch) noexcept { @@ -645,7 +645,7 @@ namespace xsimd ********/ template - inline batch rsqrt(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch rsqrt(batch const& rhs, requires_arch) noexcept { return vrsqrteq_f64(rhs); } @@ -655,7 +655,7 @@ namespace xsimd ********/ template - inline batch sqrt(batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch sqrt(batch const& rhs, requires_arch) noexcept { return vsqrtq_f64(rhs); } @@ -666,13 +666,13 @@ namespace xsimd #ifdef __ARM_FEATURE_FMA template - inline batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return vfmaq_f64(z, x, y); } template - inline batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return vfmaq_f64(-z, x, y); } @@ -683,7 +683,7 @@ namespace xsimd *********/ template - inline batch haddp(const batch* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(const batch* row, requires_arch) noexcept { return vpaddq_f64(row[0], row[1]); } @@ -693,7 +693,7 @@ namespace xsimd **********/ template - inline batch insert(batch const& self, double val, index, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, double val, index, requires_arch) noexcept { return vsetq_lane_f64(val, self, I); } @@ -705,60 +705,60 @@ namespace xsimd // Wrap reducer intrinsics so we can pass them as function pointers // - OP: intrinsics name prefix, e.g., vorrq -#define WRAP_REDUCER_INT_EXCLUDING_64(OP) \ - namespace wrap \ - { \ - inline uint8_t OP##_u8(uint8x16_t a) noexcept \ - { \ - return ::OP##_u8(a); \ - } \ - inline int8_t OP##_s8(int8x16_t a) noexcept \ - { \ - return ::OP##_s8(a); \ - } \ - inline uint16_t 
OP##_u16(uint16x8_t a) noexcept \ - { \ - return ::OP##_u16(a); \ - } \ - inline int16_t OP##_s16(int16x8_t a) noexcept \ - { \ - return ::OP##_s16(a); \ - } \ - inline uint32_t OP##_u32(uint32x4_t a) noexcept \ - { \ - return ::OP##_u32(a); \ - } \ - inline int32_t OP##_s32(int32x4_t a) noexcept \ - { \ - return ::OP##_s32(a); \ - } \ +#define WRAP_REDUCER_INT_EXCLUDING_64(OP) \ + namespace wrap \ + { \ + XSIMD_INLINE uint8_t OP##_u8(uint8x16_t a) noexcept \ + { \ + return ::OP##_u8(a); \ + } \ + XSIMD_INLINE int8_t OP##_s8(int8x16_t a) noexcept \ + { \ + return ::OP##_s8(a); \ + } \ + XSIMD_INLINE uint16_t OP##_u16(uint16x8_t a) noexcept \ + { \ + return ::OP##_u16(a); \ + } \ + XSIMD_INLINE int16_t OP##_s16(int16x8_t a) noexcept \ + { \ + return ::OP##_s16(a); \ + } \ + XSIMD_INLINE uint32_t OP##_u32(uint32x4_t a) noexcept \ + { \ + return ::OP##_u32(a); \ + } \ + XSIMD_INLINE int32_t OP##_s32(int32x4_t a) noexcept \ + { \ + return ::OP##_s32(a); \ + } \ } -#define WRAP_REDUCER_INT(OP) \ - WRAP_REDUCER_INT_EXCLUDING_64(OP) \ - namespace wrap \ - { \ - inline uint64_t OP##_u64(uint64x2_t a) noexcept \ - { \ - return ::OP##_u64(a); \ - } \ - inline int64_t OP##_s64(int64x2_t a) noexcept \ - { \ - return ::OP##_s64(a); \ - } \ +#define WRAP_REDUCER_INT(OP) \ + WRAP_REDUCER_INT_EXCLUDING_64(OP) \ + namespace wrap \ + { \ + XSIMD_INLINE uint64_t OP##_u64(uint64x2_t a) noexcept \ + { \ + return ::OP##_u64(a); \ + } \ + XSIMD_INLINE int64_t OP##_s64(int64x2_t a) noexcept \ + { \ + return ::OP##_s64(a); \ + } \ } -#define WRAP_REDUCER_FLOAT(OP) \ - namespace wrap \ - { \ - inline float OP##_f32(float32x4_t a) noexcept \ - { \ - return ::OP##_f32(a); \ - } \ - inline double OP##_f64(float64x2_t a) noexcept \ - { \ - return ::OP##_f64(a); \ - } \ +#define WRAP_REDUCER_FLOAT(OP) \ + namespace wrap \ + { \ + XSIMD_INLINE float OP##_f32(float32x4_t a) noexcept \ + { \ + return ::OP##_f32(a); \ + } \ + XSIMD_INLINE double OP##_f64(float64x2_t a) noexcept \ + { \ + return ::OP##_f64(a); \ + } \ } namespace detail @@ -852,7 +852,7 @@ namespace xsimd WRAP_REDUCER_FLOAT(vaddvq) template = 0> - inline typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept + XSIMD_INLINE typename batch::value_type reduce_add(batch const& arg, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::neon_reducer_dispatcher::unary dispatcher = { @@ -872,19 +872,19 @@ namespace xsimd namespace wrap { - inline uint64_t vmaxvq_u64(uint64x2_t a) noexcept + XSIMD_INLINE uint64_t vmaxvq_u64(uint64x2_t a) noexcept { return std::max(vdupd_laneq_u64(a, 0), vdupd_laneq_u64(a, 1)); } - inline int64_t vmaxvq_s64(int64x2_t a) noexcept + XSIMD_INLINE int64_t vmaxvq_s64(int64x2_t a) noexcept { return std::max(vdupd_laneq_s64(a, 0), vdupd_laneq_s64(a, 1)); } } template = 0> - inline typename batch::value_type reduce_max(batch const& arg, requires_arch) noexcept + XSIMD_INLINE typename batch::value_type reduce_max(batch const& arg, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::neon_reducer_dispatcher::unary dispatcher = { @@ -904,19 +904,19 @@ namespace xsimd namespace wrap { - inline uint64_t vminvq_u64(uint64x2_t a) noexcept + XSIMD_INLINE uint64_t vminvq_u64(uint64x2_t a) noexcept { return std::min(vdupd_laneq_u64(a, 0), vdupd_laneq_u64(a, 1)); } - inline int64_t vminvq_s64(int64x2_t a) noexcept + XSIMD_INLINE int64_t vminvq_s64(int64x2_t a) noexcept { return std::min(vdupd_laneq_s64(a, 0), vdupd_laneq_s64(a, 1)); } } template = 0> 
- inline typename batch::value_type reduce_min(batch const& arg, requires_arch) noexcept + XSIMD_INLINE typename batch::value_type reduce_min(batch const& arg, requires_arch) noexcept { using register_type = typename batch::register_type; const detail::neon_reducer_dispatcher::unary dispatcher = { @@ -936,16 +936,16 @@ namespace xsimd **********/ template - inline batch select(batch_bool const& cond, batch const& a, batch const& b, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& a, batch const& b, requires_arch) noexcept { return vbslq_f64(cond, a, b); } template - inline batch select(batch_bool_constant const&, - batch const& true_br, - batch const& false_br, - requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, + batch const& true_br, + batch const& false_br, + requires_arch) noexcept { return select(batch_bool { b... }, true_br, false_br, neon64 {}); } @@ -953,61 +953,61 @@ namespace xsimd * zip_lo * **********/ template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip1q_u8(lhs, rhs); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip1q_s8(lhs, rhs); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip1q_u16(lhs, rhs); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip1q_s16(lhs, rhs); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip1q_u32(lhs, rhs); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip1q_s32(lhs, rhs); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip1q_u64(lhs, rhs); } template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip1q_s64(lhs, rhs); } template - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip1q_f32(lhs, rhs); } template - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip1q_f64(lhs, rhs); } @@ -1017,61 +1017,61 @@ namespace xsimd **********/ template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip2q_u8(lhs, rhs); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch 
const& lhs, batch const& rhs, requires_arch) noexcept { return vzip2q_s8(lhs, rhs); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip2q_u16(lhs, rhs); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip2q_s16(lhs, rhs); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip2q_u32(lhs, rhs); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip2q_s32(lhs, rhs); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip2q_u64(lhs, rhs); } template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip2q_s64(lhs, rhs); } template - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip2q_f32(lhs, rhs); } template - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vzip2q_f64(lhs, rhs); } @@ -1083,8 +1083,8 @@ namespace xsimd namespace detail { template - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, - ::xsimd::detail::index_sequence) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, + ::xsimd::detail::index_sequence) noexcept { if (n == I) { @@ -1098,7 +1098,7 @@ namespace xsimd } template - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, requires_arch) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, requires_arch) noexcept { constexpr std::size_t size = batch::size; assert(n < size && "index in bounds"); @@ -1110,25 +1110,25 @@ namespace xsimd ******************/ template = 0> - inline batch bitwise_rshift(batch const& lhs, int n, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, int n, requires_arch) noexcept { return bitwise_rshift(lhs, n, neon {}); } template = 0> - inline batch bitwise_rshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, batch, A> const& rhs, requires_arch) noexcept { return vshlq_u64(lhs, vnegq_s64(rhs)); } template = 0> - inline batch bitwise_rshift(batch const& lhs, int n, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, int n, requires_arch) noexcept { return bitwise_rshift(lhs, n, neon {}); } template = 0> - inline batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return vshlq_s64(lhs, vnegq_s64(rhs)); } @@ -1137,17 +1137,17 @@ namespace xsimd * bitwise_cast * 
****************/ -#define WRAP_CAST(SUFFIX, TYPE) \ - namespace wrap \ - { \ - inline float64x2_t vreinterpretq_f64_##SUFFIX(TYPE a) noexcept \ - { \ - return ::vreinterpretq_f64_##SUFFIX(a); \ - } \ - inline TYPE vreinterpretq_##SUFFIX##_f64(float64x2_t a) noexcept \ - { \ - return ::vreinterpretq_##SUFFIX##_f64(a); \ - } \ +#define WRAP_CAST(SUFFIX, TYPE) \ + namespace wrap \ + { \ + XSIMD_INLINE float64x2_t vreinterpretq_f64_##SUFFIX(TYPE a) noexcept \ + { \ + return ::vreinterpretq_f64_##SUFFIX(a); \ + } \ + XSIMD_INLINE TYPE vreinterpretq_##SUFFIX##_f64(float64x2_t a) noexcept \ + { \ + return ::vreinterpretq_##SUFFIX##_f64(a); \ + } \ } WRAP_CAST(u8, uint8x16_t) @@ -1163,7 +1163,7 @@ namespace xsimd #undef WRAP_CAST template - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { using caster_type = detail::bitwise_caster_impl - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { using caster_type = detail::bitwise_caster_neon64 - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return arg; } @@ -1228,7 +1228,7 @@ namespace xsimd *********/ template - inline batch_bool isnan(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const& arg, requires_arch) noexcept { return !(arg == arg); } @@ -1237,7 +1237,7 @@ namespace xsimd * rotate_right * ****************/ template - inline batch rotate_right(batch const& a, requires_arch) noexcept + XSIMD_INLINE batch rotate_right(batch const& a, requires_arch) noexcept { return vextq_f64(a, a, N); } @@ -1252,23 +1252,23 @@ namespace xsimd * swizzle (dynamic) * *********************/ template - inline batch swizzle(batch const& self, batch idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch idx, + requires_arch) noexcept { return vqtbl1q_u8(self, idx); } template - inline batch swizzle(batch const& self, batch idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch idx, + requires_arch) noexcept { return vqtbl1q_s8(self, idx); } template - inline batch swizzle(batch const& self, - batch idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch idx, + requires_arch) noexcept { using batch_type = batch; using index_type = batch; @@ -1278,17 +1278,17 @@ namespace xsimd } template - inline batch swizzle(batch const& self, - batch idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch idx, + requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), idx, neon64 {})); } template - inline batch swizzle(batch const& self, - batch idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch idx, + requires_arch) noexcept { using batch_type = batch; using index_type = batch; @@ -1298,17 +1298,17 @@ namespace xsimd } template - inline batch swizzle(batch const& self, - batch idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch idx, + requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), idx, neon64 {})); } template - inline batch swizzle(batch const& self, - batch idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch idx, + 
requires_arch) noexcept { using batch_type = batch; using index_type = batch; @@ -1318,25 +1318,25 @@ namespace xsimd } template - inline batch swizzle(batch const& self, - batch idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch idx, + requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), idx, neon64 {})); } template - inline batch swizzle(batch const& self, - batch idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch idx, + requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), idx, neon64 {})); } template - inline batch swizzle(batch const& self, - batch idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch idx, + requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), idx, neon64 {})); } @@ -1388,7 +1388,7 @@ namespace xsimd using index_burst_t = typename index_burst::type; template - inline index_burst_t burst_index(B) + XSIMD_INLINE index_burst_t burst_index(B) { return index_burst_t(); } @@ -1396,106 +1396,106 @@ namespace xsimd template - inline batch swizzle(batch const& self, - batch_constant idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch_constant idx, + requires_arch) noexcept { return vqtbl1q_u8(self, batch(idx)); } template - inline batch swizzle(batch const& self, - batch_constant idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch_constant idx, + requires_arch) noexcept { return vqtbl1q_s8(self, batch(idx)); } template - inline batch swizzle(batch const& self, - batch_constant idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch_constant idx, + requires_arch) noexcept { using batch_type = batch; return vreinterpretq_u16_u8(swizzle(batch_type(vreinterpretq_u8_u16(self)), detail::burst_index(idx), A())); } template - inline batch swizzle(batch const& self, - batch_constant idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch_constant idx, + requires_arch) noexcept { using batch_type = batch; return vreinterpretq_s16_s8(swizzle(batch_type(vreinterpretq_s8_s16(self)), detail::burst_index(idx), A())); } template - inline batch swizzle(batch const& self, - batch_constant idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch_constant idx, + requires_arch) noexcept { using batch_type = batch; return vreinterpretq_u32_u8(swizzle(batch_type(vreinterpretq_u8_u32(self)), detail::burst_index(idx), A())); } template - inline batch swizzle(batch const& self, - batch_constant idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch_constant idx, + requires_arch) noexcept { using batch_type = batch; return vreinterpretq_s32_s8(swizzle(batch_type(vreinterpretq_s8_s32(self)), detail::burst_index(idx), A())); } template - inline batch swizzle(batch const& self, - batch_constant idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch_constant idx, + requires_arch) noexcept { using batch_type = batch; return vreinterpretq_u64_u8(swizzle(batch_type(vreinterpretq_u8_u64(self)), detail::burst_index(idx), A())); } template - inline batch swizzle(batch const& self, - batch_constant idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch_constant idx, + requires_arch) noexcept { using batch_type = batch; return vreinterpretq_s64_s8(swizzle(batch_type(vreinterpretq_s8_s64(self)), 
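/*
 * Context note (not introduced by this patch): as far as these hunks show, the
 * constant-index swizzle on neon64 is only implemented natively for 8-bit lanes
 * (vqtbl1q_u8 / vqtbl1q_s8). Wider element types reinterpret the register as
 * bytes, expand each lane index into a run of consecutive byte indices via
 * detail::burst_index, perform the byte-level table lookup, and reinterpret the
 * result back. float and double reuse the unsigned-integer path through bitwise
 * reinterprets, and complex batches swizzle their real and imaginary parts
 * independently with the same index pattern.
 */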
detail::burst_index(idx), A())); } template - inline batch swizzle(batch const& self, - batch_constant idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch_constant idx, + requires_arch) noexcept { using batch_type = batch; return vreinterpretq_f32_u8(swizzle(batch_type(vreinterpretq_u8_f32(self)), detail::burst_index(idx), A())); } template - inline batch swizzle(batch const& self, - batch_constant idx, - requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, + batch_constant idx, + requires_arch) noexcept { using batch_type = batch; return vreinterpretq_f64_u8(swizzle(batch_type(vreinterpretq_u8_f64(self)), detail::burst_index(idx), A())); } template - inline batch, A> swizzle(batch, A> const& self, - batch_constant idx, - requires_arch) noexcept + XSIMD_INLINE batch, A> swizzle(batch, A> const& self, + batch_constant idx, + requires_arch) noexcept { return batch>(swizzle(self.real(), idx, A()), swizzle(self.imag(), idx, A())); } template - inline batch, A> swizzle(batch, A> const& self, - batch_constant idx, - requires_arch) noexcept + XSIMD_INLINE batch, A> swizzle(batch, A> const& self, + batch_constant idx, + requires_arch) noexcept { return batch>(swizzle(self.real(), idx, A()), swizzle(self.imag(), idx, A())); } diff --git a/include/xsimd/arch/xsimd_rvv.hpp b/include/xsimd/arch/xsimd_rvv.hpp index 2b8cebe5c..75f1145cd 100644 --- a/include/xsimd/arch/xsimd_rvv.hpp +++ b/include/xsimd/arch/xsimd_rvv.hpp @@ -384,7 +384,7 @@ namespace xsimd } template > - inline batch rvv_to_unsigned_batch(batch const& arg) noexcept + XSIMD_INLINE batch rvv_to_unsigned_batch(batch const& arg) noexcept { return rvvreinterpret(arg.data); } @@ -413,18 +413,18 @@ namespace xsimd , size_t(bvec)); template - inline rvv_bool_t pmask8(uint8_t mask) noexcept + XSIMD_INLINE rvv_bool_t pmask8(uint8_t mask) noexcept { return rvv_bool_t(mask); } template - inline rvv_bool_t pmask(uint64_t mask) noexcept + XSIMD_INLINE rvv_bool_t pmask(uint64_t mask) noexcept { return rvv_bool_t(mask); } template - inline rvv_reg_t vindex() noexcept + XSIMD_INLINE rvv_reg_t vindex() noexcept { auto index = rvvid(T {}); if (shift < 0) @@ -462,7 +462,7 @@ namespace xsimd namespace detail { template - inline detail::rvv_reg_t broadcast(T arg) noexcept + XSIMD_INLINE detail::rvv_reg_t broadcast(T arg) noexcept { // A bit of a dance, here, because rvvmv_splat has no other // argument from which to deduce type, and T=char is not @@ -475,7 +475,7 @@ namespace xsimd // broadcast template - inline batch broadcast(T arg, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T arg, requires_arch) noexcept { return detail::broadcast(arg); } @@ -491,13 +491,13 @@ namespace xsimd } template = 0> - inline batch load_aligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* src, convert, requires_arch) noexcept { return detail::rvvle(reinterpret_cast const*>(src)); } template = 0> - inline batch load_unaligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* src, convert, requires_arch) noexcept { return load_aligned(src, convert(), rvv {}); } @@ -506,14 +506,14 @@ namespace xsimd namespace detail { template = types::detail::rvv_width_m1, int>::type = 0> - inline rvv_reg_t rvvabut(rvv_reg_t const& lo, rvv_reg_t const& hi) noexcept + XSIMD_INLINE rvv_reg_t rvvabut(rvv_reg_t const& lo, rvv_reg_t const& hi) noexcept { typename rvv_reg_t::register_type tmp; tmp = __riscv_vset(tmp, 0, lo); return __riscv_vset(tmp, 1, 
hi); } - template ::type = 0> inline rvv_reg_t rvvabut(rvv_reg_t const& lo, rvv_reg_t const& hi) noexcept + template ::type = 0> XSIMD_INLINE rvv_reg_t rvvabut(rvv_reg_t const& lo, rvv_reg_t const& hi) noexcept { return __riscv_vslideup(lo, hi, lo.vl, lo.vl * 2); } @@ -544,7 +544,7 @@ namespace xsimd } template = 0> - inline batch, A> load_complex(batch const& lo, batch const& hi, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& lo, batch const& hi, requires_arch) noexcept { const auto real_index = vindex, 0, 1>(); const auto imag_index = vindex, 1, 1>(); @@ -561,13 +561,13 @@ namespace xsimd *********/ template = 0> - inline void store_aligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* dst, batch const& src, requires_arch) noexcept { detail::rvvse(reinterpret_cast*>(dst), src); } template = 0> - inline void store_unaligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(T* dst, batch const& src, requires_arch) noexcept { store_aligned(dst, src, rvv {}); } @@ -590,7 +590,7 @@ namespace xsimd // scatter template = 0> - inline void scatter(batch const& vals, T* dst, batch const& index, kernel::requires_arch) noexcept + XSIMD_INLINE void scatter(batch const& vals, T* dst, batch const& index, kernel::requires_arch) noexcept { using UU = as_unsigned_integer_t; const auto uindex = detail::rvv_to_unsigned_batch(index); @@ -602,7 +602,7 @@ namespace xsimd // gather template = 0> - inline batch gather(batch const&, T const* src, batch const& index, kernel::requires_arch) noexcept + XSIMD_INLINE batch gather(batch const&, T const* src, batch const& index, kernel::requires_arch) noexcept { using UU = as_unsigned_integer_t; const auto uindex = detail::rvv_to_unsigned_batch(index); @@ -698,63 +698,63 @@ namespace xsimd // add template = 0> - inline batch add(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvadd(lhs, rhs); } // sadd template = 0> - inline batch sadd(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvsadd(lhs, rhs); } // sub template = 0> - inline batch sub(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvsub(lhs, rhs); } // ssub template = 0> - inline batch ssub(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch ssub(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvssub(lhs, rhs); } // mul template = 0> - inline batch mul(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmul(lhs, rhs); } // div template = 0> - inline batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvdiv(lhs, rhs); } // max template = 0> - inline batch max(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmax(lhs, rhs); } // min template = 0> - inline batch min(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& lhs, batch 
const& rhs, requires_arch) noexcept { return detail::rvvmin(lhs, rhs); } // neg template = 0> - inline batch neg(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& arg, requires_arch) noexcept { using S = as_signed_integer_t; const auto as_signed = detail::rvvreinterpret(arg); @@ -763,27 +763,27 @@ namespace xsimd } template = 0> - inline batch neg(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& arg, requires_arch) noexcept { return detail::rvvneg(arg); } // abs template = 0> - inline batch abs(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& arg, requires_arch) noexcept { return arg; } template = 0> - inline batch abs(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& arg, requires_arch) noexcept { return detail::rvvabs(arg); } // fma: x * y + z template = 0> - inline batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { // also detail::rvvmadd(x, y, z); return detail::rvvmacc(z, x, y); @@ -791,7 +791,7 @@ namespace xsimd // fnma: z - x * y template = 0> - inline batch fnma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fnma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { // also detail::rvvnmsub(x, y, z); return detail::rvvnmsac(z, x, y); @@ -799,7 +799,7 @@ namespace xsimd // fms: x * y - z template = 0> - inline batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { // also vfmsac(z, x, y), but lacking integer version // also vfmsub(x, y, z), but lacking integer version @@ -808,7 +808,7 @@ namespace xsimd // fnms: - x * y - z template = 0> - inline batch fnms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fnms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { // also vfnmacc(z, x, y), but lacking integer version // also vfnmadd(x, y, z), but lacking integer version @@ -835,13 +835,13 @@ namespace xsimd // bitwise_and template = 0> - inline batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvand(lhs, rhs); } template = 0> - inline batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = detail::rvv_to_unsigned_batch(lhs); const auto rhs_bits = detail::rvv_to_unsigned_batch(rhs); @@ -850,21 +850,21 @@ namespace xsimd } template = 0> - inline batch_bool bitwise_and(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return detail::rvvmand(lhs, rhs); } // bitwise_andnot template = 0> - inline batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto not_rhs = detail::rvvnot(rhs); return detail::rvvand(lhs, not_rhs); } template = 0> - inline batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch 
bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = detail::rvv_to_unsigned_batch(lhs); const auto rhs_bits = detail::rvv_to_unsigned_batch(rhs); @@ -874,20 +874,20 @@ namespace xsimd } template = 0> - inline batch_bool bitwise_andnot(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return detail::rvvmandn(lhs, rhs); } // bitwise_or template = 0> - inline batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvor(lhs, rhs); } template = 0> - inline batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = detail::rvv_to_unsigned_batch(lhs); const auto rhs_bits = detail::rvv_to_unsigned_batch(rhs); @@ -896,20 +896,20 @@ namespace xsimd } template = 0> - inline batch_bool bitwise_or(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return detail::rvvmor(lhs, rhs); } // bitwise_xor template = 0> - inline batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvxor(lhs, rhs); } template = 0> - inline batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = detail::rvv_to_unsigned_batch(lhs); const auto rhs_bits = detail::rvv_to_unsigned_batch(rhs); @@ -918,20 +918,20 @@ namespace xsimd } template = 0> - inline batch_bool bitwise_xor(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return detail::rvvmxor(lhs, rhs); } // bitwise_not template = 0> - inline batch bitwise_not(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& arg, requires_arch) noexcept { return detail::rvvnot(arg); } template = 0> - inline batch bitwise_not(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& arg, requires_arch) noexcept { const auto arg_bits = detail::rvv_to_unsigned_batch(arg); const auto result_bits = detail::rvvnot(arg_bits); @@ -939,7 +939,7 @@ namespace xsimd } template = 0> - inline batch_bool bitwise_not(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& arg, requires_arch) noexcept { return detail::rvvmnot(arg); } @@ -962,7 +962,7 @@ namespace xsimd // bitwise_lshift template = 0> - inline batch bitwise_lshift(batch const& arg, int n, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& arg, int n, requires_arch) noexcept { constexpr size_t size = sizeof(typename batch::value_type) * 8; assert(0 <= n && static_cast(n) < size && "index in bounds"); @@ -970,14 +970,14 @@ namespace xsimd } template = 0> - inline batch bitwise_lshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return 
detail::rvvsll(lhs, detail::rvv_to_unsigned_batch(rhs)); } // bitwise_rshift template = 0> - inline batch bitwise_rshift(batch const& arg, int n, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& arg, int n, requires_arch) noexcept { constexpr size_t size = sizeof(typename batch::value_type) * 8; assert(0 <= n && static_cast(n) < size && "index in bounds"); @@ -985,7 +985,7 @@ namespace xsimd } template = 0> - inline batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvsr(lhs, detail::rvv_to_unsigned_batch(rhs)); } @@ -1019,14 +1019,14 @@ namespace xsimd (__riscv_vfslide1down), , vec(vec, T)) template - inline T reduce_scalar(rvv_reg_t const& arg) + XSIMD_INLINE T reduce_scalar(rvv_reg_t const& arg) { return detail::rvvmv_lane0(rvv_reg_t(arg.get_bytes(), types::detail::XSIMD_RVV_BITCAST)); } } // reduce_add template ::value_type, detail::rvv_enable_all_t = 0> - inline V reduce_add(batch const& arg, requires_arch) noexcept + XSIMD_INLINE V reduce_add(batch const& arg, requires_arch) noexcept { const auto zero = detail::broadcast(T(0)); const auto r = detail::rvvredsum(arg, zero); @@ -1035,7 +1035,7 @@ namespace xsimd // reduce_max template = 0> - inline T reduce_max(batch const& arg, requires_arch) noexcept + XSIMD_INLINE T reduce_max(batch const& arg, requires_arch) noexcept { const auto lowest = detail::broadcast(std::numeric_limits::lowest()); const auto r = detail::rvvredmax(arg, lowest); @@ -1044,7 +1044,7 @@ namespace xsimd // reduce_min template = 0> - inline T reduce_min(batch const& arg, requires_arch) noexcept + XSIMD_INLINE T reduce_min(batch const& arg, requires_arch) noexcept { const auto max = detail::broadcast(std::numeric_limits::max()); const auto r = detail::rvvredmin(arg, max); @@ -1053,7 +1053,7 @@ namespace xsimd // haddp template = 0> - inline batch haddp(const batch* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(const batch* row, requires_arch) noexcept { constexpr std::size_t size = batch::size; T sums[size]; @@ -1071,13 +1071,13 @@ namespace xsimd // eq template = 0> - inline batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmseq(lhs, rhs); } template = 0> - inline batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { const auto neq_result = detail::rvvmxor(lhs, rhs); return detail::rvvmnot(neq_result); @@ -1085,41 +1085,41 @@ namespace xsimd // neq template = 0> - inline batch_bool neq(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmsne(lhs, rhs); } template = 0> - inline batch_bool neq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return detail::rvvmxor(lhs, rhs); } // lt template = 0> - inline batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmslt(lhs, rhs); } // le template = 0> - inline batch_bool le(batch const& lhs, batch const& rhs, requires_arch) 
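/*
 * Context note (not introduced by this patch): the reductions above appear to
 * follow the shape of the RVV reduction intrinsics, which take their initial
 * accumulator value in element 0 of a vector operand. reduce_add therefore
 * seeds the reduction with a broadcast T(0), reduce_max with
 * numeric_limits::lowest(), and reduce_min with numeric_limits::max(), and the
 * scalar result is read back out of lane 0 via detail::reduce_scalar.
 */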
noexcept + XSIMD_INLINE batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmsle(lhs, rhs); } // gt template = 0> - inline batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmsgt(lhs, rhs); } // ge template = 0> - inline batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept { return detail::rvvmsge(lhs, rhs); } @@ -1133,7 +1133,7 @@ namespace xsimd } // compress template - inline batch compress(batch const& x, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch compress(batch const& x, batch_bool const& mask, requires_arch) noexcept { return detail::rvvcompress(x, mask); } @@ -1150,7 +1150,7 @@ namespace xsimd // swizzle template - inline batch swizzle(batch const& arg, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& arg, batch_constant, requires_arch) noexcept { static_assert(batch::size == sizeof...(idx), "invalid swizzle indices"); const batch indices { idx... }; @@ -1158,9 +1158,9 @@ namespace xsimd } template - inline batch, A> swizzle(batch, A> const& self, - batch_constant, - requires_arch) noexcept + XSIMD_INLINE batch, A> swizzle(batch, A> const& self, + batch_constant, + requires_arch) noexcept { const auto real = swizzle(self.real(), batch_constant {}, rvv {}); const auto imag = swizzle(self.imag(), batch_constant {}, rvv {}); @@ -1174,7 +1174,7 @@ namespace xsimd // extract_pair template = 0> - inline batch extract_pair(batch const& lhs, batch const& rhs, size_t n, requires_arch) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, size_t n, requires_arch) noexcept { const auto tmp = detail::rvvslidedown(rhs, n); return detail::rvvslideup(tmp, lhs, lhs.size - n); @@ -1182,20 +1182,20 @@ namespace xsimd // select template = 0> - inline batch select(batch_bool const& cond, batch const& a, batch const& b, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& a, batch const& b, requires_arch) noexcept { return detail::rvvmerge(b, a, cond); } template - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { return select(batch_bool { b... 
}, true_br, false_br, rvv {}); } // zip_lo template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto index = detail::vindex, 0, -1>(); const auto mask = detail::pmask8(0xaa); @@ -1206,7 +1206,7 @@ namespace xsimd // zip_hi template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto index = detail::vindex, batch::size / 2, -1>(); const auto mask = detail::pmask8(0xaa); @@ -1217,7 +1217,7 @@ namespace xsimd // store_complex template = 0> - inline void store_complex_aligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept + XSIMD_INLINE void store_complex_aligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { const auto lo = zip_lo(src.real(), src.imag()); const auto hi = zip_hi(src.real(), src.imag()); @@ -1227,7 +1227,7 @@ namespace xsimd } template = 0> - inline void store_complex_unaligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept + XSIMD_INLINE void store_complex_unaligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { store_complex_aligned(dst, src, rvv {}); } @@ -1245,7 +1245,7 @@ namespace xsimd // rsqrt template = 0> - inline batch rsqrt(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch rsqrt(batch const& arg, requires_arch) noexcept { auto approx = detail::rvvfrsqrt7(arg); approx = approx * (1.5 - (0.5 * arg * approx * approx)); @@ -1254,14 +1254,14 @@ namespace xsimd // sqrt template = 0> - inline batch sqrt(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch sqrt(batch const& arg, requires_arch) noexcept { return detail::rvvfsqrt(arg); } // reciprocal template = 0> - inline batch reciprocal(const batch& arg, requires_arch) noexcept + XSIMD_INLINE batch reciprocal(const batch& arg, requires_arch) noexcept { return detail::rvvfrec7(arg); } @@ -1293,12 +1293,12 @@ namespace xsimd using rvv_enable_itof_t = typename std::enable_if<(sizeof(T) == sizeof(U) && !std::is_floating_point::value && std::is_floating_point::value), int>::type; template = 0> - inline batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept { return rvvfcvt_rtz(U {}, arg); } template = 0> - inline batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept { return rvvfcvt_f(arg); } @@ -1310,22 +1310,22 @@ namespace xsimd // set template - inline batch set(batch const&, requires_arch, Args... args) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, Args... args) noexcept { const std::array::size> tmp { args... }; return load_unaligned(tmp.data(), convert(), rvv {}); } template - inline batch, A> set(batch, A> const&, requires_arch, - Args... args_complex) noexcept + XSIMD_INLINE batch, A> set(batch, A> const&, requires_arch, + Args... args_complex) noexcept { return batch>(set(batch {}, rvv {}, args_complex.real()...), set(batch {}, rvv {}, args_complex.imag()...)); } template - inline batch_bool set(batch_bool const&, requires_arch, Args... args) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Args... 
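/*
 * Context note (not introduced by this patch): rsqrt above combines the RVV
 * 7-bit reciprocal-square-root estimate (vfrsqrt7, wrapped as
 * detail::rvvfrsqrt7) with a single Newton-Raphson step,
 * y1 = y0 * (1.5 - 0.5 * x * y0 * y0), which roughly doubles the number of
 * accurate bits of the initial estimate. A scalar sketch of that refinement
 * step, purely for illustration (the function name is hypothetical, not part
 * of the xsimd API):
 */
inline double refine_rsqrt(double x, double y0) noexcept
{
    // one Newton-Raphson iteration for f(y) = 1/(y*y) - x
    return y0 * (1.5 - 0.5 * x * y0 * y0);
}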
args) noexcept { using U = as_unsigned_integer_t; const auto values = set(batch {}, rvv {}, static_cast(args)...); @@ -1336,7 +1336,7 @@ namespace xsimd // insert template = 0> - inline batch insert(batch const& arg, T val, index, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& arg, T val, index, requires_arch) noexcept { const auto mask = detail::pmask(uint64_t(1) << I); return detail::rvvmerge_splat(arg, val, mask); @@ -1344,14 +1344,14 @@ namespace xsimd // get template = 0> - inline T get(batch const& arg, size_t i, requires_arch) noexcept + XSIMD_INLINE T get(batch const& arg, size_t i, requires_arch) noexcept { const auto tmp = detail::rvvslidedown(arg, i); return detail::rvvmv_lane0(tmp); } template = 0> - inline std::complex get(batch, A> const& arg, size_t i, requires_arch) noexcept + XSIMD_INLINE std::complex get(batch, A> const& arg, size_t i, requires_arch) noexcept { const auto tmpr = detail::rvvslidedown(arg.real(), i); const auto tmpi = detail::rvvslidedown(arg.imag(), i); @@ -1360,28 +1360,28 @@ namespace xsimd // all template = 0> - inline bool all(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& arg, requires_arch) noexcept { return detail::rvvcpop(arg) == batch_bool::size; } // any template = 0> - inline bool any(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& arg, requires_arch) noexcept { return detail::rvvcpop(arg) > 0; } // bitwise_cast template = 0, detail::rvv_enable_all_t = 0> - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return detail::rvv_reg_t(arg.data.get_bytes(), types::detail::XSIMD_RVV_BITCAST); } // batch_bool_cast template = 0> - inline batch_bool batch_bool_cast(batch_bool const& arg, batch_bool const&, requires_arch) noexcept + XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& arg, batch_bool const&, requires_arch) noexcept { using intermediate_t = typename detail::rvv_bool_t; return intermediate_t(arg.data); @@ -1389,7 +1389,7 @@ namespace xsimd // from_bool template = 0> - inline batch from_bool(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { const auto zero = broadcast(T(0), rvv {}); return detail::rvvmerge_splat(zero, T(1), arg); @@ -1398,26 +1398,26 @@ namespace xsimd namespace detail { template - inline vuint8m1_t rvvslidedownbytes(vuint8m1_t arg, size_t i) + XSIMD_INLINE vuint8m1_t rvvslidedownbytes(vuint8m1_t arg, size_t i) { return __riscv_vslidedown(arg, i, types::detail::rvv_width_m1 / 8); } template <> - inline vuint8m1_t rvvslidedownbytes(vuint8m1_t arg, size_t i) + XSIMD_INLINE vuint8m1_t rvvslidedownbytes(vuint8m1_t arg, size_t i) { const auto bytes = __riscv_vlmul_trunc_u8mf2(arg); const auto result = __riscv_vslidedown(bytes, i, types::detail::rvv_width_mf2 / 8); return __riscv_vlmul_ext_u8m1(result); } template <> - inline vuint8m1_t rvvslidedownbytes(vuint8m1_t arg, size_t i) + XSIMD_INLINE vuint8m1_t rvvslidedownbytes(vuint8m1_t arg, size_t i) { const auto bytes = __riscv_vlmul_trunc_u8mf4(arg); const auto result = __riscv_vslidedown(bytes, i, types::detail::rvv_width_mf4 / 8); return __riscv_vlmul_ext_u8m1(result); } template <> - inline vuint8m1_t rvvslidedownbytes(vuint8m1_t arg, size_t i) + XSIMD_INLINE vuint8m1_t rvvslidedownbytes(vuint8m1_t arg, size_t i) { const auto bytes = __riscv_vlmul_trunc_u8mf8(arg); const auto 
result = __riscv_vslidedown(bytes, i, types::detail::rvv_width_mf8 / 8); @@ -1427,7 +1427,7 @@ namespace xsimd // slide_left template = 0> - inline batch slide_left(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch slide_left(batch const& arg, requires_arch) noexcept { const auto zero = broadcast(uint8_t(0), rvv {}); const auto bytes = arg.data.get_bytes(); @@ -1436,7 +1436,7 @@ namespace xsimd // slide_right template = 0> - inline batch slide_right(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch slide_right(batch const& arg, requires_arch) noexcept { using reg_t = detail::rvv_reg_t; const auto bytes = arg.data.get_bytes(); @@ -1445,7 +1445,7 @@ namespace xsimd // isnan template = 0> - inline batch_bool isnan(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const& arg, requires_arch) noexcept { return !(arg == arg); } @@ -1456,13 +1456,13 @@ namespace xsimd using rvv_as_signed_integer_t = as_signed_integer_t>; template > - inline batch rvvfcvt_default(batch const& arg) noexcept + XSIMD_INLINE batch rvvfcvt_default(batch const& arg) noexcept { return rvvfcvt_rne(U {}, arg); } template > - inline batch rvvfcvt_afz(batch const& arg) noexcept + XSIMD_INLINE batch rvvfcvt_afz(batch const& arg) noexcept { return rvvfcvt_rmm(U {}, arg); } @@ -1470,7 +1470,7 @@ namespace xsimd // nearbyint_as_int template > - inline batch nearbyint_as_int(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch nearbyint_as_int(batch const& arg, requires_arch) noexcept { // Reference rounds ties to nearest even return detail::rvvfcvt_default(arg); @@ -1478,7 +1478,7 @@ namespace xsimd // round template = 0> - inline batch round(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch round(batch const& arg, requires_arch) noexcept { // Round ties away from zero. const auto mask = abs(arg) < constants::maxflint>(); @@ -1487,7 +1487,7 @@ namespace xsimd // nearbyint template = 0> - inline batch nearbyint(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch nearbyint(batch const& arg, requires_arch) noexcept { // Round according to current rounding mode. const auto mask = abs(arg) < constants::maxflint>(); diff --git a/include/xsimd/arch/xsimd_scalar.hpp b/include/xsimd/arch/xsimd_scalar.hpp index 38b9f841d..3e8913d75 100644 --- a/include/xsimd/arch/xsimd_scalar.hpp +++ b/include/xsimd/arch/xsimd_scalar.hpp @@ -20,6 +20,8 @@ #include #include +#include "xsimd/config/xsimd_inline.hpp" + #ifdef XSIMD_ENABLE_XTL_COMPLEX #include "xtl/xcomplex.hpp" #endif @@ -86,7 +88,7 @@ namespace xsimd using std::tgamma; using std::trunc; - inline signed char abs(signed char v) + XSIMD_INLINE signed char abs(signed char v) { return v < 0 ? -v : v; } @@ -96,43 +98,43 @@ namespace xsimd // Use templated type here to prevent automatic instantiation that may // ends up in a warning template - inline char abs(char_type v, std::true_type) + XSIMD_INLINE char abs(char_type v, std::true_type) { return v; } template - inline char abs(char_type v, std::false_type) + XSIMD_INLINE char abs(char_type v, std::false_type) { return v < 0 ? -v : v; } } - inline char abs(char v) + XSIMD_INLINE char abs(char v) { return detail::abs(v, std::is_unsigned::type {}); } - inline short abs(short v) + XSIMD_INLINE short abs(short v) { return v < 0 ? 
-v : v; } - inline unsigned char abs(unsigned char v) + XSIMD_INLINE unsigned char abs(unsigned char v) { return v; } - inline unsigned short abs(unsigned short v) + XSIMD_INLINE unsigned short abs(unsigned short v) { return v; } - inline unsigned int abs(unsigned int v) + XSIMD_INLINE unsigned int abs(unsigned int v) { return v; } - inline unsigned long abs(unsigned long v) + XSIMD_INLINE unsigned long abs(unsigned long v) { return v; } - inline unsigned long long abs(unsigned long long v) + XSIMD_INLINE unsigned long long abs(unsigned long long v) { return v; } @@ -145,42 +147,42 @@ namespace xsimd // Windows defines catch all templates template - inline typename std::enable_if::value, bool>::type + XSIMD_INLINE typename std::enable_if::value, bool>::type isfinite(T var) noexcept { return std::isfinite(var); } template - inline typename std::enable_if::value, bool>::type + XSIMD_INLINE typename std::enable_if::value, bool>::type isfinite(T var) noexcept { return isfinite(double(var)); } template - inline typename std::enable_if::value, bool>::type + XSIMD_INLINE typename std::enable_if::value, bool>::type isinf(T var) noexcept { return std::isinf(var); } template - inline typename std::enable_if::value, bool>::type + XSIMD_INLINE typename std::enable_if::value, bool>::type isinf(T var) noexcept { return isinf(double(var)); } template - inline typename std::enable_if::value, bool>::type + XSIMD_INLINE typename std::enable_if::value, bool>::type isnan(T var) noexcept { return std::isnan(var); } template - inline typename std::enable_if::value, bool>::type + XSIMD_INLINE typename std::enable_if::value, bool>::type isnan(T var) noexcept { return isnan(double(var)); @@ -188,13 +190,13 @@ namespace xsimd #endif template - inline typename std::common_type::type add(T const& x, Tp const& y) noexcept + XSIMD_INLINE typename std::common_type::type add(T const& x, Tp const& y) noexcept { return x + y; } template - inline typename std::common_type::type avg(T const& x, Tp const& y) noexcept + XSIMD_INLINE typename std::common_type::type avg(T const& x, Tp const& y) noexcept { using common_type = typename std::common_type::type; if (std::is_floating_point::value) @@ -215,7 +217,7 @@ namespace xsimd } template - inline typename std::common_type::type avgr(T const& x, Tp const& y) noexcept + XSIMD_INLINE typename std::common_type::type avgr(T const& x, Tp const& y) noexcept { using common_type = typename std::common_type::type; if (std::is_floating_point::value) @@ -227,41 +229,41 @@ namespace xsimd } template - inline T incr(T const& x) noexcept + XSIMD_INLINE T incr(T const& x) noexcept { return x + T(1); } template - inline T incr_if(T const& x, bool mask) noexcept + XSIMD_INLINE T incr_if(T const& x, bool mask) noexcept { return x + T(mask ? 
1 : 0); } - inline bool all(bool mask) + XSIMD_INLINE bool all(bool mask) { return mask; } - inline bool any(bool mask) + XSIMD_INLINE bool any(bool mask) { return mask; } - inline bool none(bool mask) + XSIMD_INLINE bool none(bool mask) { return !mask; } template - inline typename std::enable_if::value, T>::type + XSIMD_INLINE typename std::enable_if::value, T>::type bitwise_and(T x, T y) noexcept { return x & y; } template - inline T_out bitwise_cast(T_in x) noexcept + XSIMD_INLINE T_out bitwise_cast(T_in x) noexcept { static_assert(sizeof(T_in) == sizeof(T_out), "bitwise_cast between types of the same size"); T_out r; @@ -269,7 +271,7 @@ namespace xsimd return r; } - inline float bitwise_and(float x, float y) noexcept + XSIMD_INLINE float bitwise_and(float x, float y) noexcept { uint32_t ix, iy; std::memcpy((void*)&ix, (void*)&x, sizeof(float)); @@ -280,7 +282,7 @@ namespace xsimd return r; } - inline double bitwise_and(double x, double y) noexcept + XSIMD_INLINE double bitwise_and(double x, double y) noexcept { uint64_t ix, iy; std::memcpy((void*)&ix, (void*)&x, sizeof(double)); @@ -292,32 +294,32 @@ namespace xsimd } template - inline typename std::enable_if::value && std::is_integral::value, T0>::type + XSIMD_INLINE typename std::enable_if::value && std::is_integral::value, T0>::type bitwise_lshift(T0 x, T1 shift) noexcept { return x << shift; } template - inline typename std::enable_if::value && std::is_integral::value, T0>::type + XSIMD_INLINE typename std::enable_if::value && std::is_integral::value, T0>::type bitwise_rshift(T0 x, T1 shift) noexcept { return x >> shift; } template - inline typename std::enable_if::value, T>::type + XSIMD_INLINE typename std::enable_if::value, T>::type bitwise_not(T x) noexcept { return ~x; } - inline bool bitwise_not(bool x) noexcept + XSIMD_INLINE bool bitwise_not(bool x) noexcept { return !x; } - inline float bitwise_not(float x) noexcept + XSIMD_INLINE float bitwise_not(float x) noexcept { uint32_t ix; std::memcpy((void*)&ix, (void*)&x, sizeof(float)); @@ -327,7 +329,7 @@ namespace xsimd return r; } - inline double bitwise_not(double x) noexcept + XSIMD_INLINE double bitwise_not(double x) noexcept { uint64_t ix; std::memcpy((void*)&ix, (void*)&x, sizeof(double)); @@ -338,19 +340,19 @@ namespace xsimd } template - inline typename std::enable_if::value, T>::type bitwise_andnot(T x, T y) noexcept + XSIMD_INLINE typename std::enable_if::value, T>::type bitwise_andnot(T x, T y) noexcept { return bitwise_and(x, bitwise_not(y)); } template - inline typename std::enable_if::value, T>::type + XSIMD_INLINE typename std::enable_if::value, T>::type bitwise_or(T x, T y) noexcept { return x | y; } - inline float bitwise_or(float x, float y) noexcept + XSIMD_INLINE float bitwise_or(float x, float y) noexcept { uint32_t ix, iy; std::memcpy((void*)&ix, (void*)&x, sizeof(float)); @@ -361,7 +363,7 @@ namespace xsimd return r; } - inline double bitwise_or(double x, double y) noexcept + XSIMD_INLINE double bitwise_or(double x, double y) noexcept { uint64_t ix, iy; std::memcpy((void*)&ix, (void*)&x, sizeof(double)); @@ -373,13 +375,13 @@ namespace xsimd } template - inline typename std::enable_if::value, T>::type + XSIMD_INLINE typename std::enable_if::value, T>::type bitwise_xor(T x, T y) noexcept { return x ^ y; } - inline float bitwise_xor(float x, float y) noexcept + XSIMD_INLINE float bitwise_xor(float x, float y) noexcept { uint32_t ix, iy; std::memcpy((void*)&ix, (void*)&x, sizeof(float)); @@ -390,7 +392,7 @@ namespace xsimd return r; } - inline double 
bitwise_xor(double x, double y) noexcept + XSIMD_INLINE double bitwise_xor(double x, double y) noexcept { uint64_t ix, iy; std::memcpy((void*)&ix, (void*)&x, sizeof(double)); @@ -402,47 +404,47 @@ namespace xsimd } template - inline typename std::common_type::type div(T const& x, Tp const& y) noexcept + XSIMD_INLINE typename std::common_type::type div(T const& x, Tp const& y) noexcept { return x / y; } template - inline auto mod(T const& x, Tp const& y) noexcept -> decltype(x % y) + XSIMD_INLINE auto mod(T const& x, Tp const& y) noexcept -> decltype(x % y) { return x % y; } template - inline typename std::common_type::type mul(T const& x, Tp const& y) noexcept + XSIMD_INLINE typename std::common_type::type mul(T const& x, Tp const& y) noexcept { return x * y; } template - inline T neg(T const& x) noexcept + XSIMD_INLINE T neg(T const& x) noexcept { return -x; } template - inline auto pos(T const& x) noexcept -> decltype(+x) + XSIMD_INLINE auto pos(T const& x) noexcept -> decltype(+x) { return +x; } - inline float reciprocal(float const& x) noexcept + XSIMD_INLINE float reciprocal(float const& x) noexcept { return 1.f / x; } - inline double reciprocal(double const& x) noexcept + XSIMD_INLINE double reciprocal(double const& x) noexcept { return 1. / x; } template - inline typename std::enable_if::value && std::is_integral::value, T0>::type + XSIMD_INLINE typename std::enable_if::value && std::is_integral::value, T0>::type rotl(T0 x, T1 shift) noexcept { constexpr auto N = std::numeric_limits::digits; @@ -450,7 +452,7 @@ namespace xsimd } template - inline typename std::enable_if::value && std::is_integral::value, T0>::type + XSIMD_INLINE typename std::enable_if::value && std::is_integral::value, T0>::type rotr(T0 x, T1 shift) noexcept { constexpr auto N = std::numeric_limits::digits; @@ -458,19 +460,19 @@ namespace xsimd } template - inline bool isnan(std::complex var) noexcept + XSIMD_INLINE bool isnan(std::complex var) noexcept { return std::isnan(std::real(var)) || std::isnan(std::imag(var)); } template - inline bool isinf(std::complex var) noexcept + XSIMD_INLINE bool isinf(std::complex var) noexcept { return std::isinf(std::real(var)) || std::isinf(std::imag(var)); } template - inline bool isfinite(std::complex var) noexcept + XSIMD_INLINE bool isfinite(std::complex var) noexcept { return std::isfinite(std::real(var)) && std::isfinite(std::imag(var)); } @@ -499,130 +501,130 @@ namespace xsimd #endif template ::value>::type> - inline T clip(const T& val, const T& low, const T& hi) noexcept + XSIMD_INLINE T clip(const T& val, const T& low, const T& hi) noexcept { assert(low <= hi && "ordered clipping bounds"); return low > val ? low : (hi < val ? hi : val); } template ::value>::type> - inline bool is_flint(const T& x) noexcept + XSIMD_INLINE bool is_flint(const T& x) noexcept { return std::isnan(x - x) ? 
false : (x - std::trunc(x)) == T(0); } template ::value>::type> - inline bool is_even(const T& x) noexcept + XSIMD_INLINE bool is_even(const T& x) noexcept { return is_flint(x * T(0.5)); } template ::value>::type> - inline bool is_odd(const T& x) noexcept + XSIMD_INLINE bool is_odd(const T& x) noexcept { return is_even(x - 1.); } - inline int32_t nearbyint_as_int(float var) noexcept + XSIMD_INLINE int32_t nearbyint_as_int(float var) noexcept { return static_cast(std::nearbyint(var)); } - inline int64_t nearbyint_as_int(double var) noexcept + XSIMD_INLINE int64_t nearbyint_as_int(double var) noexcept { return static_cast(std::nearbyint(var)); } template ::value>::type> - inline bool eq(const T& x0, const T& x1) noexcept + XSIMD_INLINE bool eq(const T& x0, const T& x1) noexcept { return x0 == x1; } template - inline bool eq(const std::complex& x0, const std::complex& x1) noexcept + XSIMD_INLINE bool eq(const std::complex& x0, const std::complex& x1) noexcept { return x0 == x1; } template ::value>::type> - inline bool ge(const T& x0, const T& x1) noexcept + XSIMD_INLINE bool ge(const T& x0, const T& x1) noexcept { return x0 >= x1; } template ::value>::type> - inline bool gt(const T& x0, const T& x1) noexcept + XSIMD_INLINE bool gt(const T& x0, const T& x1) noexcept { return x0 > x1; } template ::value>::type> - inline bool le(const T& x0, const T& x1) noexcept + XSIMD_INLINE bool le(const T& x0, const T& x1) noexcept { return x0 <= x1; } template ::value>::type> - inline bool lt(const T& x0, const T& x1) noexcept + XSIMD_INLINE bool lt(const T& x0, const T& x1) noexcept { return x0 < x1; } template ::value>::type> - inline bool neq(const T& x0, const T& x1) noexcept + XSIMD_INLINE bool neq(const T& x0, const T& x1) noexcept { return x0 != x1; } template - inline bool neq(const std::complex& x0, const std::complex& x1) noexcept + XSIMD_INLINE bool neq(const std::complex& x0, const std::complex& x1) noexcept { return !(x0 == x1); } #if defined(__APPLE__) && (MAC_OS_X_VERSION_MIN_REQUIRED > 1080) - inline float exp10(const float& x) noexcept + XSIMD_INLINE float exp10(const float& x) noexcept { return __exp10f(x); } - inline double exp10(const double& x) noexcept + XSIMD_INLINE double exp10(const double& x) noexcept { return __exp10(x); } #elif defined(__GLIBC__) - inline float exp10(const float& x) noexcept + XSIMD_INLINE float exp10(const float& x) noexcept { return ::exp10f(x); } - inline double exp10(const double& x) noexcept + XSIMD_INLINE double exp10(const double& x) noexcept { return ::exp10(x); } #elif !defined(__clang__) && defined(__GNUC__) && (__GNUC__ >= 5) - inline float exp10(const float& x) noexcept + XSIMD_INLINE float exp10(const float& x) noexcept { return __builtin_exp10f(x); } - inline double exp10(const double& x) noexcept + XSIMD_INLINE double exp10(const double& x) noexcept { return __builtin_exp10(x); } #elif defined(_WIN32) template ::value>::type> - inline T exp10(const T& x) noexcept + XSIMD_INLINE T exp10(const T& x) noexcept { // Very inefficient but other implementations give incorrect results // on Windows return std::pow(T(10), x); } #else - inline float exp10(const float& x) noexcept + XSIMD_INLINE float exp10(const float& x) noexcept { const float ln10 = std::log(10.f); return std::exp(ln10 * x); } - inline double exp10(const double& x) noexcept + XSIMD_INLINE double exp10(const double& x) noexcept { const double ln10 = std::log(10.); return std::exp(ln10 * x); @@ -630,7 +632,7 @@ namespace xsimd #endif template ::value>::type> - inline auto rsqrt(const T& x) 
noexcept -> decltype(std::sqrt(x)) + XSIMD_INLINE auto rsqrt(const T& x) noexcept -> decltype(std::sqrt(x)) { using float_type = decltype(std::sqrt(x)); return static_cast(1) / std::sqrt(x); @@ -639,7 +641,7 @@ namespace xsimd namespace detail { template - inline C expm1_complex_scalar_impl(const C& val) noexcept + XSIMD_INLINE C expm1_complex_scalar_impl(const C& val) noexcept { using T = typename C::value_type; T isin = std::sin(val.imag()); @@ -651,14 +653,14 @@ namespace xsimd } template - inline std::complex expm1(const std::complex& val) noexcept + XSIMD_INLINE std::complex expm1(const std::complex& val) noexcept { return detail::expm1_complex_scalar_impl(val); } #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline xtl::xcomplex expm1(const xtl::xcomplex& val) noexcept + XSIMD_INLINE xtl::xcomplex expm1(const xtl::xcomplex& val) noexcept { return detail::expm1_complex_scalar_impl(val); } @@ -667,7 +669,7 @@ namespace xsimd namespace detail { template - inline C log1p_complex_scalar_impl(const C& val) noexcept + XSIMD_INLINE C log1p_complex_scalar_impl(const C& val) noexcept { using T = typename C::value_type; C u = C(1.) + val; @@ -676,19 +678,19 @@ namespace xsimd } template - inline std::complex log1p(const std::complex& val) noexcept + XSIMD_INLINE std::complex log1p(const std::complex& val) noexcept { return detail::log1p_complex_scalar_impl(val); } template - inline std::complex log2(const std::complex& val) noexcept + XSIMD_INLINE std::complex log2(const std::complex& val) noexcept { return log(val) / std::log(T(2)); } template ::value>::type> - inline T sadd(const T& lhs, const T& rhs) noexcept + XSIMD_INLINE T sadd(const T& lhs, const T& rhs) noexcept { if (std::numeric_limits::is_signed) { @@ -719,7 +721,7 @@ namespace xsimd } template ::value>::type> - inline T ssub(const T& lhs, const T& rhs) noexcept + XSIMD_INLINE T ssub(const T& lhs, const T& rhs) noexcept { if (std::numeric_limits::is_signed) { @@ -755,7 +757,7 @@ namespace xsimd using value_type_or_type = typename value_type_or_type_helper::type; template - inline typename std::enable_if::value, T0>::type + XSIMD_INLINE typename std::enable_if::value, T0>::type ipow(const T0& x, const T1& n) noexcept { static_assert(std::is_integral::value, "second argument must be an integer"); @@ -781,14 +783,14 @@ namespace xsimd } template - inline typename std::enable_if::value, T0>::type + XSIMD_INLINE typename std::enable_if::value, T0>::type pow(const T0& x, const T1& n) noexcept { return detail::ipow(x, n); } template - inline auto + XSIMD_INLINE auto pow(const T0& t0, const T1& t1) noexcept -> typename std::enable_if::value && std::is_floating_point::value, decltype(std::pow(t0, t1))>::type { @@ -796,21 +798,21 @@ namespace xsimd } template - inline typename std::enable_if::value, std::complex>::type + XSIMD_INLINE typename std::enable_if::value, std::complex>::type pow(const std::complex& t0, const T1& t1) noexcept { return detail::ipow(t0, t1); } template - inline typename std::enable_if::value, std::complex>::type + XSIMD_INLINE typename std::enable_if::value, std::complex>::type pow(const std::complex& t0, const T1& t1) noexcept { return std::pow(t0, t1); } template - inline auto + XSIMD_INLINE auto pow(const T0& t0, const std::complex& t1) noexcept -> typename std::enable_if::value, decltype(std::pow(t0, t1))>::type { @@ -818,24 +820,24 @@ namespace xsimd } template ::value>::type> - inline T bitofsign(T const& x) noexcept + XSIMD_INLINE T bitofsign(T const& x) noexcept { return T(x < T(0)); } template - inline auto 
signbit(T const& v) noexcept -> decltype(bitofsign(v)) + XSIMD_INLINE auto signbit(T const& v) noexcept -> decltype(bitofsign(v)) { return bitofsign(v); } - inline double sign(bool const& v) noexcept + XSIMD_INLINE double sign(bool const& v) noexcept { return v; } template ::value>::type> - inline T sign(const T& v) noexcept + XSIMD_INLINE T sign(const T& v) noexcept { return v < T(0) ? T(-1.) : v == T(0) ? T(0.) : T(1.); @@ -844,7 +846,7 @@ namespace xsimd namespace detail { template - inline C sign_complex_scalar_impl(const C& v) noexcept + XSIMD_INLINE C sign_complex_scalar_impl(const C& v) noexcept { using value_type = typename C::value_type; if (v.real()) @@ -859,51 +861,51 @@ namespace xsimd } template - inline std::complex sign(const std::complex& v) noexcept + XSIMD_INLINE std::complex sign(const std::complex& v) noexcept { return detail::sign_complex_scalar_impl(v); } #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline xtl::xcomplex sign(const xtl::xcomplex& v) noexcept + XSIMD_INLINE xtl::xcomplex sign(const xtl::xcomplex& v) noexcept { return detail::sign_complex_scalar_impl(v); } #endif - inline double signnz(bool const&) noexcept + XSIMD_INLINE double signnz(bool const&) noexcept { return 1; } template ::value>::type> - inline T signnz(const T& v) noexcept + XSIMD_INLINE T signnz(const T& v) noexcept { return v < T(0) ? T(-1.) : T(1.); } template - inline typename std::common_type::type sub(T const& x, Tp const& y) noexcept + XSIMD_INLINE typename std::common_type::type sub(T const& x, Tp const& y) noexcept { return x - y; } template - inline T decr(T const& x) noexcept + XSIMD_INLINE T decr(T const& x) noexcept { return x - T(1); } template - inline T decr_if(T const& x, bool mask) noexcept + XSIMD_INLINE T decr_if(T const& x, bool mask) noexcept { return x - T(mask ? 1 : 0); } #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline xtl::xcomplex log2(const xtl::xcomplex& val) noexcept + XSIMD_INLINE xtl::xcomplex log2(const xtl::xcomplex& val) noexcept { return log(val) / log(T(2)); } @@ -911,14 +913,14 @@ namespace xsimd #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline xtl::xcomplex log1p(const xtl::xcomplex& val) noexcept + XSIMD_INLINE xtl::xcomplex log1p(const xtl::xcomplex& val) noexcept { return detail::log1p_complex_scalar_impl(val); } #endif template - inline auto min(T0 const& self, T1 const& other) noexcept + XSIMD_INLINE auto min(T0 const& self, T1 const& other) noexcept -> typename std::enable_if::value && std::is_scalar::value, typename std::decay other ? other : self)>::type>::type { @@ -927,14 +929,14 @@ namespace xsimd // numpy defines minimum operator on complex using lexical comparison template - inline std::complex::type> + XSIMD_INLINE std::complex::type> min(std::complex const& self, std::complex const& other) noexcept { return (self.real() < other.real()) ? (self) : (self.real() == other.real() ? (self.imag() < other.imag() ? self : other) : other); } template - inline auto max(T0 const& self, T1 const& other) noexcept + XSIMD_INLINE auto max(T0 const& self, T1 const& other) noexcept -> typename std::enable_if::value && std::is_scalar::value, typename std::decay other ? other : self)>::type>::type { @@ -943,26 +945,26 @@ namespace xsimd // numpy defines maximum operator on complex using lexical comparison template - inline std::complex::type> + XSIMD_INLINE std::complex::type> max(std::complex const& self, std::complex const& other) noexcept { return (self.real() > other.real()) ? (self) : (self.real() == other.real() ? (self.imag() > other.imag() ? 
self : other) : other); } template - inline typename std::enable_if::value, T>::type fma(const T& a, const T& b, const T& c) noexcept + XSIMD_INLINE typename std::enable_if::value, T>::type fma(const T& a, const T& b, const T& c) noexcept { return a * b + c; } template - inline typename std::enable_if::value, T>::type fma(const T& a, const T& b, const T& c) noexcept + XSIMD_INLINE typename std::enable_if::value, T>::type fma(const T& a, const T& b, const T& c) noexcept { return std::fma(a, b, c); } template - inline typename std::enable_if::value, T>::type fms(const T& a, const T& b, const T& c) noexcept + XSIMD_INLINE typename std::enable_if::value, T>::type fms(const T& a, const T& b, const T& c) noexcept { return a * b - c; } @@ -970,7 +972,7 @@ namespace xsimd namespace detail { template - inline C fma_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept + XSIMD_INLINE C fma_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept { return { fms(a.real(), b.real(), fms(a.imag(), b.imag(), c.real())), fma(a.real(), b.imag(), fma(a.imag(), b.real(), c.imag())) }; @@ -978,14 +980,14 @@ namespace xsimd } template - inline std::complex fma(const std::complex& a, const std::complex& b, const std::complex& c) noexcept + XSIMD_INLINE std::complex fma(const std::complex& a, const std::complex& b, const std::complex& c) noexcept { return detail::fma_complex_scalar_impl(a, b, c); } #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline xtl::xcomplex fma(const xtl::xcomplex& a, const xtl::xcomplex& b, const xtl::xcomplex& c) noexcept + XSIMD_INLINE xtl::xcomplex fma(const xtl::xcomplex& a, const xtl::xcomplex& b, const xtl::xcomplex& c) noexcept { return detail::fma_complex_scalar_impl(a, b, c); } @@ -994,7 +996,7 @@ namespace xsimd namespace detail { template - inline C fms_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept + XSIMD_INLINE C fms_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept { return { fms(a.real(), b.real(), fma(a.imag(), b.imag(), c.real())), fma(a.real(), b.imag(), fms(a.imag(), b.real(), c.imag())) }; @@ -1002,27 +1004,27 @@ namespace xsimd } template - inline std::complex fms(const std::complex& a, const std::complex& b, const std::complex& c) noexcept + XSIMD_INLINE std::complex fms(const std::complex& a, const std::complex& b, const std::complex& c) noexcept { return detail::fms_complex_scalar_impl(a, b, c); } #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline xtl::xcomplex fms(const xtl::xcomplex& a, const xtl::xcomplex& b, const xtl::xcomplex& c) noexcept + XSIMD_INLINE xtl::xcomplex fms(const xtl::xcomplex& a, const xtl::xcomplex& b, const xtl::xcomplex& c) noexcept { return detail::fms_complex_scalar_impl(a, b, c); } #endif template - inline typename std::enable_if::value, T>::type fnma(const T& a, const T& b, const T& c) noexcept + XSIMD_INLINE typename std::enable_if::value, T>::type fnma(const T& a, const T& b, const T& c) noexcept { return -(a * b) + c; } template - inline typename std::enable_if::value, T>::type fnma(const T& a, const T& b, const T& c) noexcept + XSIMD_INLINE typename std::enable_if::value, T>::type fnma(const T& a, const T& b, const T& c) noexcept { return std::fma(-a, b, c); } @@ -1030,7 +1032,7 @@ namespace xsimd namespace detail { template - inline C fnma_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept + XSIMD_INLINE C fnma_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept { return { fms(a.imag(), b.imag(), fms(a.real(), b.real(), c.real())), -fma(a.real(), b.imag(), 
fms(a.imag(), b.real(), c.imag())) }; @@ -1038,27 +1040,27 @@ namespace xsimd } template - inline std::complex fnma(const std::complex& a, const std::complex& b, const std::complex& c) noexcept + XSIMD_INLINE std::complex fnma(const std::complex& a, const std::complex& b, const std::complex& c) noexcept { return detail::fnma_complex_scalar_impl(a, b, c); } #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline xtl::xcomplex fnma(const xtl::xcomplex& a, const xtl::xcomplex& b, const xtl::xcomplex& c) noexcept + XSIMD_INLINE xtl::xcomplex fnma(const xtl::xcomplex& a, const xtl::xcomplex& b, const xtl::xcomplex& c) noexcept { return detail::fnma_complex_scalar_impl(a, b, c); } #endif template - inline typename std::enable_if::value, T>::type fnms(const T& a, const T& b, const T& c) noexcept + XSIMD_INLINE typename std::enable_if::value, T>::type fnms(const T& a, const T& b, const T& c) noexcept { return -(a * b) - c; } template - inline typename std::enable_if::value, T>::type fnms(const T& a, const T& b, const T& c) noexcept + XSIMD_INLINE typename std::enable_if::value, T>::type fnms(const T& a, const T& b, const T& c) noexcept { return -std::fma(a, b, c); } @@ -1066,7 +1068,7 @@ namespace xsimd namespace detail { template - inline C fnms_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept + XSIMD_INLINE C fnms_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept { return { fms(a.imag(), b.imag(), fma(a.real(), b.real(), c.real())), -fma(a.real(), b.imag(), fma(a.imag(), b.real(), c.imag())) }; @@ -1074,14 +1076,14 @@ namespace xsimd } template - inline std::complex fnms(const std::complex& a, const std::complex& b, const std::complex& c) noexcept + XSIMD_INLINE std::complex fnms(const std::complex& a, const std::complex& b, const std::complex& c) noexcept { return detail::fnms_complex_scalar_impl(a, b, c); } #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline xtl::xcomplex fnms(const xtl::xcomplex& a, const xtl::xcomplex& b, const xtl::xcomplex& c) noexcept + XSIMD_INLINE xtl::xcomplex fnms(const xtl::xcomplex& a, const xtl::xcomplex& b, const xtl::xcomplex& c) noexcept { return detail::fnms_complex_scalar_impl(a, b, c); } @@ -1089,14 +1091,14 @@ namespace xsimd namespace detail { -#define XSIMD_HASSINCOS_TRAIT(func) \ - template \ - struct has##func \ - { \ - template \ - static inline auto get(T* ptr) -> decltype(func(std::declval(), std::declval(), std::declval()), std::true_type {}); \ - static inline std::false_type get(...); \ - static constexpr bool value = decltype(get((S*)nullptr))::value; \ +#define XSIMD_HASSINCOS_TRAIT(func) \ + template \ + struct has##func \ + { \ + template \ + static XSIMD_INLINE auto get(T* ptr) -> decltype(func(std::declval(), std::declval(), std::declval()), std::true_type {}); \ + static XSIMD_INLINE std::false_type get(...); \ + static constexpr bool value = decltype(get((S*)nullptr))::value; \ } #define XSIMD_HASSINCOS(func, T) has##func::value @@ -1109,21 +1111,21 @@ namespace xsimd struct generic_sincosf { template - inline typename std::enable_if::type + XSIMD_INLINE typename std::enable_if::type operator()(float val, T& s, T& c) { sincosf(val, &s, &c); } template - inline typename std::enable_if::type + XSIMD_INLINE typename std::enable_if::type operator()(float val, T& s, T& c) { __sincosf(val, &s, &c); } template - inline typename std::enable_if::type + XSIMD_INLINE typename std::enable_if::type operator()(float val, T& s, T& c) { s = std::sin(val); @@ -1134,21 +1136,21 @@ namespace xsimd struct generic_sincos { template - 
inline typename std::enable_if::type + XSIMD_INLINE typename std::enable_if::type operator()(double val, T& s, T& c) { sincos(val, &s, &c); } template - inline typename std::enable_if::type + XSIMD_INLINE typename std::enable_if::type operator()(double val, T& s, T& c) { __sincos(val, &s, &c); } template - inline typename std::enable_if::type + XSIMD_INLINE typename std::enable_if::type operator()(double val, T& s, T& c) { s = std::sin(val); @@ -1160,14 +1162,14 @@ namespace xsimd #undef XSIMD_HASSINCOS } - inline std::pair sincos(float val) noexcept + XSIMD_INLINE std::pair sincos(float val) noexcept { float s, c; detail::generic_sincosf {}(val, s, c); return std::make_pair(s, c); } - inline std::pair sincos(double val) noexcept + XSIMD_INLINE std::pair sincos(double val) noexcept { double s, c; detail::generic_sincos {}(val, s, c); @@ -1175,7 +1177,7 @@ namespace xsimd } template - inline std::pair, std::complex> + XSIMD_INLINE std::pair, std::complex> sincos(const std::complex& val) noexcept { return std::make_pair(std::sin(val), std::cos(val)); @@ -1183,20 +1185,20 @@ namespace xsimd #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline std::pair, xtl::xcomplex> sincos(const xtl::xcomplex& val) noexcept + XSIMD_INLINE std::pair, xtl::xcomplex> sincos(const xtl::xcomplex& val) noexcept { return std::make_pair(sin(val), cos(val)); } #endif template ::value, void>::type> - inline T frexp(T const& val, int& exp) noexcept + XSIMD_INLINE T frexp(T const& val, int& exp) noexcept { return std::frexp(val, &exp); } template - inline T select(bool cond, T const& true_br, T const& false_br) noexcept + XSIMD_INLINE T select(bool cond, T const& true_br, T const& false_br) noexcept { return cond ? true_br : false_br; } diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp index d733a8c36..67b74f548 100644 --- a/include/xsimd/arch/xsimd_sse2.hpp +++ b/include/xsimd/arch/xsimd_sse2.hpp @@ -24,7 +24,7 @@ namespace xsimd struct batch_bool_constant; template - inline batch bitwise_cast(batch const& x) noexcept; + XSIMD_INLINE batch bitwise_cast(batch const& x) noexcept; template struct batch_constant; @@ -57,23 +57,23 @@ namespace xsimd // fwd template - inline batch insert(batch const& self, T val, index, requires_arch) noexcept; + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept; template - inline batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept; + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept; template - inline batch avg(batch const&, batch const&, requires_arch) noexcept; + XSIMD_INLINE batch avg(batch const&, batch const&, requires_arch) noexcept; template - inline batch avgr(batch const&, batch const&, requires_arch) noexcept; + XSIMD_INLINE batch avgr(batch const&, batch const&, requires_arch) noexcept; // abs template - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { __m128d sign_mask = _mm_set1_pd(-0.f); // -0.f = 1 << 31 return _mm_andnot_pd(sign_mask, self); } template - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { __m128 sign_mask = _mm_set1_ps(-0.f); // -0.f = 1 << 31 return _mm_andnot_ps(sign_mask, self); @@ -81,7 +81,7 @@ namespace xsimd // add template ::value, void>::type> - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch 
add(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -107,54 +107,54 @@ namespace xsimd } template - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { return _mm_add_ps(self, other); } template - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { return _mm_add_pd(self, other); } // all template - inline bool all(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& self, requires_arch) noexcept { return _mm_movemask_ps(self) == 0x0F; } template - inline bool all(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& self, requires_arch) noexcept { return _mm_movemask_pd(self) == 0x03; } template ::value, void>::type> - inline bool all(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& self, requires_arch) noexcept { return _mm_movemask_epi8(self) == 0xFFFF; } // any template - inline bool any(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& self, requires_arch) noexcept { return _mm_movemask_ps(self) != 0; } template - inline bool any(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& self, requires_arch) noexcept { return _mm_movemask_pd(self) != 0; } template ::value, void>::type> - inline bool any(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& self, requires_arch) noexcept { return _mm_movemask_epi8(self) != 0; } // avgr template ::value, void>::type> - inline batch avgr(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch avgr(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -172,7 +172,7 @@ namespace xsimd // avg template ::value, void>::type> - inline batch avg(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch avg(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -192,83 +192,83 @@ namespace xsimd // batch_bool_cast template - inline batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept + XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept { return { bitwise_cast(batch(self.data)).data }; } // bitwise_and template - inline batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { return _mm_and_ps(self, other); } template - inline batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_and_ps(self, other); } template ::value, void>::type> - inline batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { return _mm_and_si128(self, other); } template ::value, void>::type> - inline batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool 
bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_and_si128(self, other); } template - batch inline bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + batch XSIMD_INLINE bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { return _mm_and_pd(self, other); } template - inline batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_and_pd(self, other); } // bitwise_andnot template - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return _mm_andnot_ps(other, self); } template - inline batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_andnot_ps(other, self); } template ::value, void>::type> - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return _mm_andnot_si128(other, self); } template ::value, void>::type> - inline batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_andnot_si128(other, self); } template - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return _mm_andnot_pd(other, self); } template - inline batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_andnot_pd(other, self); } // bitwise_lshift template ::value, void>::type> - inline batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -295,73 +295,73 @@ namespace xsimd // bitwise_not template - inline batch bitwise_not(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return _mm_xor_ps(self, _mm_castsi128_ps(_mm_set1_epi32(-1))); } template - inline batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept { return _mm_xor_ps(self, _mm_castsi128_ps(_mm_set1_epi32(-1))); } template ::value, void>::type> - inline batch bitwise_not(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return _mm_xor_si128(self, _mm_set1_epi32(-1)); } template ::value, void>::type> - inline batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept { return _mm_xor_si128(self, _mm_set1_epi32(-1)); } template - inline batch bitwise_not(batch const& self, requires_arch) 
noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return _mm_xor_pd(self, _mm_castsi128_pd(_mm_set1_epi32(-1))); } template - inline batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept { return _mm_xor_pd(self, _mm_castsi128_pd(_mm_set1_epi32(-1))); } // bitwise_or template - inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return _mm_or_ps(self, other); } template - inline batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_or_ps(self, other); } template ::value, void>::type> - inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return _mm_or_si128(self, other); } template ::value, void>::type> - inline batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_or_si128(self, other); } template - inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return _mm_or_pd(self, other); } template - inline batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_or_pd(self, other); } // bitwise_rshift template ::value, void>::type> - inline batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept { if (std::is_signed::value) { @@ -423,81 +423,81 @@ namespace xsimd // bitwise_xor template - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return _mm_xor_ps(self, other); } template - inline batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_xor_ps(self, other); } template ::value, void>::type> - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return _mm_xor_si128(self, other); } template - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return _mm_xor_pd(self, other); } template - inline batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_xor_pd(self, other); } template ::value, void>::type> - inline batch bitwise_xor(batch_bool const& self, 
batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_xor_si128(self, other); } // bitwise_cast template ::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm_castsi128_ps(self); } template ::type>::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return batch(self.data); } template ::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm_castps_si128(self); } template ::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm_castsi128_pd(self); } template - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm_castps_pd(self); } template - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm_castpd_ps(self); } template ::value, void>::type> - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm_castpd_si128(self); } // broadcast template - batch inline broadcast(float val, requires_arch) noexcept + batch XSIMD_INLINE broadcast(float val, requires_arch) noexcept { return _mm_set1_ps(val); } template ::value, void>::type> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -522,7 +522,7 @@ namespace xsimd } } template - inline batch broadcast(double val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(double val, requires_arch) noexcept { return _mm_set1_pd(val); } @@ -533,23 +533,23 @@ namespace xsimd // Override these methods in SSE-based archs, no need to override store_aligned / store_unaligned // complex_low template - inline batch complex_low(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_low(batch, A> const& self, requires_arch) noexcept { return _mm_unpacklo_ps(self.real(), self.imag()); } // complex_high template - inline batch complex_high(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_high(batch, A> const& self, requires_arch) noexcept { return _mm_unpackhi_ps(self.real(), self.imag()); } template - inline batch complex_low(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_low(batch, A> const& self, requires_arch) noexcept { return _mm_unpacklo_pd(self.real(), self.imag()); } template - inline batch complex_high(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_high(batch, A> const& self, requires_arch) noexcept { return _mm_unpackhi_pd(self.real(), self.imag()); } @@ -557,19 +557,19 @@ namespace xsimd // decr_if template ::value, void>::type> - inline 
batch decr_if(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch decr_if(batch const& self, batch_bool const& mask, requires_arch) noexcept { return self + batch(mask.data); } // div template - inline batch div(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& self, batch const& other, requires_arch) noexcept { return _mm_div_ps(self, other); } template - inline batch div(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& self, batch const& other, requires_arch) noexcept { return _mm_div_pd(self, other); } @@ -578,13 +578,13 @@ namespace xsimd namespace detail { template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm_cvtepi32_ps(self); } template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { // from https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx // adapted to sse2 @@ -597,7 +597,7 @@ namespace xsimd } template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { // from https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx // adapted to sse2 @@ -611,7 +611,7 @@ namespace xsimd } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return _mm_cvttps_epi32(self); } @@ -619,17 +619,17 @@ namespace xsimd // eq template - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmpeq_ps(self, other); } template - inline batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(self), _mm_castps_si128(other))); } template ::value, void>::type> - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -658,24 +658,24 @@ namespace xsimd } } template ::value, void>::type> - inline batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return ~(self != other); } template - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmpeq_pd(self, other); } template - inline batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_castsi128_pd(_mm_cmpeq_epi32(_mm_castpd_si128(self), _mm_castpd_si128(other))); } // from_mask template - inline batch_bool from_mask(batch_bool const&, 
uint64_t mask, requires_arch) noexcept + XSIMD_INLINE batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept { alignas(A::alignment()) static const uint32_t lut[][4] = { { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, @@ -699,7 +699,7 @@ namespace xsimd return _mm_castsi128_ps(_mm_load_si128((const __m128i*)lut[mask])); } template - inline batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept + XSIMD_INLINE batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept { alignas(A::alignment()) static const uint64_t lut[][4] = { { 0x0000000000000000ul, 0x0000000000000000ul }, @@ -711,7 +711,7 @@ namespace xsimd return _mm_castsi128_pd(_mm_load_si128((const __m128i*)lut[mask])); } template ::value, void>::type> - inline batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept + XSIMD_INLINE batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept { alignas(A::alignment()) static const uint64_t lut64[] = { 0x0000000000000000, @@ -771,24 +771,24 @@ namespace xsimd // ge template - inline batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmpge_ps(self, other); } template - inline batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmpge_pd(self, other); } // gt template - inline batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmpgt_ps(self, other); } template ::value, void>::type> - inline batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -816,14 +816,14 @@ namespace xsimd } template - inline batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmpgt_pd(self, other); } // haddp template - inline batch haddp(batch const* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(batch const* row, requires_arch) noexcept { __m128 tmp0 = _mm_unpacklo_ps(row[0], row[1]); __m128 tmp1 = _mm_unpackhi_ps(row[0], row[1]); @@ -836,7 +836,7 @@ namespace xsimd return _mm_add_ps(tmp0, tmp2); } template - inline batch haddp(batch const* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(batch const* row, requires_arch) noexcept { return _mm_add_pd(_mm_unpacklo_pd(row[0], row[1]), _mm_unpackhi_pd(row[0], row[1])); @@ -844,14 +844,14 @@ namespace xsimd // incr_if template ::value, void>::type> - inline batch incr_if(batch const& self, batch_bool const& mask, requires_arch) noexcept + XSIMD_INLINE batch incr_if(batch const& self, batch_bool const& mask, requires_arch) noexcept { return self - batch(mask.data); } // insert template ::value, void>::type> - inline batch insert(batch const& self, T val, index pos, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, T val, index pos, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 2) { @@ -865,46 +865,46 @@ namespace xsimd // isnan template - inline batch_bool isnan(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const& self, 
requires_arch) noexcept { return _mm_cmpunord_ps(self, self); } template - inline batch_bool isnan(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const& self, requires_arch) noexcept { return _mm_cmpunord_pd(self, self); } // load_aligned template - inline batch load_aligned(float const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(float const* mem, convert, requires_arch) noexcept { return _mm_load_ps(mem); } template ::value, void>::type> - inline batch load_aligned(T const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* mem, convert, requires_arch) noexcept { return _mm_load_si128((__m128i const*)mem); } template - inline batch load_aligned(double const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(double const* mem, convert, requires_arch) noexcept { return _mm_load_pd(mem); } // load_unaligned template - inline batch load_unaligned(float const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(float const* mem, convert, requires_arch) noexcept { return _mm_loadu_ps(mem); } template ::value, void>::type> - inline batch load_unaligned(T const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* mem, convert, requires_arch) noexcept { return _mm_loadu_si128((__m128i const*)mem); } template - inline batch load_unaligned(double const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(double const* mem, convert, requires_arch) noexcept { return _mm_loadu_pd(mem); } @@ -914,12 +914,12 @@ namespace xsimd { // Redefine these methods in the SSE-based archs if required template - inline batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept { return { _mm_shuffle_ps(hi, lo, _MM_SHUFFLE(2, 0, 2, 0)), _mm_shuffle_ps(hi, lo, _MM_SHUFFLE(3, 1, 3, 1)) }; } template - inline batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept { return { _mm_shuffle_pd(hi, lo, _MM_SHUFFLE2(0, 0)), _mm_shuffle_pd(hi, lo, _MM_SHUFFLE2(1, 1)) }; } @@ -927,24 +927,24 @@ namespace xsimd // le template - inline batch_bool le(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmple_ps(self, other); } template - inline batch_bool le(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmple_pd(self, other); } // lt template - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmplt_ps(self, other); } template ::value, void>::type> - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -1011,7 +1011,7 @@ namespace xsimd } template - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmplt_pd(self, other); } @@ -1021,7 +1021,7 @@ 
namespace xsimd */ namespace detail { - inline int mask_lut(int mask) + XSIMD_INLINE int mask_lut(int mask) { // clang-format off static const int mask_lut[256] = { @@ -1049,7 +1049,7 @@ namespace xsimd // mask template ::value, void>::type> - inline uint64_t mask(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -1075,92 +1075,92 @@ namespace xsimd } } template - inline uint64_t mask(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept { return _mm_movemask_ps(self); } template - inline uint64_t mask(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept { return _mm_movemask_pd(self); } // max template - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { return _mm_max_ps(self, other); } template ::value, void>::type> - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { return select(self > other, self, other); } template - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { return _mm_max_pd(self, other); } // min template - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { return _mm_min_ps(self, other); } template ::value, void>::type> - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { return select(self <= other, self, other); } template - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { return _mm_min_pd(self, other); } // mul template - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { return _mm_mul_ps(self, other); } template - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { return _mm_mul_pd(self, other); } // mul template - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { return _mm_mullo_epi16(self, other); } // nearbyint_as_int template - inline batch nearbyint_as_int(batch const& self, - requires_arch) noexcept + XSIMD_INLINE batch nearbyint_as_int(batch const& self, + requires_arch) noexcept { return _mm_cvtps_epi32(self); } // neg template ::value, void>::type> - inline batch neg(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& self, requires_arch) noexcept { return 0 - self; } template - inline batch neg(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& self, requires_arch) noexcept { return _mm_xor_ps(self, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); } template - inline batch neg(batch const& 
self, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& self, requires_arch) noexcept { return _mm_xor_pd( self, _mm_castsi128_pd(_mm_setr_epi32(0, 0x80000000, 0, 0x80000000))); @@ -1168,48 +1168,48 @@ namespace xsimd // neq template - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmpneq_ps(self, other); } template ::value, void>::type> - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return ~(self == other); } template - inline batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_xor_ps(self, other); } template ::value, void>::type> - inline batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_castps_si128(_mm_xor_ps(_mm_castsi128_ps(self.data), _mm_castsi128_ps(other.data))); } template - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmpneq_pd(self, other); } template - inline batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return _mm_xor_pd(self, other); } // reciprocal template - inline batch reciprocal(batch const& self, - kernel::requires_arch) + XSIMD_INLINE batch reciprocal(batch const& self, + kernel::requires_arch) { return _mm_rcp_ps(self); } // reduce_add template - inline float reduce_add(batch const& self, requires_arch) noexcept + XSIMD_INLINE float reduce_add(batch const& self, requires_arch) noexcept { __m128 tmp0 = _mm_add_ps(self, _mm_movehl_ps(self, self)); __m128 tmp1 = _mm_add_ss(tmp0, _mm_shuffle_ps(tmp0, tmp0, 1)); @@ -1217,7 +1217,7 @@ namespace xsimd } template ::value, void>::type> - inline T reduce_add(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_add(batch const& self, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 4) { @@ -1248,14 +1248,14 @@ namespace xsimd } template - inline double reduce_add(batch const& self, requires_arch) noexcept + XSIMD_INLINE double reduce_add(batch const& self, requires_arch) noexcept { return _mm_cvtsd_f64(_mm_add_sd(self, _mm_unpackhi_pd(self, self))); } // reduce_max template ::type> - inline T reduce_max(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_max(batch const& self, requires_arch) noexcept { constexpr auto mask0 = detail::shuffle(2, 3, 0, 0); batch step0 = _mm_shuffle_epi32(self, mask0); @@ -1277,7 +1277,7 @@ namespace xsimd // reduce_min template ::type> - inline T reduce_min(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_min(batch const& self, requires_arch) noexcept { constexpr auto mask0 = detail::shuffle(2, 3, 0, 0); batch step0 = _mm_shuffle_epi32(self, mask0); @@ -1299,42 +1299,42 @@ namespace xsimd // rsqrt template - inline batch rsqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch rsqrt(batch const& val, requires_arch) noexcept { return _mm_rsqrt_ps(val); } 
template - inline batch rsqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch rsqrt(batch const& val, requires_arch) noexcept { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(val))); } // select template - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return _mm_or_ps(_mm_and_ps(cond, true_br), _mm_andnot_ps(cond, false_br)); } template ::value, void>::type> - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return _mm_or_si128(_mm_and_si128(cond, true_br), _mm_andnot_si128(cond, false_br)); } template ::value, void>::type> - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { return select(batch_bool { Values... }, true_br, false_br, sse2 {}); } template - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return _mm_or_pd(_mm_and_pd(cond, true_br), _mm_andnot_pd(cond, false_br)); } // shuffle template - inline batch shuffle(batch const& x, batch const& y, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant mask, requires_arch) noexcept { constexpr uint32_t smask = detail::mod_shuffle(I0, I1, I2, I3); // shuffle within lane @@ -1348,7 +1348,7 @@ namespace xsimd } template - inline batch shuffle(batch const& x, batch const& y, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant mask, requires_arch) noexcept { constexpr uint32_t smask = detail::mod_shuffle(I0, I1); // shuffle within lane @@ -1363,26 +1363,26 @@ namespace xsimd // sqrt template - inline batch sqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch sqrt(batch const& val, requires_arch) noexcept { return _mm_sqrt_ps(val); } template - inline batch sqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch sqrt(batch const& val, requires_arch) noexcept { return _mm_sqrt_pd(val); } // slide_left template - inline batch slide_left(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch slide_left(batch const& x, requires_arch) noexcept { return _mm_slli_si128(x, N); } // slide_right template - inline batch slide_right(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch slide_right(batch const& x, requires_arch) noexcept { return _mm_srli_si128(x, N); } @@ -1390,7 +1390,7 @@ namespace xsimd // sadd template ::value, void>::type> - inline batch sadd(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -1426,55 +1426,55 @@ namespace xsimd // set template - inline batch set(batch const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, Values... 
values) noexcept { static_assert(sizeof...(Values) == batch::size, "consistent init"); return _mm_setr_ps(values...); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1) noexcept { return _mm_set_epi64x(v1, v0); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3) noexcept { return _mm_setr_epi32(v0, v1, v2, v3); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) noexcept { return _mm_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) noexcept { return _mm_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); } template - inline batch set(batch const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, Values... values) noexcept { static_assert(sizeof...(Values) == batch::size, "consistent init"); return _mm_setr_pd(values...); } template ::value, void>::type> - inline batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept { return set(batch(), A {}, static_cast(values ? -1LL : 0LL)...).data; } template - inline batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept { static_assert(sizeof...(Values) == batch_bool::size, "consistent init"); return _mm_castsi128_ps(set(batch(), A {}, static_cast(values ? -1LL : 0LL)...).data); } template - inline batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept { static_assert(sizeof...(Values) == batch_bool::size, "consistent init"); return _mm_castsi128_pd(set(batch(), A {}, static_cast(values ? 
-1LL : 0LL)...).data); @@ -1483,7 +1483,7 @@ namespace xsimd // ssub template ::value, void>::type> - inline batch ssub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch ssub(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -1519,56 +1519,56 @@ namespace xsimd // store_aligned template - inline void store_aligned(float* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(float* mem, batch const& self, requires_arch) noexcept { return _mm_store_ps(mem, self); } template ::value, void>::type> - inline void store_aligned(T* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* mem, batch const& self, requires_arch) noexcept { return _mm_store_si128((__m128i*)mem, self); } template ::value, void>::type> - inline void store_aligned(T* mem, batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* mem, batch_bool const& self, requires_arch) noexcept { return _mm_store_si128((__m128i*)mem, self); } template - inline void store_aligned(double* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(double* mem, batch const& self, requires_arch) noexcept { return _mm_store_pd(mem, self); } // store_unaligned template - inline void store_unaligned(float* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(float* mem, batch const& self, requires_arch) noexcept { return _mm_storeu_ps(mem, self); } template ::value, void>::type> - inline void store_unaligned(T* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(T* mem, batch const& self, requires_arch) noexcept { return _mm_storeu_si128((__m128i*)mem, self); } template ::value, void>::type> - inline void store_unaligned(T* mem, batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(T* mem, batch_bool const& self, requires_arch) noexcept { return _mm_storeu_si128((__m128i*)mem, self); } template - inline void store_unaligned(double* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(double* mem, batch const& self, requires_arch) noexcept { return _mm_storeu_pd(mem, self); } // sub template - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { return _mm_sub_ps(self, other); } template ::value, void>::type> - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -1593,7 +1593,7 @@ namespace xsimd } } template - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { return _mm_sub_pd(self, other); } @@ -1601,53 +1601,53 @@ namespace xsimd // swizzle template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { constexpr uint32_t index = detail::shuffle(V0, V1, V2, V3); return _mm_shuffle_ps(self, self, index); } template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { constexpr uint32_t index = detail::shuffle(V0, V1); 
return _mm_shuffle_pd(self, self, index); } template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { constexpr uint32_t index = detail::shuffle(2 * V0, 2 * V0 + 1, 2 * V1, 2 * V1 + 1); return _mm_shuffle_epi32(self, index); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, sse2 {})); } template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { constexpr uint32_t index = detail::shuffle(V0, V1, V2, V3); return _mm_shuffle_epi32(self, index); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, sse2 {})); } // zip_hi template - inline batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { return _mm_unpackhi_ps(self, other); } template ::value, void>::type> - inline batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -1672,19 +1672,19 @@ namespace xsimd } } template - inline batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { return _mm_unpackhi_pd(self, other); } // zip_lo template - inline batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { return _mm_unpacklo_ps(self, other); } template ::value, void>::type> - inline batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -1709,7 +1709,7 @@ namespace xsimd } } template - inline batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { return _mm_unpacklo_pd(self, other); } diff --git a/include/xsimd/arch/xsimd_sse3.hpp b/include/xsimd/arch/xsimd_sse3.hpp index ccc049795..ffdc5bc9f 100644 --- a/include/xsimd/arch/xsimd_sse3.hpp +++ b/include/xsimd/arch/xsimd_sse3.hpp @@ -24,34 +24,34 @@ namespace xsimd // haddp template - inline batch haddp(batch const* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(batch const* row, requires_arch) noexcept { return _mm_hadd_ps(_mm_hadd_ps(row[0], row[1]), _mm_hadd_ps(row[2], row[3])); } template - inline batch haddp(batch const* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(batch const* row, requires_arch) noexcept { return _mm_hadd_pd(row[0], row[1]); } // load_unaligned template ::value, void>::type> - inline batch load_unaligned(T const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* mem, convert, requires_arch) noexcept { return _mm_lddqu_si128((__m128i const*)mem); } 
// reduce_add template - inline float reduce_add(batch const& self, requires_arch) noexcept + XSIMD_INLINE float reduce_add(batch const& self, requires_arch) noexcept { __m128 tmp0 = _mm_hadd_ps(self, self); __m128 tmp1 = _mm_hadd_ps(tmp0, tmp0); return _mm_cvtss_f32(tmp1); } template - inline double reduce_add(batch const& self, requires_arch) noexcept + XSIMD_INLINE double reduce_add(batch const& self, requires_arch) noexcept { __m128d tmp0 = _mm_hadd_pd(self, self); return _mm_cvtsd_f64(tmp0); diff --git a/include/xsimd/arch/xsimd_sse4_1.hpp b/include/xsimd/arch/xsimd_sse4_1.hpp index 18cfe38ce..7fce2c314 100644 --- a/include/xsimd/arch/xsimd_sse4_1.hpp +++ b/include/xsimd/arch/xsimd_sse4_1.hpp @@ -24,18 +24,18 @@ namespace xsimd using namespace types; // any template ::value, void>::type> - inline bool any(batch const& self, requires_arch) noexcept + XSIMD_INLINE bool any(batch const& self, requires_arch) noexcept { return !_mm_testz_si128(self, self); } // ceil template - inline batch ceil(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch ceil(batch const& self, requires_arch) noexcept { return _mm_ceil_ps(self); } template - inline batch ceil(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch ceil(batch const& self, requires_arch) noexcept { return _mm_ceil_pd(self); } @@ -44,7 +44,7 @@ namespace xsimd namespace detail { template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { // from https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx __m128i xH = _mm_srai_epi32(x, 16); @@ -56,7 +56,7 @@ namespace xsimd } template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { // from https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx __m128i xH = _mm_srli_epi64(x, 32); @@ -69,7 +69,7 @@ namespace xsimd // eq template ::value, void>::type> - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 8) { @@ -83,19 +83,19 @@ namespace xsimd // floor template - inline batch floor(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch floor(batch const& self, requires_arch) noexcept { return _mm_floor_ps(self); } template - inline batch floor(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch floor(batch const& self, requires_arch) noexcept { return _mm_floor_pd(self); } // insert template ::value, void>::type> - inline batch insert(batch const& self, T val, index pos, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, T val, index pos, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -124,7 +124,7 @@ namespace xsimd // max template ::value, void>::type> - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -168,7 +168,7 @@ namespace xsimd // min template ::value, void>::type> - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { 
@@ -212,7 +212,7 @@ namespace xsimd // mul template ::value, void>::type> - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -247,12 +247,12 @@ namespace xsimd // nearbyint template - inline batch nearbyint(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch nearbyint(batch const& self, requires_arch) noexcept { return _mm_round_ps(self, _MM_FROUND_TO_NEAREST_INT); } template - inline batch nearbyint(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch nearbyint(batch const& self, requires_arch) noexcept { return _mm_round_pd(self, _MM_FROUND_TO_NEAREST_INT); } @@ -261,30 +261,30 @@ namespace xsimd namespace detail { template - inline constexpr T interleave(T const& cond) noexcept + XSIMD_INLINE constexpr T interleave(T const& cond) noexcept { return (((cond * 0x0101010101010101ULL & 0x8040201008040201ULL) * 0x0102040810204081ULL >> 49) & 0x5555) | (((cond * 0x0101010101010101ULL & 0x8040201008040201ULL) * 0x0102040810204081ULL >> 48) & 0xAAAA); } } template ::value, void>::type> - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return _mm_blendv_epi8(false_br, true_br, cond); } template - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return _mm_blendv_ps(false_br, true_br, cond); } template - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return _mm_blendv_pd(false_br, true_br, cond); } template ::value, void>::type> - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { constexpr int mask = batch_bool_constant::mask(); XSIMD_IF_CONSTEXPR(sizeof(T) == 2) @@ -308,13 +308,13 @@ namespace xsimd } } template - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { constexpr int mask = batch_bool_constant::mask(); return _mm_blend_ps(false_br, true_br, mask); } template - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { constexpr int mask = batch_bool_constant::mask(); return _mm_blend_pd(false_br, true_br, mask); @@ -322,12 +322,12 @@ namespace xsimd // trunc template - inline batch trunc(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch trunc(batch const& self, requires_arch) noexcept { return _mm_round_ps(self, _MM_FROUND_TO_ZERO); } template - inline batch trunc(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch trunc(batch const& self, requires_arch) noexcept { 
return _mm_round_pd(self, _MM_FROUND_TO_ZERO); } diff --git a/include/xsimd/arch/xsimd_sse4_2.hpp b/include/xsimd/arch/xsimd_sse4_2.hpp index 8f9b7a76e..5265182f9 100644 --- a/include/xsimd/arch/xsimd_sse4_2.hpp +++ b/include/xsimd/arch/xsimd_sse4_2.hpp @@ -25,12 +25,12 @@ namespace xsimd // lt template - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { return _mm_cmpgt_epi64(other, self); } template - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { auto xself = _mm_xor_si128(self, _mm_set1_epi64x(std::numeric_limits::lowest())); auto xother = _mm_xor_si128(other, _mm_set1_epi64x(std::numeric_limits::lowest())); diff --git a/include/xsimd/arch/xsimd_ssse3.hpp b/include/xsimd/arch/xsimd_ssse3.hpp index d4c0b171c..9424d4ada 100644 --- a/include/xsimd/arch/xsimd_ssse3.hpp +++ b/include/xsimd/arch/xsimd_ssse3.hpp @@ -27,7 +27,7 @@ namespace xsimd // abs template ::value && std::is_signed::value, void>::type> - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -57,13 +57,13 @@ namespace xsimd { template - inline batch extract_pair(batch const&, batch const& other, std::size_t, ::xsimd::detail::index_sequence<>) noexcept + XSIMD_INLINE batch extract_pair(batch const&, batch const& other, std::size_t, ::xsimd::detail::index_sequence<>) noexcept { return other; } template - inline batch extract_pair(batch const& self, batch const& other, std::size_t i, ::xsimd::detail::index_sequence) noexcept + XSIMD_INLINE batch extract_pair(batch const& self, batch const& other, std::size_t i, ::xsimd::detail::index_sequence) noexcept { if (i == I) { @@ -75,7 +75,7 @@ namespace xsimd } template ::value, void>::type> - inline batch extract_pair(batch const& self, batch const& other, std::size_t i, requires_arch) noexcept + XSIMD_INLINE batch extract_pair(batch const& self, batch const& other, std::size_t i, requires_arch) noexcept { constexpr std::size_t size = batch::size; assert(0 <= i && i < size && "index in bounds"); @@ -84,7 +84,7 @@ namespace xsimd // reduce_add template ::value, void>::type> - inline T reduce_add(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_add(batch const& self, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 2) { @@ -107,30 +107,30 @@ namespace xsimd // rotate_right template - inline batch rotate_right(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch rotate_right(batch const& self, requires_arch) noexcept { return _mm_alignr_epi8(self, self, N); } template - inline batch rotate_right(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch rotate_right(batch const& self, requires_arch) noexcept { return bitwise_cast(rotate_right(bitwise_cast(self), ssse3 {})); } // swizzle (dynamic mask) template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return _mm_shuffle_epi8(self, mask); } template - inline batch swizzle(batch const& self, batch mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch mask, requires_arch) noexcept { return _mm_shuffle_epi8(self, mask); } template - inline typename std::enable_if::value, 
batch>::type + XSIMD_INLINE typename std::enable_if::value, batch>::type swizzle(batch const& self, batch mask, requires_arch) noexcept { constexpr auto pikes = static_cast>(0x0706050403020100ul); @@ -140,7 +140,7 @@ namespace xsimd // swizzle (constant mask) template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { constexpr batch_constant @@ -149,21 +149,21 @@ namespace xsimd } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, ssse3 {})); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return swizzle(self, mask.as_batch(), ssse3 {}); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return swizzle(self, mask.as_batch(), ssse3 {}); } diff --git a/include/xsimd/arch/xsimd_sve.hpp b/include/xsimd/arch/xsimd_sve.hpp index 553f026cc..1586b8e0b 100644 --- a/include/xsimd/arch/xsimd_sve.hpp +++ b/include/xsimd/arch/xsimd_sve.hpp @@ -31,22 +31,22 @@ namespace xsimd using xsimd::types::detail::sve_vector_type; // predicate creation - inline svbool_t sve_ptrue_impl(index<1>) noexcept { return svptrue_b8(); } - inline svbool_t sve_ptrue_impl(index<2>) noexcept { return svptrue_b16(); } - inline svbool_t sve_ptrue_impl(index<4>) noexcept { return svptrue_b32(); } - inline svbool_t sve_ptrue_impl(index<8>) noexcept { return svptrue_b64(); } + XSIMD_INLINE svbool_t sve_ptrue_impl(index<1>) noexcept { return svptrue_b8(); } + XSIMD_INLINE svbool_t sve_ptrue_impl(index<2>) noexcept { return svptrue_b16(); } + XSIMD_INLINE svbool_t sve_ptrue_impl(index<4>) noexcept { return svptrue_b32(); } + XSIMD_INLINE svbool_t sve_ptrue_impl(index<8>) noexcept { return svptrue_b64(); } template svbool_t sve_ptrue() noexcept { return sve_ptrue_impl(index {}); } // count active lanes in a predicate - inline uint64_t sve_pcount_impl(svbool_t p, index<1>) noexcept { return svcntp_b8(p, p); } - inline uint64_t sve_pcount_impl(svbool_t p, index<2>) noexcept { return svcntp_b16(p, p); } - inline uint64_t sve_pcount_impl(svbool_t p, index<4>) noexcept { return svcntp_b32(p, p); } - inline uint64_t sve_pcount_impl(svbool_t p, index<8>) noexcept { return svcntp_b64(p, p); } + XSIMD_INLINE uint64_t sve_pcount_impl(svbool_t p, index<1>) noexcept { return svcntp_b8(p, p); } + XSIMD_INLINE uint64_t sve_pcount_impl(svbool_t p, index<2>) noexcept { return svcntp_b16(p, p); } + XSIMD_INLINE uint64_t sve_pcount_impl(svbool_t p, index<4>) noexcept { return svcntp_b32(p, p); } + XSIMD_INLINE uint64_t sve_pcount_impl(svbool_t p, index<8>) noexcept { return svcntp_b64(p, p); } template - inline uint64_t sve_pcount(svbool_t p) noexcept { return sve_pcount_impl(p, index {}); } + XSIMD_INLINE uint64_t sve_pcount(svbool_t p) noexcept { return sve_pcount_impl(p, index {}); } // enable for signed integers template @@ -84,20 +84,20 @@ namespace xsimd } template = 0> - inline batch load_aligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* src, convert, requires_arch) noexcept { return 
svld1(detail::sve_ptrue(), reinterpret_cast const*>(src)); } template = 0> - inline batch load_unaligned(T const* src, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* src, convert, requires_arch) noexcept { return load_aligned(src, convert(), sve {}); } // load_complex template = 0> - inline batch, A> load_complex_aligned(std::complex const* mem, convert>, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex_aligned(std::complex const* mem, convert>, requires_arch) noexcept { const T* buf = reinterpret_cast(mem); const auto tmp = svld2(detail::sve_ptrue(), buf); @@ -107,7 +107,7 @@ namespace xsimd } template = 0> - inline batch, A> load_complex_unaligned(std::complex const* mem, convert>, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex_unaligned(std::complex const* mem, convert>, requires_arch) noexcept { return load_complex_aligned(mem, convert> {}, sve {}); } @@ -117,20 +117,20 @@ namespace xsimd *********/ template = 0> - inline void store_aligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* dst, batch const& src, requires_arch) noexcept { svst1(detail::sve_ptrue(), reinterpret_cast*>(dst), src); } template = 0> - inline void store_unaligned(T* dst, batch const& src, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(T* dst, batch const& src, requires_arch) noexcept { store_aligned(dst, src, sve {}); } // store_complex template = 0> - inline void store_complex_aligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept + XSIMD_INLINE void store_complex_aligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { using v2type = typename std::conditional<(sizeof(T) == 4), svfloat32x2_t, svfloat64x2_t>::type; v2type tmp {}; @@ -141,7 +141,7 @@ namespace xsimd } template = 0> - inline void store_complex_unaligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept + XSIMD_INLINE void store_complex_unaligned(std::complex* dst, batch, A> const& src, requires_arch) noexcept { store_complex_aligned(dst, src, sve {}); } @@ -158,14 +158,14 @@ namespace xsimd // scatter template = 0> - inline void scatter(batch const& src, T* dst, batch const& index, kernel::requires_arch) noexcept + XSIMD_INLINE void scatter(batch const& src, T* dst, batch const& index, kernel::requires_arch) noexcept { svst1_scatter_index(detail::sve_ptrue(), dst, index.data, src.data); } // gather template = 0> - inline batch gather(batch const&, T const* src, batch const& index, kernel::requires_arch) noexcept + XSIMD_INLINE batch gather(batch const&, T const* src, batch const& index, kernel::requires_arch) noexcept { return svld1_gather_index(detail::sve_ptrue(), src, index.data); } @@ -176,67 +176,67 @@ namespace xsimd // broadcast template = 0> - inline batch broadcast(T arg, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T arg, requires_arch) noexcept { return svdup_n_u8(uint8_t(arg)); } template = 0> - inline batch broadcast(T arg, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T arg, requires_arch) noexcept { return svdup_n_s8(int8_t(arg)); } template = 0> - inline batch broadcast(T arg, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T arg, requires_arch) noexcept { return svdup_n_u16(uint16_t(arg)); } template = 0> - inline batch broadcast(T arg, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T arg, requires_arch) noexcept { return svdup_n_s16(int16_t(arg)); } template = 0> - inline batch broadcast(T arg, requires_arch) noexcept + XSIMD_INLINE 
batch broadcast(T arg, requires_arch) noexcept { return svdup_n_u32(uint32_t(arg)); } template = 0> - inline batch broadcast(T arg, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T arg, requires_arch) noexcept { return svdup_n_s32(int32_t(arg)); } template = 0> - inline batch broadcast(T arg, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T arg, requires_arch) noexcept { return svdup_n_u64(uint64_t(arg)); } template = 0> - inline batch broadcast(T arg, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T arg, requires_arch) noexcept { return svdup_n_s64(int64_t(arg)); } template - inline batch broadcast(float arg, requires_arch) noexcept + XSIMD_INLINE batch broadcast(float arg, requires_arch) noexcept { return svdup_n_f32(arg); } template - inline batch broadcast(double arg, requires_arch) noexcept + XSIMD_INLINE batch broadcast(double arg, requires_arch) noexcept { return svdup_n_f64(arg); } template = 0> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { return broadcast(val, sve {}); } @@ -247,128 +247,128 @@ namespace xsimd // add template = 0> - inline batch add(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svadd_x(detail::sve_ptrue(), lhs, rhs); } // sadd template = 0> - inline batch sadd(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svqadd(lhs, rhs); } // sub template = 0> - inline batch sub(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svsub_x(detail::sve_ptrue(), lhs, rhs); } // ssub template = 0> - inline batch ssub(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch ssub(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svqsub(lhs, rhs); } // mul template = 0> - inline batch mul(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svmul_x(detail::sve_ptrue(), lhs, rhs); } // div template = 4, int>::type = 0> - inline batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svdiv_x(detail::sve_ptrue(), lhs, rhs); } // max template = 0> - inline batch max(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svmax_x(detail::sve_ptrue(), lhs, rhs); } // min template = 0> - inline batch min(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svmin_x(detail::sve_ptrue(), lhs, rhs); } // neg template = 0> - inline batch neg(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& arg, requires_arch) noexcept { return svreinterpret_u8(svneg_x(detail::sve_ptrue(), svreinterpret_s8(arg))); } template = 0> - inline batch neg(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& arg, requires_arch) noexcept { return svreinterpret_u16(svneg_x(detail::sve_ptrue(), svreinterpret_s16(arg))); } template = 0> - inline batch neg(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch 
neg(batch const& arg, requires_arch) noexcept { return svreinterpret_u32(svneg_x(detail::sve_ptrue(), svreinterpret_s32(arg))); } template = 0> - inline batch neg(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& arg, requires_arch) noexcept { return svreinterpret_u64(svneg_x(detail::sve_ptrue(), svreinterpret_s64(arg))); } template = 0> - inline batch neg(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& arg, requires_arch) noexcept { return svneg_x(detail::sve_ptrue(), arg); } // abs template = 0> - inline batch abs(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& arg, requires_arch) noexcept { return arg; } template = 0> - inline batch abs(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& arg, requires_arch) noexcept { return svabs_x(detail::sve_ptrue(), arg); } // fma: x * y + z template = 0> - inline batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return svmad_x(detail::sve_ptrue(), x, y, z); } // fnma: z - x * y template = 0> - inline batch fnma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fnma(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return svmsb_x(detail::sve_ptrue(), x, y, z); } // fms: x * y - z template = 0> - inline batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return -fnma(x, y, z, sve {}); } // fnms: - x * y - z template = 0> - inline batch fnms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept + XSIMD_INLINE batch fnms(batch const& x, batch const& y, batch const& z, requires_arch) noexcept { return -fma(x, y, z, sve {}); } @@ -379,13 +379,13 @@ namespace xsimd // bitwise_and template = 0> - inline batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svand_x(detail::sve_ptrue(), lhs, rhs); } template - inline batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = svreinterpret_u32(lhs); const auto rhs_bits = svreinterpret_u32(rhs); @@ -394,7 +394,7 @@ namespace xsimd } template - inline batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = svreinterpret_u64(lhs); const auto rhs_bits = svreinterpret_u64(rhs); @@ -403,20 +403,20 @@ namespace xsimd } template = 0> - inline batch_bool bitwise_and(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return svand_z(detail::sve_ptrue(), lhs, rhs); } // bitwise_andnot template = 0> - inline batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svbic_x(detail::sve_ptrue(), lhs, rhs); } template - inline batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE 
batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = svreinterpret_u32(lhs); const auto rhs_bits = svreinterpret_u32(rhs); @@ -425,7 +425,7 @@ namespace xsimd } template - inline batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = svreinterpret_u64(lhs); const auto rhs_bits = svreinterpret_u64(rhs); @@ -434,20 +434,20 @@ namespace xsimd } template = 0> - inline batch_bool bitwise_andnot(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return svbic_z(detail::sve_ptrue(), lhs, rhs); } // bitwise_or template = 0> - inline batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svorr_x(detail::sve_ptrue(), lhs, rhs); } template - inline batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = svreinterpret_u32(lhs); const auto rhs_bits = svreinterpret_u32(rhs); @@ -456,7 +456,7 @@ namespace xsimd } template - inline batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = svreinterpret_u64(lhs); const auto rhs_bits = svreinterpret_u64(rhs); @@ -465,20 +465,20 @@ namespace xsimd } template = 0> - inline batch_bool bitwise_or(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return svorr_z(detail::sve_ptrue(), lhs, rhs); } // bitwise_xor template = 0> - inline batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept { return sveor_x(detail::sve_ptrue(), lhs, rhs); } template - inline batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = svreinterpret_u32(lhs); const auto rhs_bits = svreinterpret_u32(rhs); @@ -487,7 +487,7 @@ namespace xsimd } template - inline batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& lhs, batch const& rhs, requires_arch) noexcept { const auto lhs_bits = svreinterpret_u64(lhs); const auto rhs_bits = svreinterpret_u64(rhs); @@ -496,20 +496,20 @@ namespace xsimd } template = 0> - inline batch_bool bitwise_xor(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return sveor_z(detail::sve_ptrue(), lhs, rhs); } // bitwise_not template = 0> - inline batch bitwise_not(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& arg, requires_arch) noexcept { return svnot_x(detail::sve_ptrue(), arg); } template - inline batch bitwise_not(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& arg, requires_arch) noexcept { const 
auto arg_bits = svreinterpret_u32(arg); const auto result_bits = svnot_x(detail::sve_ptrue(), arg_bits); @@ -517,7 +517,7 @@ namespace xsimd } template - inline batch bitwise_not(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& arg, requires_arch) noexcept { const auto arg_bits = svreinterpret_u64(arg); const auto result_bits = svnot_x(detail::sve_ptrue(), arg_bits); @@ -525,7 +525,7 @@ namespace xsimd } template = 0> - inline batch_bool bitwise_not(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& arg, requires_arch) noexcept { return svnot_z(detail::sve_ptrue(), arg); } @@ -537,31 +537,31 @@ namespace xsimd namespace detail { template - inline batch sve_to_unsigned_batch_impl(batch const& arg, index<1>) noexcept + XSIMD_INLINE batch sve_to_unsigned_batch_impl(batch const& arg, index<1>) noexcept { return svreinterpret_u8(arg); } template - inline batch sve_to_unsigned_batch_impl(batch const& arg, index<2>) noexcept + XSIMD_INLINE batch sve_to_unsigned_batch_impl(batch const& arg, index<2>) noexcept { return svreinterpret_u16(arg); } template - inline batch sve_to_unsigned_batch_impl(batch const& arg, index<4>) noexcept + XSIMD_INLINE batch sve_to_unsigned_batch_impl(batch const& arg, index<4>) noexcept { return svreinterpret_u32(arg); } template - inline batch sve_to_unsigned_batch_impl(batch const& arg, index<8>) noexcept + XSIMD_INLINE batch sve_to_unsigned_batch_impl(batch const& arg, index<8>) noexcept { return svreinterpret_u64(arg); } template > - inline batch sve_to_unsigned_batch(batch const& arg) noexcept + XSIMD_INLINE batch sve_to_unsigned_batch(batch const& arg) noexcept { return sve_to_unsigned_batch_impl(arg, index {}); } @@ -569,7 +569,7 @@ namespace xsimd // bitwise_lshift template = 0> - inline batch bitwise_lshift(batch const& arg, int n, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& arg, int n, requires_arch) noexcept { constexpr std::size_t size = sizeof(typename batch::value_type) * 8; assert(0 <= n && static_cast(n) < size && "index in bounds"); @@ -577,14 +577,14 @@ namespace xsimd } template = 0> - inline batch bitwise_lshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svlsl_x(detail::sve_ptrue(), lhs, detail::sve_to_unsigned_batch(rhs)); } // bitwise_rshift template = 0> - inline batch bitwise_rshift(batch const& arg, int n, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& arg, int n, requires_arch) noexcept { constexpr std::size_t size = sizeof(typename batch::value_type) * 8; assert(0 <= n && static_cast(n) < size && "index in bounds"); @@ -592,13 +592,13 @@ namespace xsimd } template = 0> - inline batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svlsr_x(detail::sve_ptrue(), lhs, rhs); } template = 0> - inline batch bitwise_rshift(batch const& arg, int n, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& arg, int n, requires_arch) noexcept { constexpr std::size_t size = sizeof(typename batch::value_type) * 8; assert(0 <= n && static_cast(n) < size && "index in bounds"); @@ -606,7 +606,7 @@ namespace xsimd } template = 0> - inline batch bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch 
bitwise_rshift(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svasr_x(detail::sve_ptrue(), lhs, detail::sve_to_unsigned_batch(rhs)); } @@ -617,7 +617,7 @@ namespace xsimd // reduce_add template ::value_type, detail::sve_enable_all_t = 0> - inline V reduce_add(batch const& arg, requires_arch) noexcept + XSIMD_INLINE V reduce_add(batch const& arg, requires_arch) noexcept { // sve integer reduction results are promoted to 64 bits return static_cast(svaddv(detail::sve_ptrue(), arg)); @@ -625,21 +625,21 @@ namespace xsimd // reduce_max template = 0> - inline T reduce_max(batch const& arg, requires_arch) noexcept + XSIMD_INLINE T reduce_max(batch const& arg, requires_arch) noexcept { return svmaxv(detail::sve_ptrue(), arg); } // reduce_min template = 0> - inline T reduce_min(batch const& arg, requires_arch) noexcept + XSIMD_INLINE T reduce_min(batch const& arg, requires_arch) noexcept { return svminv(detail::sve_ptrue(), arg); } // haddp template = 0> - inline batch haddp(const batch* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(const batch* row, requires_arch) noexcept { constexpr std::size_t size = batch::size; T sums[size]; @@ -656,13 +656,13 @@ namespace xsimd // eq template = 0> - inline batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svcmpeq(detail::sve_ptrue(), lhs, rhs); } template = 0> - inline batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { const auto neq_result = sveor_z(detail::sve_ptrue(), lhs, rhs); return svnot_z(detail::sve_ptrue(), neq_result); @@ -670,41 +670,41 @@ namespace xsimd // neq template = 0> - inline batch_bool neq(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svcmpne(detail::sve_ptrue(), lhs, rhs); } template = 0> - inline batch_bool neq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& lhs, batch_bool const& rhs, requires_arch) noexcept { return sveor_z(detail::sve_ptrue(), lhs, rhs); } // lt template = 0> - inline batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svcmplt(detail::sve_ptrue(), lhs, rhs); } // le template = 0> - inline batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svcmple(detail::sve_ptrue(), lhs, rhs); } // gt template = 0> - inline batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svcmpgt(detail::sve_ptrue(), lhs, rhs); } // ge template = 0> - inline batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svcmpge(detail::sve_ptrue(), lhs, rhs); } @@ -715,22 +715,22 @@ namespace xsimd // rotate_right template = 0> - inline batch rotate_right(batch const& a, requires_arch) noexcept + XSIMD_INLINE batch rotate_right(batch const& a, requires_arch) noexcept { return svext(a, a, N); } // swizzle (dynamic) 
template - inline batch swizzle(batch const& arg, batch indices, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& arg, batch indices, requires_arch) noexcept { return svtbl(arg, indices); } template - inline batch, A> swizzle(batch, A> const& self, - batch indices, - requires_arch) noexcept + XSIMD_INLINE batch, A> swizzle(batch, A> const& self, + batch indices, + requires_arch) noexcept { const auto real = swizzle(self.real(), indices, sve {}); const auto imag = swizzle(self.imag(), indices, sve {}); @@ -739,16 +739,16 @@ namespace xsimd // swizzle (static) template - inline batch swizzle(batch const& arg, batch_constant indices, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& arg, batch_constant indices, requires_arch) noexcept { static_assert(batch::size == sizeof...(idx), "invalid swizzle indices"); return swizzle(arg, indices.as_batch(), sve {}); } template - inline batch, A> swizzle(batch, A> const& arg, - batch_constant indices, - requires_arch) noexcept + XSIMD_INLINE batch, A> swizzle(batch, A> const& arg, + batch_constant indices, + requires_arch) noexcept { static_assert(batch, A>::size == sizeof...(idx), "invalid swizzle indices"); return swizzle(arg, indices.as_batch(), sve {}); @@ -762,14 +762,14 @@ namespace xsimd namespace detail { template - inline batch sve_extract_pair(batch const&, batch const& /*rhs*/, std::size_t, ::xsimd::detail::index_sequence<>) noexcept + XSIMD_INLINE batch sve_extract_pair(batch const&, batch const& /*rhs*/, std::size_t, ::xsimd::detail::index_sequence<>) noexcept { assert(false && "extract_pair out of bounds"); return batch {}; } template - inline batch sve_extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept + XSIMD_INLINE batch sve_extract_pair(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence) noexcept { if (n == I) { @@ -782,7 +782,7 @@ namespace xsimd } template - inline batch sve_extract_pair_impl(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence<0, Is...>) noexcept + XSIMD_INLINE batch sve_extract_pair_impl(batch const& lhs, batch const& rhs, std::size_t n, ::xsimd::detail::index_sequence<0, Is...>) noexcept { if (n == 0) { @@ -796,7 +796,7 @@ namespace xsimd } template = 0> - inline batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, requires_arch) noexcept + XSIMD_INLINE batch extract_pair(batch const& lhs, batch const& rhs, std::size_t n, requires_arch) noexcept { constexpr std::size_t size = batch::size; assert(n < size && "index in bounds"); @@ -805,27 +805,27 @@ namespace xsimd // select template = 0> - inline batch select(batch_bool const& cond, batch const& a, batch const& b, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& a, batch const& b, requires_arch) noexcept { return svsel(cond, a, b); } template - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { return select(batch_bool { b... 
}, true_br, false_br, sve {}); } // zip_lo template = 0> - inline batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svzip1(lhs, rhs); } // zip_hi template = 0> - inline batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& lhs, batch const& rhs, requires_arch) noexcept { return svzip2(lhs, rhs); } @@ -836,21 +836,21 @@ namespace xsimd // rsqrt template = 0> - inline batch rsqrt(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch rsqrt(batch const& arg, requires_arch) noexcept { return svrsqrte(arg); } // sqrt template = 0> - inline batch sqrt(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch sqrt(batch const& arg, requires_arch) noexcept { return svsqrt_x(detail::sve_ptrue(), arg); } // reciprocal template = 0> - inline batch reciprocal(const batch& arg, requires_arch) noexcept + XSIMD_INLINE batch reciprocal(const batch& arg, requires_arch) noexcept { return svrecpe(arg); } @@ -863,37 +863,37 @@ namespace xsimd namespace detail { template = 0> - inline batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept { return svcvt_f32_x(detail::sve_ptrue(), arg); } template = 0> - inline batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept { return svcvt_f64_x(detail::sve_ptrue(), arg); } template - inline batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept { return svcvt_s32_x(detail::sve_ptrue(), arg); } template - inline batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept { return svcvt_u32_x(detail::sve_ptrue(), arg); } template - inline batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept { return svcvt_s64_x(detail::sve_ptrue(), arg); } template - inline batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& arg, batch const&, requires_arch) noexcept { return svcvt_u64_x(detail::sve_ptrue(), arg); } @@ -905,21 +905,21 @@ namespace xsimd // set template - inline batch set(batch const&, requires_arch, Args... args) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, Args... args) noexcept { return detail::sve_vector_type { args... }; } template - inline batch, A> set(batch, A> const&, requires_arch, - Args... args_complex) noexcept + XSIMD_INLINE batch, A> set(batch, A> const&, requires_arch, + Args... args_complex) noexcept { return batch>(detail::sve_vector_type { args_complex.real()... }, detail::sve_vector_type { args_complex.imag()... }); } template - inline batch_bool set(batch_bool const&, requires_arch, Args... args) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Args... args) noexcept { using U = as_unsigned_integer_t; const auto values = detail::sve_vector_type { static_cast(args)... 
}; @@ -931,17 +931,17 @@ namespace xsimd namespace detail { // generate index sequence (iota) - inline svuint8_t sve_iota_impl(index<1>) noexcept { return svindex_u8(0, 1); } - inline svuint16_t sve_iota_impl(index<2>) noexcept { return svindex_u16(0, 1); } - inline svuint32_t sve_iota_impl(index<4>) noexcept { return svindex_u32(0, 1); } - inline svuint64_t sve_iota_impl(index<8>) noexcept { return svindex_u64(0, 1); } + XSIMD_INLINE svuint8_t sve_iota_impl(index<1>) noexcept { return svindex_u8(0, 1); } + XSIMD_INLINE svuint16_t sve_iota_impl(index<2>) noexcept { return svindex_u16(0, 1); } + XSIMD_INLINE svuint32_t sve_iota_impl(index<4>) noexcept { return svindex_u32(0, 1); } + XSIMD_INLINE svuint64_t sve_iota_impl(index<8>) noexcept { return svindex_u64(0, 1); } template >> - inline V sve_iota() noexcept { return sve_iota_impl(index {}); } + XSIMD_INLINE V sve_iota() noexcept { return sve_iota_impl(index {}); } } // namespace detail template = 0> - inline batch insert(batch const& arg, T val, index, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& arg, T val, index, requires_arch) noexcept { // create a predicate with only the I-th lane activated const auto iota = detail::sve_iota(); @@ -951,89 +951,89 @@ namespace xsimd // all template = 0> - inline bool all(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& arg, requires_arch) noexcept { return detail::sve_pcount(arg) == batch_bool::size; } // any template = 0> - inline bool any(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& arg, requires_arch) noexcept { return svptest_any(arg, arg); } // bitwise_cast template = 0, detail::enable_sized_unsigned_t = 0> - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_u8(arg); } template = 0, detail::enable_sized_signed_t = 0> - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_s8(arg); } template = 0, detail::enable_sized_unsigned_t = 0> - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_u16(arg); } template = 0, detail::enable_sized_signed_t = 0> - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_s16(arg); } template = 0, detail::enable_sized_unsigned_t = 0> - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_u32(arg); } template = 0, detail::enable_sized_signed_t = 0> - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_s32(arg); } template = 0, detail::enable_sized_unsigned_t = 0> - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_u64(arg); } template = 0, detail::enable_sized_signed_t = 0> - inline 
batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_s64(arg); } template = 0> - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_f32(arg); } template = 0> - inline batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& arg, batch const&, requires_arch) noexcept { return svreinterpret_f64(arg); } // batch_bool_cast template = 0> - inline batch_bool batch_bool_cast(batch_bool const& arg, batch_bool const&, requires_arch) noexcept + XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& arg, batch_bool const&, requires_arch) noexcept { return arg.data; } // from_bool template = 0> - inline batch from_bool(batch_bool const& arg, requires_arch) noexcept + XSIMD_INLINE batch from_bool(batch_bool const& arg, requires_arch) noexcept { return select(arg, batch(1), batch(0)); } @@ -1045,7 +1045,7 @@ namespace xsimd struct sve_slider_left { template - inline batch operator()(batch const& arg) noexcept + XSIMD_INLINE batch operator()(batch const& arg) noexcept { using u8_vector = batch; const auto left = svdup_n_u8(0); @@ -1059,7 +1059,7 @@ namespace xsimd struct sve_slider_left<0> { template - inline batch operator()(batch const& arg) noexcept + XSIMD_INLINE batch operator()(batch const& arg) noexcept { return arg; } @@ -1067,7 +1067,7 @@ namespace xsimd } // namespace detail template = 0> - inline batch slide_left(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch slide_left(batch const& arg, requires_arch) noexcept { return detail::sve_slider_left()(arg); } @@ -1079,7 +1079,7 @@ namespace xsimd struct sve_slider_right { template - inline batch operator()(batch const& arg) noexcept + XSIMD_INLINE batch operator()(batch const& arg) noexcept { using u8_vector = batch; const auto left = bitwise_cast(arg, u8_vector {}, sve {}).data; @@ -1093,7 +1093,7 @@ namespace xsimd struct sve_slider_right::size> { template - inline batch operator()(batch const&) noexcept + XSIMD_INLINE batch operator()(batch const&) noexcept { return batch {}; } @@ -1101,35 +1101,35 @@ namespace xsimd } // namespace detail template = 0> - inline batch slide_right(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch slide_right(batch const& arg, requires_arch) noexcept { return detail::sve_slider_right()(arg); } // isnan template = 0> - inline batch_bool isnan(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const& arg, requires_arch) noexcept { return !(arg == arg); } // nearbyint template = 0> - inline batch nearbyint(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch nearbyint(batch const& arg, requires_arch) noexcept { return svrintx_x(detail::sve_ptrue(), arg); } // nearbyint_as_int template - inline batch nearbyint_as_int(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch nearbyint_as_int(batch const& arg, requires_arch) noexcept { const auto nearest = svrintx_x(detail::sve_ptrue(), arg); return svcvt_s32_x(detail::sve_ptrue(), nearest); } template - inline batch nearbyint_as_int(batch const& arg, requires_arch) noexcept + XSIMD_INLINE batch nearbyint_as_int(batch const& arg, requires_arch) noexcept { const auto nearest = svrintx_x(detail::sve_ptrue(), arg); return svcvt_s64_x(detail::sve_ptrue(), 
nearest); @@ -1137,7 +1137,7 @@ namespace xsimd // ldexp template = 0> - inline batch ldexp(const batch& x, const batch, A>& exp, requires_arch) noexcept + XSIMD_INLINE batch ldexp(const batch& x, const batch, A>& exp, requires_arch) noexcept { return svscale_x(detail::sve_ptrue(), x, exp); } diff --git a/include/xsimd/arch/xsimd_wasm.hpp b/include/xsimd/arch/xsimd_wasm.hpp index 050b1a08f..5316cce35 100644 --- a/include/xsimd/arch/xsimd_wasm.hpp +++ b/include/xsimd/arch/xsimd_wasm.hpp @@ -23,7 +23,7 @@ namespace xsimd struct batch_bool_constant; template - inline batch bitwise_cast(batch const& x) noexcept; + XSIMD_INLINE batch bitwise_cast(batch const& x) noexcept; template struct batch_constant; @@ -34,15 +34,15 @@ namespace xsimd // fwd template - inline batch insert(batch const& self, T val, index, requires_arch) noexcept; + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept; template - inline batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept; + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept; template - inline batch avg(batch const&, batch const&, requires_arch) noexcept; + XSIMD_INLINE batch avg(batch const&, batch const&, requires_arch) noexcept; // abs template ::value && std::is_signed::value, void>::type> - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -68,20 +68,20 @@ namespace xsimd } template - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { return wasm_f32x4_abs(self); } template - inline batch abs(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept { return wasm_f64x2_abs(self); } // add template ::value, void>::type> - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -107,20 +107,20 @@ namespace xsimd } template - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f32x4_add(self, other); } template - inline batch add(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f64x2_add(self, other); } // avgr template ::value, void>::type> - inline batch avgr(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch avgr(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -138,7 +138,7 @@ namespace xsimd // avg template ::value, void>::type> - inline batch avg(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch avg(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -158,94 +158,94 @@ namespace xsimd // all template - inline bool all(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& self, requires_arch) noexcept { return wasm_i32x4_bitmask(self) == 0x0F; } template - inline bool all(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& self, requires_arch) noexcept { 
return wasm_i64x2_bitmask(self) == 0x03; } template ::value, void>::type> - inline bool all(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool all(batch_bool const& self, requires_arch) noexcept { return wasm_i8x16_bitmask(self) == 0xFFFF; } // any template - inline bool any(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& self, requires_arch) noexcept { return wasm_i32x4_bitmask(self) != 0; } template - inline bool any(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& self, requires_arch) noexcept { return wasm_i64x2_bitmask(self) != 0; } template ::value, void>::type> - inline bool any(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE bool any(batch_bool const& self, requires_arch) noexcept { return wasm_i8x16_bitmask(self) != 0; } // batch_bool_cast template - inline batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept + XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept { return { bitwise_cast(batch(self.data)).data }; } // bitwise_and template - inline batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept { return wasm_v128_and(self, other); } template - inline batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return wasm_v128_and(self, other); } // bitwise_andnot template - inline batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept { return wasm_v128_andnot(self, other); } template - inline batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return wasm_v128_andnot(self, other); } // bitwise_cast template - inline batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept { return batch(self.data); } // bitwise_or template - inline batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept { return wasm_v128_or(self, other); } template - inline batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return wasm_v128_or(self, other); } // bitwise_lshift template ::value, void>::type> - inline batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -272,7 +272,7 @@ namespace xsimd // bitwise_rshift template ::value, void>::type> - inline batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept { if (std::is_signed::value) { @@ -326,38 +326,38 @@ namespace xsimd // bitwise_not 
template - inline batch bitwise_not(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept { return wasm_v128_not(self); } template - inline batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept { return wasm_v128_not(self); } // bitwise_xor template - inline batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept { return wasm_v128_xor(self, other); } template - inline batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return wasm_v128_xor(self, other); } // broadcast template - batch inline broadcast(float val, requires_arch) noexcept + batch XSIMD_INLINE broadcast(float val, requires_arch) noexcept { return wasm_f32x4_splat(val); } template ::value, void>::type> - inline batch broadcast(T val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -382,48 +382,48 @@ namespace xsimd } } template - inline batch broadcast(double val, requires_arch) noexcept + XSIMD_INLINE batch broadcast(double val, requires_arch) noexcept { return wasm_f64x2_splat(val); } // ceil template - inline batch ceil(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch ceil(batch const& self, requires_arch) noexcept { return wasm_f32x4_ceil(self); } template - inline batch ceil(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch ceil(batch const& self, requires_arch) noexcept { return wasm_f64x2_ceil(self); } // div template - inline batch div(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f32x4_div(self, other); } template - inline batch div(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch div(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f64x2_div(self, other); } // eq template - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f32x4_eq(self, other); } template - inline batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return wasm_i32x4_eq(self, other); } template ::value, void>::type> - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -448,7 +448,7 @@ namespace xsimd } } template ::value, void>::type> - inline batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -473,12 +473,12 @@ namespace xsimd } } template - inline batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) 
noexcept { return wasm_f64x2_eq(self, other); } template - inline batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return wasm_i64x2_eq(self, other); } @@ -487,13 +487,13 @@ namespace xsimd namespace detail { template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return wasm_f32x4_convert_i32x4(self); } template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { // from https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx // adapted to wasm @@ -506,7 +506,7 @@ namespace xsimd } template - inline batch fast_cast(batch const& x, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& x, batch const&, requires_arch) noexcept { // from https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx // adapted to wasm @@ -520,7 +520,7 @@ namespace xsimd } template - inline batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept { return wasm_i32x4_make( static_cast(wasm_f32x4_extract_lane(self, 0)), @@ -532,20 +532,20 @@ namespace xsimd // floor template - inline batch floor(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch floor(batch const& self, requires_arch) noexcept { return wasm_f32x4_floor(self); } template - inline batch floor(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch floor(batch const& self, requires_arch) noexcept { return wasm_f64x2_floor(self); } // from_mask template - inline batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept + XSIMD_INLINE batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept { alignas(A::alignment()) static const uint32_t lut[][4] = { { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, @@ -569,7 +569,7 @@ namespace xsimd return wasm_v128_load((const v128_t*)lut[mask]); } template - inline batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept + XSIMD_INLINE batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept { alignas(A::alignment()) static const uint64_t lut[][4] = { { 0x0000000000000000ul, 0x0000000000000000ul }, @@ -581,7 +581,7 @@ namespace xsimd return wasm_v128_load((const v128_t*)lut[mask]); } template ::value, void>::type> - inline batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept + XSIMD_INLINE batch_bool from_mask(batch_bool const&, uint64_t mask, requires_arch) noexcept { alignas(A::alignment()) static const uint64_t lut64[] = { 0x0000000000000000, @@ -667,24 +667,24 @@ namespace xsimd // ge template - inline batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f32x4_ge(self, other); } template - inline batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f64x2_ge(self, other); } // gt template - inline batch_bool gt(batch 
const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f32x4_gt(self, other); } template ::value, void>::type> - inline batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -732,14 +732,14 @@ namespace xsimd } template - inline batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f64x2_gt(self, other); } // haddp template - inline batch haddp(batch const* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(batch const* row, requires_arch) noexcept { v128_t tmp0 = wasm_i32x4_shuffle(row[0], row[1], 0, 4, 1, 5); v128_t tmp1 = wasm_i32x4_shuffle(row[0], row[1], 2, 6, 3, 7); @@ -752,7 +752,7 @@ namespace xsimd return wasm_f32x4_add(tmp0, tmp2); } template - inline batch haddp(batch const* row, requires_arch) noexcept + XSIMD_INLINE batch haddp(batch const* row, requires_arch) noexcept { return wasm_f64x2_add(wasm_i64x2_shuffle(row[0], row[1], 0, 2), wasm_i64x2_shuffle(row[0], row[1], 1, 3)); @@ -760,12 +760,12 @@ namespace xsimd // insert template - inline batch insert(batch const& self, float val, index pos, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, float val, index pos, requires_arch) noexcept { return wasm_f32x4_replace_lane(self, pos, val); } template ::value, void>::type> - inline batch insert(batch const& self, T val, index pos, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, T val, index pos, requires_arch) noexcept { if (std::is_signed::value) { @@ -818,48 +818,48 @@ namespace xsimd } template - inline batch insert(batch const& self, double val, index pos, requires_arch) noexcept + XSIMD_INLINE batch insert(batch const& self, double val, index pos, requires_arch) noexcept { return wasm_f64x2_replace_lane(self, pos, val); } // isnan template - inline batch_bool isnan(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const& self, requires_arch) noexcept { return wasm_v128_or(wasm_f32x4_ne(self, self), wasm_f32x4_ne(self, self)); } template - inline batch_bool isnan(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch_bool isnan(batch const& self, requires_arch) noexcept { return wasm_v128_or(wasm_f64x2_ne(self, self), wasm_f64x2_ne(self, self)); } // le template - inline batch_bool le(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f32x4_le(self, other); } template - inline batch_bool le(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool le(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f64x2_le(self, other); } // load_aligned template - inline batch load_aligned(float const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(float const* mem, convert, requires_arch) noexcept { return wasm_v128_load(mem); } template ::value, void>::type> - inline batch load_aligned(T const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_aligned(T const* mem, convert, requires_arch) noexcept { return wasm_v128_load((v128_t const*)mem); } template - inline batch load_aligned(double const* mem, convert, 
requires_arch) noexcept + XSIMD_INLINE batch load_aligned(double const* mem, convert, requires_arch) noexcept { return wasm_v128_load(mem); } @@ -868,12 +868,12 @@ namespace xsimd namespace detail { template - inline batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept { return { wasm_i32x4_shuffle(hi, lo, 0, 2, 4, 6), wasm_i32x4_shuffle(hi, lo, 1, 3, 5, 7) }; } template - inline batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept { return { wasm_i64x2_shuffle(hi, lo, 0, 2), wasm_i64x2_shuffle(hi, lo, 1, 3) }; } @@ -881,29 +881,29 @@ namespace xsimd // load_unaligned template - inline batch load_unaligned(float const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(float const* mem, convert, requires_arch) noexcept { return wasm_v128_load(mem); } template ::value, void>::type> - inline batch load_unaligned(T const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(T const* mem, convert, requires_arch) noexcept { return wasm_v128_load((v128_t const*)mem); } template - inline batch load_unaligned(double const* mem, convert, requires_arch) noexcept + XSIMD_INLINE batch load_unaligned(double const* mem, convert, requires_arch) noexcept { return wasm_v128_load(mem); } // lt template - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f32x4_lt(self, other); } template ::value, void>::type> - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -964,14 +964,14 @@ namespace xsimd } template - inline batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f64x2_lt(self, other); } // mask template ::value, void>::type> - inline uint64_t mask(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -996,66 +996,66 @@ namespace xsimd } } template - inline uint64_t mask(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept { return wasm_i32x4_bitmask(self); } template - inline uint64_t mask(batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE uint64_t mask(batch_bool const& self, requires_arch) noexcept { return wasm_i64x2_bitmask(self); } // max template - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f32x4_pmax(self, other); } template ::value, void>::type> - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { return select(self > other, self, other); } template - inline batch max(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept { return 
wasm_f64x2_pmax(self, other); } // min template - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f32x4_pmin(self, other); } template ::value, void>::type> - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { return select(self <= other, self, other); } template - inline batch min(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f64x2_pmin(self, other); } // mul template - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f32x4_mul(self, other); } template - inline batch mul(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f64x2_mul(self, other); } // neg template ::value, void>::type> - inline batch neg(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& self, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -1081,59 +1081,59 @@ namespace xsimd } template - inline batch neg(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& self, requires_arch) noexcept { return wasm_f32x4_neg(self); } template - inline batch neg(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch neg(batch const& self, requires_arch) noexcept { return wasm_f64x2_neg(self); } // neq template - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f32x4_ne(self, other); } template ::value, void>::type> - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return ~(self == other); } template - inline batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return wasm_f32x4_ne(self, other); } template ::value, void>::type> - inline batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return ~(self == other); } template - inline batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f64x2_ne(self, other); } template - inline batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + XSIMD_INLINE batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept { return wasm_f64x2_ne(self, other); } // reciprocal template - inline batch reciprocal(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch reciprocal(batch const& self, requires_arch) noexcept { v128_t one = wasm_f32x4_splat(1.0f); return wasm_f32x4_div(one, self); } template - inline batch reciprocal(batch const& self, requires_arch) noexcept + 
XSIMD_INLINE batch reciprocal(batch const& self, requires_arch) noexcept { v128_t one = wasm_f64x2_splat(1.0); return wasm_f64x2_div(one, self); @@ -1141,7 +1141,7 @@ namespace xsimd // reduce_add template - inline float reduce_add(batch const& self, requires_arch) noexcept + XSIMD_INLINE float reduce_add(batch const& self, requires_arch) noexcept { v128_t tmp0 = wasm_f32x4_add(self, wasm_i32x4_shuffle(self, self, 6, 7, 2, 3)); v128_t tmp1 = wasm_i32x4_shuffle(tmp0, tmp0, 1, 0, 4, 4); @@ -1150,7 +1150,7 @@ namespace xsimd return wasm_f32x4_extract_lane(tmp3, 0); } template ::value, void>::type> - inline T reduce_add(batch const& self, requires_arch) noexcept + XSIMD_INLINE T reduce_add(batch const& self, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 4) { @@ -1172,7 +1172,7 @@ namespace xsimd } } template - inline double reduce_add(batch const& self, requires_arch) noexcept + XSIMD_INLINE double reduce_add(batch const& self, requires_arch) noexcept { v128_t tmp0 = wasm_i64x2_shuffle(self, self, 1, 3); v128_t tmp1 = wasm_f64x2_add(self, tmp0); @@ -1182,13 +1182,13 @@ namespace xsimd // rsqrt template - inline batch rsqrt(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch rsqrt(batch const& self, requires_arch) noexcept { v128_t one = wasm_f32x4_splat(1.0f); return wasm_f32x4_div(one, wasm_f32x4_sqrt(self)); } template - inline batch rsqrt(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch rsqrt(batch const& self, requires_arch) noexcept { v128_t one = wasm_f64x2_splat(1.0); return wasm_f64x2_div(one, wasm_f64x2_sqrt(self)); @@ -1196,7 +1196,7 @@ namespace xsimd // slide_left template - inline batch slide_left(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch slide_left(batch const& x, requires_arch) noexcept { return wasm_i8x16_shuffle( wasm_i64x2_const(0, 0), x, ((N) & 0xF0) ? 0 : 16 - ((N) & 0xF), @@ -1212,7 +1212,7 @@ namespace xsimd // slide_right template - inline batch slide_right(batch const& x, requires_arch) noexcept + XSIMD_INLINE batch slide_right(batch const& x, requires_arch) noexcept { return wasm_i8x16_shuffle( x, wasm_i64x2_const(0, 0), ((N) & 0xF0) ? 16 : ((N) & 0xF) + 0, @@ -1228,7 +1228,7 @@ namespace xsimd // sadd template ::value, void>::type> - inline batch sadd(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -1264,94 +1264,94 @@ namespace xsimd // select template - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return wasm_v128_or(wasm_v128_and(cond, true_br), wasm_v128_andnot(false_br, cond)); } template ::value, void>::type> - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return wasm_v128_or(wasm_v128_and(cond, true_br), wasm_v128_andnot(false_br, cond)); } template ::value, void>::type> - inline batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept { return select(batch_bool { Values... 
}, true_br, false_br, wasm {}); } template - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept { return wasm_v128_or(wasm_v128_and(cond, true_br), wasm_v128_andnot(false_br, cond)); } // shuffle template - inline batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept { return wasm_i32x4_shuffle(x, y, I0, I1, I2, I3); } template - inline batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept { return wasm_i64x2_shuffle(x, y, I0, I1); } // set template - inline batch set(batch const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, Values... values) noexcept { static_assert(sizeof...(Values) == batch::size, "consistent init"); return wasm_f32x4_make(values...); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1) noexcept { return wasm_i64x2_make(v0, v1); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3) noexcept { return wasm_i32x4_make(v0, v1, v2, v3); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) noexcept { return wasm_i16x8_make(v0, v1, v2, v3, v4, v5, v6, v7); } template ::value, void>::type> - inline batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) noexcept { return wasm_i8x16_make(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); } template - inline batch set(batch const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch set(batch const&, requires_arch, Values... values) noexcept { static_assert(sizeof...(Values) == batch::size, "consistent init"); return wasm_f64x2_make(values...); } template ::value, void>::type> - inline batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept { return set(batch(), A {}, static_cast(values ? -1LL : 0LL)...).data; } template - inline batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept { static_assert(sizeof...(Values) == batch_bool::size, "consistent init"); return set(batch(), A {}, static_cast(values ? -1LL : 0LL)...).data; } template - inline batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Values... 
values) noexcept { static_assert(sizeof...(Values) == batch_bool::size, "consistent init"); return set(batch(), A {}, static_cast(values ? -1LL : 0LL)...).data; @@ -1359,7 +1359,7 @@ namespace xsimd // ssub template ::value, void>::type> - inline batch ssub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch ssub(batch const& self, batch const& other, requires_arch) noexcept { if (std::is_signed::value) { @@ -1395,22 +1395,22 @@ namespace xsimd // store_aligned template - inline void store_aligned(float* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(float* mem, batch const& self, requires_arch) noexcept { return wasm_v128_store(mem, self); } template ::value, void>::type> - inline void store_aligned(T* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* mem, batch const& self, requires_arch) noexcept { return wasm_v128_store((v128_t*)mem, self); } template ::value, void>::type> - inline void store_aligned(T* mem, batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(T* mem, batch_bool const& self, requires_arch) noexcept { return wasm_v128_store((v128_t*)mem, self); } template - inline void store_aligned(double* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_aligned(double* mem, batch const& self, requires_arch) noexcept { return wasm_v128_store(mem, self); } @@ -1420,23 +1420,23 @@ namespace xsimd { // complex_low template - inline batch complex_low(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_low(batch, A> const& self, requires_arch) noexcept { return wasm_i32x4_shuffle(self.real(), self.imag(), 0, 4, 1, 5); } // complex_high template - inline batch complex_high(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_high(batch, A> const& self, requires_arch) noexcept { return wasm_i32x4_shuffle(self.real(), self.imag(), 2, 6, 3, 7); } template - inline batch complex_low(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_low(batch, A> const& self, requires_arch) noexcept { return wasm_i64x2_shuffle(self.real(), self.imag(), 0, 2); } template - inline batch complex_high(batch, A> const& self, requires_arch) noexcept + XSIMD_INLINE batch complex_high(batch, A> const& self, requires_arch) noexcept { return wasm_i64x2_shuffle(self.real(), self.imag(), 1, 3); } @@ -1444,34 +1444,34 @@ namespace xsimd // store_unaligned template - inline void store_unaligned(float* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(float* mem, batch const& self, requires_arch) noexcept { return wasm_v128_store(mem, self); } template ::value, void>::type> - inline void store_unaligned(T* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(T* mem, batch const& self, requires_arch) noexcept { return wasm_v128_store((v128_t*)mem, self); } template ::value, void>::type> - inline void store_unaligned(T* mem, batch_bool const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(T* mem, batch_bool const& self, requires_arch) noexcept { return wasm_v128_store((v128_t*)mem, self); } template - inline void store_unaligned(double* mem, batch const& self, requires_arch) noexcept + XSIMD_INLINE void store_unaligned(double* mem, batch const& self, requires_arch) noexcept { return wasm_v128_store(mem, self); } // sub template - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE 
batch sub(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f32x4_sub(self, other); } template ::value, void>::type> - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -1496,106 +1496,106 @@ namespace xsimd } } template - inline batch sub(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept { return wasm_f64x2_sub(self, other); } // sqrt template - inline batch sqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch sqrt(batch const& val, requires_arch) noexcept { return wasm_f32x4_sqrt(val); } template - inline batch sqrt(batch const& val, requires_arch) noexcept + XSIMD_INLINE batch sqrt(batch const& val, requires_arch) noexcept { return wasm_f64x2_sqrt(val); } // swizzle template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { return wasm_i32x4_shuffle(self, self, V0, V1, V2, V3); } template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { return wasm_i64x2_shuffle(self, self, V0, V1); } template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { return wasm_i64x2_shuffle(self, self, V0, V1); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, wasm {})); } template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { return wasm_i32x4_shuffle(self, self, V0, V1, V2, V3); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, wasm {})); } template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { return wasm_i16x8_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, wasm {})); } template - inline batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept { return wasm_i8x16_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15); } template - inline batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept { return bitwise_cast(swizzle(bitwise_cast(self), mask, wasm {})); } // trunc template - inline batch trunc(batch const& self, 
requires_arch) noexcept + XSIMD_INLINE batch trunc(batch const& self, requires_arch) noexcept { return wasm_f32x4_trunc(self); } template - inline batch trunc(batch const& self, requires_arch) noexcept + XSIMD_INLINE batch trunc(batch const& self, requires_arch) noexcept { return wasm_f64x2_trunc(self); } // zip_hi template - inline batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { return wasm_i32x4_shuffle(self, other, 2, 6, 3, 7); } template ::value, void>::type> - inline batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -1620,19 +1620,19 @@ namespace xsimd } } template - inline batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept { return wasm_i64x2_shuffle(self, other, 1, 3); } // zip_lo template - inline batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { return wasm_i32x4_shuffle(self, other, 0, 4, 1, 5); } template ::value, void>::type> - inline batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { XSIMD_IF_CONSTEXPR(sizeof(T) == 1) { @@ -1657,7 +1657,7 @@ namespace xsimd } } template - inline batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept { return wasm_i64x2_shuffle(self, other, 0, 2); } diff --git a/include/xsimd/config/xsimd_arch.hpp b/include/xsimd/config/xsimd_arch.hpp index ac51daca7..39d0d581d 100644 --- a/include/xsimd/config/xsimd_arch.hpp +++ b/include/xsimd/config/xsimd_arch.hpp @@ -57,13 +57,13 @@ namespace xsimd }; template - inline constexpr T max_of(T value) noexcept + XSIMD_INLINE constexpr T max_of(T value) noexcept { return value; } template - inline constexpr T max_of(T head0, T head1, Ts... tail) noexcept + XSIMD_INLINE constexpr T max_of(T head0, T head1, Ts... tail) noexcept { return max_of((head0 > head1 ? head0 : head1), tail...); } @@ -104,7 +104,7 @@ namespace xsimd } template - static inline void for_each(F&& f) noexcept + static XSIMD_INLINE void for_each(F&& f) noexcept { (void)std::initializer_list { (f(Archs {}), true)... }; } @@ -196,14 +196,14 @@ namespace xsimd F functor; template - inline auto walk_archs(arch_list, Tys&&... args) noexcept -> decltype(functor(Arch {}, std::forward(args)...)) + XSIMD_INLINE auto walk_archs(arch_list, Tys&&... args) noexcept -> decltype(functor(Arch {}, std::forward(args)...)) { assert(Arch::available() && "At least one arch must be supported during dispatch"); return functor(Arch {}, std::forward(args)...); } template - inline auto walk_archs(arch_list, Tys&&... args) noexcept -> decltype(functor(Arch {}, std::forward(args)...)) + XSIMD_INLINE auto walk_archs(arch_list, Tys&&... 
args) noexcept -> decltype(functor(Arch {}, std::forward(args)...))
            {
                if (availables_archs.has(Arch {}))
                    return functor(Arch {}, std::forward(args)...);
@@ -212,14 +212,14 @@ namespace xsimd
            }

        public:
-        inline dispatcher(F f) noexcept
+        XSIMD_INLINE dispatcher(F f) noexcept
            : availables_archs(available_architectures())
            , functor(f)
        {
        }

        template
-        inline auto operator()(Tys&&... args) noexcept -> decltype(functor(default_arch {}, std::forward(args)...))
+        XSIMD_INLINE auto operator()(Tys&&... args) noexcept -> decltype(functor(default_arch {}, std::forward(args)...))
        {
            return walk_archs(ArchList {}, std::forward(args)...);
        }
@@ -228,7 +228,7 @@ namespace xsimd

    // Generic function dispatch, à la ifunc
    template
-    inline detail::dispatcher dispatch(F&& f) noexcept
+    XSIMD_INLINE detail::dispatcher dispatch(F&& f) noexcept
    {
        return { std::forward(f) };
    }
diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp
index 89b883a39..f22089bac 100644
--- a/include/xsimd/config/xsimd_cpuid.hpp
+++ b/include/xsimd/config/xsimd_cpuid.hpp
@@ -41,7 +41,7 @@ namespace xsimd
#define ARCH_FIELD_EX(arch, field_name) \
    unsigned field_name;                 \
-    inline bool has(::xsimd::arch) const { return this->field_name; }
+    XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; }
#define ARCH_FIELD(name) ARCH_FIELD_EX(name, name)

            ARCH_FIELD(sse2)
@@ -78,7 +78,7 @@ namespace xsimd

#undef ARCH_FIELD

-            inline supported_arch() noexcept
+            XSIMD_INLINE supported_arch() noexcept
            {
                memset(this, 0, sizeof(supported_arch));
@@ -191,7 +191,7 @@ namespace xsimd
        };
    } // namespace detail

-    inline detail::supported_arch available_architectures() noexcept
+    XSIMD_INLINE detail::supported_arch available_architectures() noexcept
    {
        static detail::supported_arch supported;
        return supported;
diff --git a/include/xsimd/config/xsimd_inline.hpp b/include/xsimd/config/xsimd_inline.hpp
new file mode 100644
index 000000000..88e9cbcd0
--- /dev/null
+++ b/include/xsimd/config/xsimd_inline.hpp
@@ -0,0 +1,23 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and        *
+ * Martin Renou                                                            *
+ * Copyright (c) QuantStack                                                *
+ * Copyright (c) Serge Guelton                                             *
+ *                                                                         *
+ * Distributed under the terms of the BSD 3-Clause License.                *
+ *                                                                         *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_INLINE_HPP
+#define XSIMD_INLINE_HPP
+
+#if defined(__GNUC__)
+#define XSIMD_INLINE inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#define XSIMD_INLINE inline __forceinline
+#else
+#define XSIMD_INLINE inline
+#endif
+
+#endif
diff --git a/include/xsimd/math/xsimd_rem_pio2.hpp b/include/xsimd/math/xsimd_rem_pio2.hpp
index 05371ee52..eb232c568 100644
--- a/include/xsimd/math/xsimd_rem_pio2.hpp
+++ b/include/xsimd/math/xsimd_rem_pio2.hpp
@@ -217,7 +217,7 @@ namespace xsimd
     *
     */

-    inline int32_t __kernel_rem_pio2(double* x, double* y, int32_t e0, int32_t nx, int32_t prec, const int32_t* ipio2) noexcept
+    XSIMD_INLINE int32_t __kernel_rem_pio2(double* x, double* y, int32_t e0, int32_t nx, int32_t prec, const int32_t* ipio2) noexcept
    {
        static const int32_t init_jk[] = { 2, 3, 4, 6 }; /* initial value for jk */
@@ -450,7 +450,7 @@ namespace xsimd
        return n & 7;
    }

-    inline std::int32_t __ieee754_rem_pio2(double x, double* y) noexcept
+    XSIMD_INLINE std::int32_t __ieee754_rem_pio2(double x, double* y) noexcept
    {
        static const std::int32_t two_over_pi[] = {
            0xA2F983,
diff --git a/include/xsimd/memory/xsimd_aligned_allocator.hpp b/include/xsimd/memory/xsimd_aligned_allocator.hpp
index 3918d68a7..51779f31c 100644
--- a/include/xsimd/memory/xsimd_aligned_allocator.hpp
+++ b/include/xsimd/memory/xsimd_aligned_allocator.hpp
@@ -59,43 +59,43 @@ namespace xsimd
            using other = aligned_allocator;
        };

-        inline aligned_allocator() noexcept;
-        inline aligned_allocator(const aligned_allocator& rhs) noexcept;
+        XSIMD_INLINE aligned_allocator() noexcept;
+        XSIMD_INLINE aligned_allocator(const aligned_allocator& rhs) noexcept;

        template
-        inline aligned_allocator(const aligned_allocator& rhs) noexcept;
+        XSIMD_INLINE aligned_allocator(const aligned_allocator& rhs) noexcept;

-        inline ~aligned_allocator();
+        XSIMD_INLINE ~aligned_allocator();

-        inline pointer address(reference) noexcept;
-        inline const_pointer address(const_reference) const noexcept;
+        XSIMD_INLINE pointer address(reference) noexcept;
+        XSIMD_INLINE const_pointer address(const_reference) const noexcept;

-        inline pointer allocate(size_type n, const void* hint = 0);
-        inline void deallocate(pointer p, size_type n);
+        XSIMD_INLINE pointer allocate(size_type n, const void* hint = 0);
+        XSIMD_INLINE void deallocate(pointer p, size_type n);

-        inline size_type max_size() const noexcept;
-        inline size_type size_max() const noexcept;
+        XSIMD_INLINE size_type max_size() const noexcept;
+        XSIMD_INLINE size_type size_max() const noexcept;

        template
-        inline void construct(U* p, Args&&... args);
+        XSIMD_INLINE void construct(U* p, Args&&...
args); template - inline void destroy(U* p); + XSIMD_INLINE void destroy(U* p); }; template - inline bool operator==(const aligned_allocator& lhs, - const aligned_allocator& rhs) noexcept; + XSIMD_INLINE bool operator==(const aligned_allocator& lhs, + const aligned_allocator& rhs) noexcept; template - inline bool operator!=(const aligned_allocator& lhs, - const aligned_allocator& rhs) noexcept; + XSIMD_INLINE bool operator!=(const aligned_allocator& lhs, + const aligned_allocator& rhs) noexcept; - inline void* aligned_malloc(size_t size, size_t alignment); - inline void aligned_free(void* ptr); + XSIMD_INLINE void* aligned_malloc(size_t size, size_t alignment); + XSIMD_INLINE void aligned_free(void* ptr); template - inline size_t get_alignment_offset(const T* p, size_t size, size_t block_size); + XSIMD_INLINE size_t get_alignment_offset(const T* p, size_t size, size_t block_size); /************************************ * aligned_allocator implementation * @@ -105,7 +105,7 @@ namespace xsimd * Default constructor. */ template - inline aligned_allocator::aligned_allocator() noexcept + XSIMD_INLINE aligned_allocator::aligned_allocator() noexcept { } @@ -113,7 +113,7 @@ namespace xsimd * Copy constructor. */ template - inline aligned_allocator::aligned_allocator(const aligned_allocator&) noexcept + XSIMD_INLINE aligned_allocator::aligned_allocator(const aligned_allocator&) noexcept { } @@ -122,7 +122,7 @@ namespace xsimd */ template template - inline aligned_allocator::aligned_allocator(const aligned_allocator&) noexcept + XSIMD_INLINE aligned_allocator::aligned_allocator(const aligned_allocator&) noexcept { } @@ -130,7 +130,7 @@ namespace xsimd * Destructor. */ template - inline aligned_allocator::~aligned_allocator() + XSIMD_INLINE aligned_allocator::~aligned_allocator() { } @@ -140,7 +140,7 @@ namespace xsimd * @return the actual address of \c r. */ template - inline auto + XSIMD_INLINE auto aligned_allocator::address(reference r) noexcept -> pointer { return &r; @@ -152,7 +152,7 @@ namespace xsimd * @return the actual address of \c r. */ template - inline auto + XSIMD_INLINE auto aligned_allocator::address(const_reference r) const noexcept -> const_pointer { return &r; @@ -167,7 +167,7 @@ namespace xsimd * hold an array of \c n objects of type \c T. */ template - inline auto + XSIMD_INLINE auto aligned_allocator::allocate(size_type n, const void*) -> pointer { pointer res = reinterpret_cast(aligned_malloc(sizeof(T) * n, A)); @@ -186,7 +186,7 @@ namespace xsimd * @param n number of objects earlier passed to allocate(). */ template - inline void aligned_allocator::deallocate(pointer p, size_type) + XSIMD_INLINE void aligned_allocator::deallocate(pointer p, size_type) { aligned_free(p); } @@ -197,7 +197,7 @@ namespace xsimd * @return the maximum supported allocated size. */ template - inline auto + XSIMD_INLINE auto aligned_allocator::max_size() const noexcept -> size_type { return size_type(-1) / sizeof(T); @@ -207,7 +207,7 @@ namespace xsimd * This method is deprecated, use max_size() instead */ template - inline auto + XSIMD_INLINE auto aligned_allocator::size_max() const noexcept -> size_type { return size_type(-1) / sizeof(T); @@ -221,7 +221,7 @@ namespace xsimd */ template template - inline void aligned_allocator::construct(U* p, Args&&... args) + XSIMD_INLINE void aligned_allocator::construct(U* p, Args&&... 
args) { new ((void*)p) U(std::forward(args)...); } @@ -232,7 +232,7 @@ namespace xsimd */ template template - inline void aligned_allocator::destroy(U* p) + XSIMD_INLINE void aligned_allocator::destroy(U* p) { p->~U(); } @@ -250,8 +250,8 @@ namespace xsimd * @return true if the allocators have the same alignment. */ template - inline bool operator==(const aligned_allocator& lhs, - const aligned_allocator& rhs) noexcept + XSIMD_INLINE bool operator==(const aligned_allocator& lhs, + const aligned_allocator& rhs) noexcept { return lhs.alignment == rhs.alignment; } @@ -265,8 +265,8 @@ namespace xsimd * @return true if the allocators have different alignments. */ template - inline bool operator!=(const aligned_allocator& lhs, - const aligned_allocator& rhs) noexcept + XSIMD_INLINE bool operator!=(const aligned_allocator& lhs, + const aligned_allocator& rhs) noexcept { return !(lhs == rhs); } @@ -277,7 +277,7 @@ namespace xsimd namespace detail { - inline void* xaligned_malloc(size_t size, size_t alignment) + XSIMD_INLINE void* xaligned_malloc(size_t size, size_t alignment) { assert(((alignment & (alignment - 1)) == 0) && "alignment must be a power of two"); assert((alignment >= sizeof(void*)) && "alignment must be at least the size of a pointer"); @@ -293,7 +293,7 @@ namespace xsimd return res; } - inline void xaligned_free(void* ptr) + XSIMD_INLINE void xaligned_free(void* ptr) { #ifdef _WIN32 _aligned_free(ptr); @@ -303,18 +303,18 @@ namespace xsimd } } - inline void* aligned_malloc(size_t size, size_t alignment) + XSIMD_INLINE void* aligned_malloc(size_t size, size_t alignment) { return detail::xaligned_malloc(size, alignment); } - inline void aligned_free(void* ptr) + XSIMD_INLINE void aligned_free(void* ptr) { detail::xaligned_free(ptr); } template - inline size_t get_alignment_offset(const T* p, size_t size, size_t block_size) + XSIMD_INLINE size_t get_alignment_offset(const T* p, size_t size, size_t block_size) { // size_t block_size = simd_traits::size; if (block_size == 1) diff --git a/include/xsimd/memory/xsimd_alignment.hpp b/include/xsimd/memory/xsimd_alignment.hpp index 2b3b35088..2d59ac1fc 100644 --- a/include/xsimd/memory/xsimd_alignment.hpp +++ b/include/xsimd/memory/xsimd_alignment.hpp @@ -81,7 +81,7 @@ namespace xsimd * @return true if the alignment requirements are met */ template - inline bool is_aligned(void const* ptr) + XSIMD_INLINE bool is_aligned(void const* ptr) { return (reinterpret_cast(ptr) % static_cast(Arch::alignment())) == 0; } diff --git a/include/xsimd/types/xsimd_api.hpp b/include/xsimd/types/xsimd_api.hpp index 138c9642d..79be4d88d 100644 --- a/include/xsimd/types/xsimd_api.hpp +++ b/include/xsimd/types/xsimd_api.hpp @@ -53,7 +53,7 @@ namespace xsimd * @return the absolute values of \c x. */ template - inline batch abs(batch const& x) noexcept + XSIMD_INLINE batch abs(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::abs(x, A {}); @@ -67,7 +67,7 @@ namespace xsimd * @return the absolute values of \c z. 
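A minimal usage sketch (illustration only, not part of this patch) of the aligned allocator and alignment helpers touched above, assuming the usual single-header include and the default architecture; the explicit alignment argument is passed only to make the assumption visible:

#include <vector>
#include <xsimd/xsimd.hpp>

int main()
{
    // Buffer over-aligned for the default architecture via xsimd::aligned_allocator.
    std::vector<float, xsimd::aligned_allocator<float, xsimd::default_arch::alignment()>> v(xsimd::batch<float>::size, -1.5f);

    bool aligned = xsimd::is_aligned<xsimd::default_arch>(v.data()); // alignment predicate from xsimd_alignment.hpp
    auto b = xsimd::load_aligned(v.data());                          // safe: the allocator guarantees the alignment
    xsimd::store_aligned(v.data(), xsimd::abs(b));                   // |x| written back in place
    return aligned ? 0 : 1;
}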
*/ template - inline batch abs(batch, A> const& z) noexcept + XSIMD_INLINE batch abs(batch, A> const& z) noexcept { detail::static_check_supported_config(); return kernel::abs(z, A {}); @@ -82,7 +82,7 @@ namespace xsimd * @return the sum of \c x and \c y */ template - inline auto add(batch const& x, batch const& y) noexcept -> decltype(x + y) + XSIMD_INLINE auto add(batch const& x, batch const& y) noexcept -> decltype(x + y) { detail::static_check_supported_config(); return x + y; @@ -96,7 +96,7 @@ namespace xsimd * @return the arc cosine of \c x. */ template - inline batch acos(batch const& x) noexcept + XSIMD_INLINE batch acos(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::acos(x, A {}); @@ -110,7 +110,7 @@ namespace xsimd * @return the inverse hyperbolic cosine of \c x. */ template - inline batch acosh(batch const& x) noexcept + XSIMD_INLINE batch acosh(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::acosh(x, A {}); @@ -124,7 +124,7 @@ namespace xsimd * @return the argument of \c z. */ template - inline real_batch_type_t> arg(batch const& z) noexcept + XSIMD_INLINE real_batch_type_t> arg(batch const& z) noexcept { detail::static_check_supported_config(); return kernel::arg(z, A {}); @@ -138,7 +138,7 @@ namespace xsimd * @return the arc sine of \c x. */ template - inline batch asin(batch const& x) noexcept + XSIMD_INLINE batch asin(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::asin(x, A {}); @@ -152,7 +152,7 @@ namespace xsimd * @return the inverse hyperbolic sine of \c x. */ template - inline batch asinh(batch const& x) noexcept + XSIMD_INLINE batch asinh(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::asinh(x, A {}); @@ -166,7 +166,7 @@ namespace xsimd * @return the arc tangent of \c x. */ template - inline batch atan(batch const& x) noexcept + XSIMD_INLINE batch atan(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::atan(x, A {}); @@ -182,7 +182,7 @@ namespace xsimd * @return the arc tangent of \c x/y. */ template - inline batch atan2(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch atan2(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::atan2(x, y, A {}); @@ -196,7 +196,7 @@ namespace xsimd * @return the inverse hyperbolic tangent of \c x. */ template - inline batch atanh(batch const& x) noexcept + XSIMD_INLINE batch atanh(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::atanh(x, A {}); @@ -211,7 +211,7 @@ namespace xsimd * @return the average of elements between \c x and \c y. */ template - inline batch avg(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch avg(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::avg(x, y, A {}); @@ -226,7 +226,7 @@ namespace xsimd * @return the rounded average of elements between \c x and \c y. 
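As a quick sketch of the API documented above (not from the patch itself), the named functions such as add() are thin wrappers over the corresponding operators, and the math functions apply per lane:

#include <xsimd/xsimd.hpp>

int main()
{
    xsimd::batch<float> x(1.5f); // broadcast constructor: every lane holds 1.5f
    xsimd::batch<float> y(2.0f);

    auto s1 = xsimd::add(x, y);                        // named form
    auto s2 = x + y;                                   // operator form, same result
    auto a  = xsimd::acos(xsimd::batch<float>(0.5f));  // per-lane arc cosine, roughly 1.047 in every lane

    return (xsimd::all(s1 == s2) && xsimd::all(a > xsimd::batch<float>(1.0f))) ? 0 : 1;
}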
*/ template - inline batch avgr(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch avgr(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::avgr(x, y, A {}); @@ -240,7 +240,7 @@ namespace xsimd * @return \c x cast to \c T_out */ template - inline batch_bool batch_bool_cast(batch_bool const& x) noexcept + XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& x) noexcept { detail::static_check_supported_config(); detail::static_check_supported_config(); @@ -256,7 +256,7 @@ namespace xsimd * @return \c x cast to \c T_out */ template - inline batch batch_cast(batch const& x) noexcept + XSIMD_INLINE batch batch_cast(batch const& x) noexcept { detail::static_check_supported_config(); detail::static_check_supported_config(); @@ -271,7 +271,7 @@ namespace xsimd * @return bit of sign of \c x */ template - inline batch bitofsign(batch const& x) noexcept + XSIMD_INLINE batch bitofsign(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::bitofsign(x, A {}); @@ -286,7 +286,7 @@ namespace xsimd * @return the result of the bitwise and. */ template - inline auto bitwise_and(batch const& x, batch const& y) noexcept -> decltype(x & y) + XSIMD_INLINE auto bitwise_and(batch const& x, batch const& y) noexcept -> decltype(x & y) { detail::static_check_supported_config(); return x & y; @@ -301,7 +301,7 @@ namespace xsimd * @return the result of the bitwise and. */ template - inline auto bitwise_and(batch_bool const& x, batch_bool const& y) noexcept -> decltype(x & y) + XSIMD_INLINE auto bitwise_and(batch_bool const& x, batch_bool const& y) noexcept -> decltype(x & y) { detail::static_check_supported_config(); return x & y; @@ -316,7 +316,7 @@ namespace xsimd * @return the result of the bitwise and not. */ template - inline batch bitwise_andnot(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch bitwise_andnot(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::bitwise_andnot(x, y, A {}); @@ -331,7 +331,7 @@ namespace xsimd * @return the result of the bitwise and not. */ template - inline batch_bool bitwise_andnot(batch_bool const& x, batch_bool const& y) noexcept + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& x, batch_bool const& y) noexcept { detail::static_check_supported_config(); return kernel::bitwise_andnot(x, y, A {}); @@ -345,7 +345,7 @@ namespace xsimd * @return \c x reinterpreted as \c T_out */ template - inline batch bitwise_cast(batch const& x) noexcept + XSIMD_INLINE batch bitwise_cast(batch const& x) noexcept { detail::static_check_supported_config(); detail::static_check_supported_config(); @@ -361,13 +361,13 @@ namespace xsimd * @return shifted \c x. */ template - inline batch bitwise_lshift(batch const& x, int shift) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& x, int shift) noexcept { detail::static_check_supported_config(); return kernel::bitwise_lshift(x, shift, A {}); } template - inline batch bitwise_lshift(batch const& x, batch const& shift) noexcept + XSIMD_INLINE batch bitwise_lshift(batch const& x, batch const& shift) noexcept { detail::static_check_supported_config(); return kernel::bitwise_lshift(x, shift, A {}); @@ -381,7 +381,7 @@ namespace xsimd * @return the result of the bitwise not. 
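For illustration, a small sketch of the bitwise helpers declared above on an integer batch; the named forms and the operator forms are interchangeable:

#include <cstdint>
#include <xsimd/xsimd.hpp>

int main()
{
    xsimd::batch<uint32_t> v(0x0000ABCDu);
    xsimd::batch<uint32_t> mask(0x0000FF00u);

    auto masked  = xsimd::bitwise_and(v, mask);   // same as v & mask
    auto shifted = xsimd::bitwise_lshift(v, 8);   // same as v << 8, scalar shift count
    auto flipped = xsimd::bitwise_not(v);         // same as ~v

    return (xsimd::all(masked == (v & mask))
            && xsimd::all(shifted == (v << 8))
            && xsimd::all(flipped == ~v))
        ? 0
        : 1;
}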
*/ template - inline batch bitwise_not(batch const& x) noexcept + XSIMD_INLINE batch bitwise_not(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::bitwise_not(x, A {}); @@ -395,7 +395,7 @@ namespace xsimd * @return the result of the bitwise not. */ template - inline batch_bool bitwise_not(batch_bool const& x) noexcept + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& x) noexcept { detail::static_check_supported_config(); return kernel::bitwise_not(x, A {}); @@ -410,7 +410,7 @@ namespace xsimd * @return the result of the bitwise or. */ template - inline auto bitwise_or(batch const& x, batch const& y) noexcept -> decltype(x | y) + XSIMD_INLINE auto bitwise_or(batch const& x, batch const& y) noexcept -> decltype(x | y) { detail::static_check_supported_config(); return x | y; @@ -425,7 +425,7 @@ namespace xsimd * @return the result of the bitwise or. */ template - inline auto bitwise_or(batch_bool const& x, batch_bool const& y) noexcept -> decltype(x | y) + XSIMD_INLINE auto bitwise_or(batch_bool const& x, batch_bool const& y) noexcept -> decltype(x | y) { detail::static_check_supported_config(); return x | y; @@ -440,13 +440,13 @@ namespace xsimd * @return shifted \c x. */ template - inline batch bitwise_rshift(batch const& x, int shift) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& x, int shift) noexcept { detail::static_check_supported_config(); return kernel::bitwise_rshift(x, shift, A {}); } template - inline batch bitwise_rshift(batch const& x, batch const& shift) noexcept + XSIMD_INLINE batch bitwise_rshift(batch const& x, batch const& shift) noexcept { detail::static_check_supported_config(); return kernel::bitwise_rshift(x, shift, A {}); @@ -461,7 +461,7 @@ namespace xsimd * @return the result of the bitwise xor. */ template - inline auto bitwise_xor(batch const& x, batch const& y) noexcept -> decltype(x ^ y) + XSIMD_INLINE auto bitwise_xor(batch const& x, batch const& y) noexcept -> decltype(x ^ y) { detail::static_check_supported_config(); return x ^ y; @@ -476,7 +476,7 @@ namespace xsimd * @return the result of the bitwise xor. */ template - inline auto bitwise_xor(batch_bool const& x, batch_bool const& y) noexcept -> decltype(x ^ y) + XSIMD_INLINE auto bitwise_xor(batch_bool const& x, batch_bool const& y) noexcept -> decltype(x ^ y) { detail::static_check_supported_config(); return x ^ y; @@ -490,7 +490,7 @@ namespace xsimd * @return a new batch instance */ template - inline batch broadcast(T v) noexcept + XSIMD_INLINE batch broadcast(T v) noexcept { detail::static_check_supported_config(); return batch::broadcast(v); @@ -505,7 +505,7 @@ namespace xsimd * @return a new batch instance */ template - inline simd_return_type broadcast_as(From v) noexcept + XSIMD_INLINE simd_return_type broadcast_as(From v) noexcept { detail::static_check_supported_config(); using batch_value_type = typename simd_return_type::value_type; @@ -523,7 +523,7 @@ namespace xsimd * @return the cubic root of \c x. */ template - inline batch cbrt(batch const& x) noexcept + XSIMD_INLINE batch cbrt(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::cbrt(x, A {}); @@ -538,7 +538,7 @@ namespace xsimd * @return the batch of smallest integer values not less than \c x. */ template - inline batch ceil(batch const& x) noexcept + XSIMD_INLINE batch ceil(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::ceil(x, A {}); @@ -554,7 +554,7 @@ namespace xsimd * @return the result of the clipping. 
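A short sketch (not part of the diff) of the clamping and rounding helpers documented above:

#include <xsimd/xsimd.hpp>

int main()
{
    xsimd::batch<float> x(3.7f);
    xsimd::batch<float> lo(0.0f), hi(1.0f);

    auto clamped = xsimd::clip(x, lo, hi);                  // every lane limited to [lo, hi], i.e. 1.0f here
    auto rounded = xsimd::ceil(xsimd::batch<float>(0.2f));  // smallest integer value not less than x, i.e. 1.0f

    return (xsimd::all(clamped == hi) && xsimd::all(rounded == xsimd::batch<float>(1.0f))) ? 0 : 1;
}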
*/ template - inline batch clip(batch const& x, batch const& lo, batch const& hi) noexcept + XSIMD_INLINE batch clip(batch const& x, batch const& lo, batch const& hi) noexcept { detail::static_check_supported_config(); return kernel::clip(x, lo, hi, A {}); @@ -567,7 +567,7 @@ namespace xsimd * resulting vector, zeroing the remaining slots */ template - inline batch compress(batch const& x, batch_bool const& mask) noexcept + XSIMD_INLINE batch compress(batch const& x, batch_bool const& mask) noexcept { detail::static_check_supported_config(); return kernel::compress(x, mask, A {}); @@ -581,7 +581,7 @@ namespace xsimd * @return the argument of \c z. */ template - inline complex_batch_type_t> conj(batch const& z) noexcept + XSIMD_INLINE complex_batch_type_t> conj(batch const& z) noexcept { return kernel::conj(z, A {}); } @@ -597,7 +597,7 @@ namespace xsimd * matches that of \c y. */ template - inline batch copysign(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch copysign(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::copysign(x, y, A {}); @@ -611,7 +611,7 @@ namespace xsimd * @return the cosine of \c x. */ template - inline batch cos(batch const& x) noexcept + XSIMD_INLINE batch cos(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::cos(x, A {}); @@ -625,7 +625,7 @@ namespace xsimd * @return the hyperbolic cosine of \c x. */ template - inline batch cosh(batch const& x) noexcept + XSIMD_INLINE batch cosh(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::cosh(x, A {}); @@ -639,7 +639,7 @@ namespace xsimd * @return the subtraction of \c x and 1. */ template - inline batch decr(batch const& x) noexcept + XSIMD_INLINE batch decr(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::decr(x, A {}); @@ -655,7 +655,7 @@ namespace xsimd * @return the subtraction of \c x and 1 when \c mask is true. */ template - inline batch decr_if(batch const& x, Mask const& mask) noexcept + XSIMD_INLINE batch decr_if(batch const& x, Mask const& mask) noexcept { detail::static_check_supported_config(); return kernel::decr_if(x, mask, A {}); @@ -670,7 +670,7 @@ namespace xsimd * @return the result of the division. */ template - inline auto div(batch const& x, batch const& y) noexcept -> decltype(x / y) + XSIMD_INLINE auto div(batch const& x, batch const& y) noexcept -> decltype(x / y) { detail::static_check_supported_config(); return x / y; @@ -685,7 +685,7 @@ namespace xsimd * @return a boolean batch. */ template - inline auto eq(batch const& x, batch const& y) noexcept -> decltype(x == y) + XSIMD_INLINE auto eq(batch const& x, batch const& y) noexcept -> decltype(x == y) { detail::static_check_supported_config(); return x == y; @@ -700,7 +700,7 @@ namespace xsimd * @return a boolean batch. */ template - inline auto eq(batch_bool const& x, batch_bool const& y) noexcept -> decltype(x == y) + XSIMD_INLINE auto eq(batch_bool const& x, batch_bool const& y) noexcept -> decltype(x == y) { detail::static_check_supported_config(); return x == y; @@ -714,7 +714,7 @@ namespace xsimd * @return the natural exponential of \c x. */ template - inline batch exp(batch const& x) noexcept + XSIMD_INLINE batch exp(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::exp(x, A {}); @@ -728,7 +728,7 @@ namespace xsimd * @return the base 10 exponential of \c x. 
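A hedged sketch of compress/expand, assuming the behaviour stated in the comments above (compress packs the mask-selected lanes to the front and zeroes the rest, expand places the leading lanes back at the mask positions); with an all-true mask both calls reduce to the identity, which keeps the example checkable:

#include <xsimd/xsimd.hpp>

int main()
{
    xsimd::batch<float> x(2.0f);
    auto mask = x > xsimd::batch<float>(1.0f);   // all-true mask, for illustration only

    auto packed = xsimd::compress(x, mask);      // selected lanes packed to the front, remaining slots zeroed
    auto spread = xsimd::expand(packed, mask);   // leading lanes scattered back to the mask positions

    return xsimd::all(spread == x) ? 0 : 1;
}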
*/ template - inline batch exp10(batch const& x) noexcept + XSIMD_INLINE batch exp10(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::exp10(x, A {}); @@ -742,7 +742,7 @@ namespace xsimd * @return the base 2 exponential of \c x. */ template - inline batch exp2(batch const& x) noexcept + XSIMD_INLINE batch exp2(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::exp2(x, A {}); @@ -755,7 +755,7 @@ namespace xsimd * mask, zeroing the other slots */ template - inline batch expand(batch const& x, batch_bool const& mask) noexcept + XSIMD_INLINE batch expand(batch const& x, batch_bool const& mask) noexcept { detail::static_check_supported_config(); return kernel::expand(x, mask, A {}); @@ -769,7 +769,7 @@ namespace xsimd * @return the natural exponential of \c x, minus one. */ template - inline batch expm1(batch const& x) noexcept + XSIMD_INLINE batch expm1(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::expm1(x, A {}); @@ -783,7 +783,7 @@ namespace xsimd * @return the error function of \c x. */ template - inline batch erf(batch const& x) noexcept + XSIMD_INLINE batch erf(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::erf(x, A {}); @@ -797,7 +797,7 @@ namespace xsimd * @return the error function of \c x. */ template - inline batch erfc(batch const& x) noexcept + XSIMD_INLINE batch erfc(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::erfc(x, A {}); @@ -814,7 +814,7 @@ namespace xsimd * @return. */ template - inline batch extract_pair(batch const& x, batch const& y, std::size_t i) noexcept + XSIMD_INLINE batch extract_pair(batch const& x, batch const& y, std::size_t i) noexcept { detail::static_check_supported_config(); return kernel::extract_pair(x, y, i, A {}); @@ -828,7 +828,7 @@ namespace xsimd * @return the absolute values of \c x. */ template - inline batch fabs(batch const& x) noexcept + XSIMD_INLINE batch fabs(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::abs(x, A {}); @@ -844,7 +844,7 @@ namespace xsimd * @return the positive difference. */ template - inline batch fdim(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch fdim(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::fdim(x, y, A {}); @@ -859,7 +859,7 @@ namespace xsimd * @return the batch of largest integer values not greater than \c x. */ template - inline batch floor(batch const& x) noexcept + XSIMD_INLINE batch floor(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::floor(x, A {}); @@ -875,7 +875,7 @@ namespace xsimd * @return the result of the fused multiply-add operation. */ template - inline batch fma(batch const& x, batch const& y, batch const& z) noexcept + XSIMD_INLINE batch fma(batch const& x, batch const& y, batch const& z) noexcept { detail::static_check_supported_config(); return kernel::fma(x, y, z, A {}); @@ -890,7 +890,7 @@ namespace xsimd * @return a batch of the larger values. */ template - inline batch fmax(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch fmax(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::max(x, y, A {}); @@ -905,7 +905,7 @@ namespace xsimd * @return a batch of the smaller values. 
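An illustrative sketch of the fused multiply-add family documented above; for these small exact values the fused and unfused results compare equal:

#include <xsimd/xsimd.hpp>

int main()
{
    xsimd::batch<float> a(2.0f), b(3.0f), c(1.0f);

    auto r1 = xsimd::fma(a, b, c);   // a * b + c, potentially a single fused instruction
    auto r2 = xsimd::fms(a, b, c);   // a * b - c
    auto naive = a * b + c;          // same value here, but may round an intermediate result in general

    return (xsimd::all(r1 == naive) && xsimd::all(r2 == a * b - c)) ? 0 : 1;
}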
*/ template - inline batch fmin(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch fmin(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::min(x, y, A {}); @@ -920,7 +920,7 @@ namespace xsimd * @return the result of the modulo. */ template - inline batch fmod(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch fmod(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::fmod(x, y, A {}); @@ -936,7 +936,7 @@ namespace xsimd * @return the result of the fused multiply-sub operation. */ template - inline batch fms(batch const& x, batch const& y, batch const& z) noexcept + XSIMD_INLINE batch fms(batch const& x, batch const& y, batch const& z) noexcept { detail::static_check_supported_config(); return kernel::fms(x, y, z, A {}); @@ -952,7 +952,7 @@ namespace xsimd * @return the result of the fused negated multiply-add operation. */ template - inline batch fnma(batch const& x, batch const& y, batch const& z) noexcept + XSIMD_INLINE batch fnma(batch const& x, batch const& y, batch const& z) noexcept { detail::static_check_supported_config(); return kernel::fnma(x, y, z, A {}); @@ -968,7 +968,7 @@ namespace xsimd * @return the result of the fused negated multiply-sub operation. */ template - inline batch fnms(batch const& x, batch const& y, batch const& z) noexcept + XSIMD_INLINE batch fnms(batch const& x, batch const& y, batch const& z) noexcept { detail::static_check_supported_config(); return kernel::fnms(x, y, z, A {}); @@ -983,7 +983,7 @@ namespace xsimd * @return the normalized fraction of x */ template - inline batch frexp(const batch& x, batch, A>& y) noexcept + XSIMD_INLINE batch frexp(const batch& x, batch, A>& y) noexcept { detail::static_check_supported_config(); return kernel::frexp(x, y, A {}); @@ -999,7 +999,7 @@ namespace xsimd * @return a boolean batch. */ template - inline batch_bool ge(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch_bool ge(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return x >= y; @@ -1015,7 +1015,7 @@ namespace xsimd * @return a boolean batch. */ template - inline batch_bool gt(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch_bool gt(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return x > y; @@ -1031,7 +1031,7 @@ namespace xsimd * @return the result of the reduction. */ template - inline batch haddp(batch const* row) noexcept + XSIMD_INLINE batch haddp(batch const* row) noexcept { detail::static_check_supported_config(); return kernel::haddp(row, A {}); @@ -1047,7 +1047,7 @@ namespace xsimd * @return the square root of the sum of the squares of \c x and \c y. */ template - inline batch hypot(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch hypot(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::hypot(x, y, A {}); @@ -1061,7 +1061,7 @@ namespace xsimd * @return the argument of \c x. */ template - inline real_batch_type_t> imag(batch const& x) noexcept + XSIMD_INLINE real_batch_type_t> imag(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::imag(x, A {}); @@ -1075,7 +1075,7 @@ namespace xsimd * @return the sum of \c x and 1. 
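A small sketch of hypot(), which as documented above returns sqrt(x*x + y*y) without intermediate overflow; the comparison uses a tolerance rather than exact equality since the result is computed, not literal:

#include <xsimd/xsimd.hpp>

int main()
{
    xsimd::batch<double> x(3.0), y(4.0);

    auto h   = xsimd::hypot(x, y);   // expected to be 5.0 in every lane
    auto err = xsimd::abs(h - xsimd::batch<double>(5.0));

    return xsimd::all(err < xsimd::batch<double>(1e-12)) ? 0 : 1;
}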
*/ template - inline batch incr(batch const& x) noexcept + XSIMD_INLINE batch incr(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::incr(x, A {}); @@ -1091,7 +1091,7 @@ namespace xsimd * @return the sum of \c x and 1 when \c mask is true. */ template - inline batch incr_if(batch const& x, Mask const& mask) noexcept + XSIMD_INLINE batch incr_if(batch const& x, Mask const& mask) noexcept { detail::static_check_supported_config(); return kernel::incr_if(x, mask, A {}); @@ -1104,7 +1104,7 @@ namespace xsimd * @return a batch of positive infinity */ template - inline B infinity() + XSIMD_INLINE B infinity() { using T = typename B::value_type; using A = typename B::arch_type; @@ -1122,7 +1122,7 @@ namespace xsimd * @return copy of \c x with position \c pos set to \c val */ template - inline batch insert(batch const& x, T val, index pos) noexcept + XSIMD_INLINE batch insert(batch const& x, T val, index pos) noexcept { detail::static_check_supported_config(); return kernel::insert(x, val, pos, A {}); @@ -1136,7 +1136,7 @@ namespace xsimd * @return a batch of booleans. */ template - inline batch_bool is_even(batch const& x) noexcept + XSIMD_INLINE batch_bool is_even(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::is_even(x, A {}); @@ -1150,7 +1150,7 @@ namespace xsimd * @return a batch of booleans. */ template - inline batch_bool is_flint(batch const& x) noexcept + XSIMD_INLINE batch_bool is_flint(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::is_flint(x, A {}); @@ -1164,7 +1164,7 @@ namespace xsimd * @return a batch of booleans. */ template - inline batch_bool is_odd(batch const& x) noexcept + XSIMD_INLINE batch_bool is_odd(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::is_odd(x, A {}); @@ -1178,7 +1178,7 @@ namespace xsimd * @return a batch of booleans. */ template - inline typename batch::batch_bool_type isinf(batch const& x) noexcept + XSIMD_INLINE typename batch::batch_bool_type isinf(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::isinf(x, A {}); @@ -1192,7 +1192,7 @@ namespace xsimd * @return a batch of booleans. */ template - inline typename batch::batch_bool_type isfinite(batch const& x) noexcept + XSIMD_INLINE typename batch::batch_bool_type isfinite(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::isfinite(x, A {}); @@ -1206,7 +1206,7 @@ namespace xsimd * @return a batch of booleans. */ template - inline typename batch::batch_bool_type isnan(batch const& x) noexcept + XSIMD_INLINE typename batch::batch_bool_type isnan(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::isnan(x, A {}); @@ -1221,7 +1221,7 @@ namespace xsimd * @return a batch of floating point values. */ template - inline batch ldexp(const batch& x, const batch, A>& y) noexcept + XSIMD_INLINE batch ldexp(const batch& x, const batch, A>& y) noexcept { detail::static_check_supported_config(); return kernel::ldexp(x, y, A {}); @@ -1236,7 +1236,7 @@ namespace xsimd * @return a boolean batch. */ template - inline batch_bool le(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch_bool le(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return x <= y; @@ -1250,7 +1250,7 @@ namespace xsimd * @return the natural logarithm of the gamma function of \c x. 
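For illustration, a sketch combining isnan() from above with select() (declared further down in this header) to replace NaN lanes with zero:

#include <cmath>
#include <xsimd/xsimd.hpp>

int main()
{
    xsimd::batch<float> x(std::nanf(""));   // every lane NaN, purely for illustration
    auto bad = xsimd::isnan(x);             // batch_bool: true where the lane is NaN

    auto cleaned = xsimd::select(bad, xsimd::batch<float>(0.0f), x); // lane-wise: bad ? 0 : x

    return xsimd::none(xsimd::isnan(cleaned)) ? 0 : 1;
}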
*/ template - inline batch lgamma(batch const& x) noexcept + XSIMD_INLINE batch lgamma(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::lgamma(x, A {}); @@ -1265,7 +1265,7 @@ namespace xsimd * @return a new batch instance */ template - inline simd_return_type load_as(From const* ptr, aligned_mode) noexcept + XSIMD_INLINE simd_return_type load_as(From const* ptr, aligned_mode) noexcept { using batch_value_type = typename simd_return_type::value_type; detail::static_check_supported_config(); @@ -1274,14 +1274,14 @@ namespace xsimd } template - inline simd_return_type load_as(bool const* ptr, aligned_mode) noexcept + XSIMD_INLINE simd_return_type load_as(bool const* ptr, aligned_mode) noexcept { detail::static_check_supported_config(); return simd_return_type::load_aligned(ptr); } template - inline simd_return_type, To, A> load_as(std::complex const* ptr, aligned_mode) noexcept + XSIMD_INLINE simd_return_type, To, A> load_as(std::complex const* ptr, aligned_mode) noexcept { detail::static_check_supported_config(); using batch_value_type = typename simd_return_type, To, A>::value_type; @@ -1290,7 +1290,7 @@ namespace xsimd #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline simd_return_type, To, A> load_as(xtl::xcomplex const* ptr, aligned_mode) noexcept + XSIMD_INLINE simd_return_type, To, A> load_as(xtl::xcomplex const* ptr, aligned_mode) noexcept { detail::static_check_supported_config(); detail::static_check_supported_config(); @@ -1307,7 +1307,7 @@ namespace xsimd * @return a new batch instance */ template - inline simd_return_type load_as(From const* ptr, unaligned_mode) noexcept + XSIMD_INLINE simd_return_type load_as(From const* ptr, unaligned_mode) noexcept { using batch_value_type = typename simd_return_type::value_type; detail::static_check_supported_config(); @@ -1316,13 +1316,13 @@ namespace xsimd } template - inline simd_return_type load_as(bool const* ptr, unaligned_mode) noexcept + XSIMD_INLINE simd_return_type load_as(bool const* ptr, unaligned_mode) noexcept { return simd_return_type::load_unaligned(ptr); } template - inline simd_return_type, To, A> load_as(std::complex const* ptr, unaligned_mode) noexcept + XSIMD_INLINE simd_return_type, To, A> load_as(std::complex const* ptr, unaligned_mode) noexcept { detail::static_check_supported_config(); detail::static_check_supported_config(); @@ -1332,7 +1332,7 @@ namespace xsimd #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline simd_return_type, To, A> load_as(xtl::xcomplex const* ptr, unaligned_mode) noexcept + XSIMD_INLINE simd_return_type, To, A> load_as(xtl::xcomplex const* ptr, unaligned_mode) noexcept { detail::static_check_supported_config(); detail::static_check_supported_config(); @@ -1349,7 +1349,7 @@ namespace xsimd * @return a new batch instance */ template - inline batch load(From const* ptr, aligned_mode = {}) noexcept + XSIMD_INLINE batch load(From const* ptr, aligned_mode = {}) noexcept { detail::static_check_supported_config(); return load_as(ptr, aligned_mode {}); @@ -1364,7 +1364,7 @@ namespace xsimd * @return a new batch instance */ template - inline batch load(From const* ptr, unaligned_mode) noexcept + XSIMD_INLINE batch load(From const* ptr, unaligned_mode) noexcept { detail::static_check_supported_config(); return load_as(ptr, unaligned_mode {}); @@ -1379,7 +1379,7 @@ namespace xsimd * @return a new batch instance */ template - inline batch load_aligned(From const* ptr) noexcept + XSIMD_INLINE batch load_aligned(From const* ptr) noexcept { 
detail::static_check_supported_config(); return load_as(ptr, aligned_mode {}); @@ -1394,7 +1394,7 @@ namespace xsimd * @return a new batch instance */ template - inline batch load_unaligned(From const* ptr) noexcept + XSIMD_INLINE batch load_unaligned(From const* ptr) noexcept { detail::static_check_supported_config(); return load_as(ptr, unaligned_mode {}); @@ -1408,7 +1408,7 @@ namespace xsimd * @return the natural logarithm of \c x. */ template - inline batch log(batch const& x) noexcept + XSIMD_INLINE batch log(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::log(x, A {}); @@ -1421,7 +1421,7 @@ namespace xsimd * @return the base 2 logarithm of \c x. */ template - inline batch log2(batch const& x) noexcept + XSIMD_INLINE batch log2(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::log2(x, A {}); @@ -1434,7 +1434,7 @@ namespace xsimd * @return the base 10 logarithm of \c x. */ template - inline batch log10(batch const& x) noexcept + XSIMD_INLINE batch log10(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::log10(x, A {}); @@ -1447,7 +1447,7 @@ namespace xsimd * @return the natural logarithm of one plus \c x. */ template - inline batch log1p(batch const& x) noexcept + XSIMD_INLINE batch log1p(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::log1p(x, A {}); @@ -1462,7 +1462,7 @@ namespace xsimd * @return a boolean batch. */ template - inline batch_bool lt(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch_bool lt(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return x < y; @@ -1477,7 +1477,7 @@ namespace xsimd * @return a batch of the larger values. */ template - inline batch max(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch max(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::max(x, y, A {}); @@ -1492,7 +1492,7 @@ namespace xsimd * @return a batch of the smaller values. */ template - inline batch min(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch min(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::min(x, y, A {}); @@ -1505,7 +1505,7 @@ namespace xsimd * @return a batch of positive infinity */ template - inline B minusinfinity() noexcept + XSIMD_INLINE B minusinfinity() noexcept { using T = typename B::value_type; using A = typename B::arch_type; @@ -1522,7 +1522,7 @@ namespace xsimd * @return the result of the modulo. */ template - inline auto mod(batch const& x, batch const& y) noexcept -> decltype(x % y) + XSIMD_INLINE auto mod(batch const& x, batch const& y) noexcept -> decltype(x % y) { detail::static_check_supported_config(); return x % y; @@ -1538,7 +1538,7 @@ namespace xsimd * @return the result of the product. */ template - inline auto mul(batch const& x, batch const& y) noexcept -> decltype(x * y) + XSIMD_INLINE auto mul(batch const& x, batch const& y) noexcept -> decltype(x * y) { detail::static_check_supported_config(); return x * y; @@ -1553,7 +1553,7 @@ namespace xsimd * @return the batch of nearest integer values. */ template - inline batch nearbyint(batch const& x) noexcept + XSIMD_INLINE batch nearbyint(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::nearbyint(x, A {}); @@ -1570,7 +1570,7 @@ namespace xsimd * @warning For very large values the conversion to int silently overflows. 
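A brief sketch of nearbyint_as_int(), whose overflow caveat is noted in the warning above; the result type is the integer batch of the same lane width as the input (int32_t for float):

#include <cstdint>
#include <xsimd/xsimd.hpp>

int main()
{
    xsimd::batch<float> x(2.5f);

    // Round to nearest and convert in one step; beware the silent overflow for very large inputs.
    xsimd::batch<int32_t> i = xsimd::nearbyint_as_int(x);

    return static_cast<int>(i.get(0));
}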
*/ template - inline batch, A> + XSIMD_INLINE batch, A> nearbyint_as_int(batch const& x) noexcept { detail::static_check_supported_config(); @@ -1586,7 +1586,7 @@ namespace xsimd * @return a boolean batch. */ template - inline auto neq(batch const& x, batch const& y) noexcept -> decltype(x != y) + XSIMD_INLINE auto neq(batch const& x, batch const& y) noexcept -> decltype(x != y) { detail::static_check_supported_config(); return x != y; @@ -1601,7 +1601,7 @@ namespace xsimd * @return a boolean batch. */ template - inline auto neq(batch_bool const& x, batch_bool const& y) noexcept -> decltype(x != y) + XSIMD_INLINE auto neq(batch_bool const& x, batch_bool const& y) noexcept -> decltype(x != y) { detail::static_check_supported_config(); return x != y; @@ -1615,7 +1615,7 @@ namespace xsimd * @return the opposite of \c x. */ template - inline batch neg(batch const& x) noexcept + XSIMD_INLINE batch neg(batch const& x) noexcept { detail::static_check_supported_config(); return -x; @@ -1631,7 +1631,7 @@ namespace xsimd * @return \c x raised to the power \c y. */ template - inline batch nextafter(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch nextafter(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::nextafter(x, y, A {}); @@ -1645,7 +1645,7 @@ namespace xsimd * @return the norm of \c x. */ template - inline real_batch_type_t> norm(batch const& x) noexcept + XSIMD_INLINE real_batch_type_t> norm(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::norm(x, A {}); @@ -1660,7 +1660,7 @@ namespace xsimd * @return \c r exp(i * \c theta). */ template - inline complex_batch_type_t> polar(batch const& r, batch const& theta = batch {}) noexcept + XSIMD_INLINE complex_batch_type_t> polar(batch const& r, batch const& theta = batch {}) noexcept { detail::static_check_supported_config(); return kernel::polar(r, theta, A {}); @@ -1674,7 +1674,7 @@ namespace xsimd * @return \c x. */ template - inline batch pos(batch const& x) noexcept + XSIMD_INLINE batch pos(batch const& x) noexcept { detail::static_check_supported_config(); return +x; @@ -1690,7 +1690,7 @@ namespace xsimd * @return \c x raised to the power \c y. */ template - inline batch pow(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch pow(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::pow(x, y, A {}); @@ -1706,7 +1706,7 @@ namespace xsimd * @return \c x raised to the power \c y. */ template ::value, void>::type> - inline batch pow(batch const& x, ITy y) noexcept + XSIMD_INLINE batch pow(batch const& x, ITy y) noexcept { detail::static_check_supported_config(); return kernel::ipow(x, y, A {}); @@ -1720,7 +1720,7 @@ namespace xsimd * @return the projection of \c z. */ template - inline complex_batch_type_t> proj(batch const& z) noexcept + XSIMD_INLINE complex_batch_type_t> proj(batch const& z) noexcept { detail::static_check_supported_config(); return kernel::proj(z, A {}); @@ -1734,7 +1734,7 @@ namespace xsimd * @return the argument of \c z. */ template - inline real_batch_type_t> real(batch const& z) noexcept + XSIMD_INLINE real_batch_type_t> real(batch const& z) noexcept { detail::static_check_supported_config(); return kernel::real(z, A {}); @@ -1750,7 +1750,7 @@ namespace xsimd * @return the reciprocal. 
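A sketch of the two pow() overloads documented above; the integer-exponent form dispatches to the ipow kernel, so 2^10 is computed exactly:

#include <xsimd/xsimd.hpp>

int main()
{
    xsimd::batch<double> x(2.0), y(0.5);

    auto p1 = xsimd::pow(x, y);   // element-wise 2^0.5, about 1.414 per lane
    auto p2 = xsimd::pow(x, 10);  // integer-exponent overload via kernel::ipow

    return (xsimd::all(p2 == xsimd::batch<double>(1024.0))
            && xsimd::all(p1 > xsimd::batch<double>(1.0)))
        ? 0
        : 1;
}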
*/ template ::value, void>::type> - inline batch reciprocal(batch const& x) noexcept + XSIMD_INLINE batch reciprocal(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::reciprocal(x, A {}); @@ -1765,7 +1765,7 @@ namespace xsimd * @return the result of the reduction, as a scalar. */ template - inline T reduce(F&& f, batch const& x) noexcept + XSIMD_INLINE T reduce(F&& f, batch const& x) noexcept { detail::static_check_supported_config(); return kernel::detail::reduce(std::forward(f), x, std::integral_constant::size>()); @@ -1779,7 +1779,7 @@ namespace xsimd * @return the result of the reduction. */ template - inline T reduce_add(batch const& x) noexcept + XSIMD_INLINE T reduce_add(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::reduce_add(x, A {}); @@ -1793,7 +1793,7 @@ namespace xsimd * @return the result of the reduction. */ template - inline T reduce_max(batch const& x) noexcept + XSIMD_INLINE T reduce_max(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::reduce_max(x, A {}); @@ -1807,7 +1807,7 @@ namespace xsimd * @return the result of the reduction. */ template - inline T reduce_min(batch const& x) noexcept + XSIMD_INLINE T reduce_min(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::reduce_min(x, A {}); @@ -1822,7 +1822,7 @@ namespace xsimd * @return the result of the addition. */ template - inline batch remainder(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch remainder(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::remainder(x, y, A {}); @@ -1837,7 +1837,7 @@ namespace xsimd * @return the batch of rounded values. */ template - inline batch rint(batch const& x) noexcept + XSIMD_INLINE batch rint(batch const& x) noexcept { detail::static_check_supported_config(); return nearbyint(x); @@ -1855,7 +1855,7 @@ namespace xsimd * @return rotated batch. */ template - inline batch rotate_left(batch const& x) noexcept + XSIMD_INLINE batch rotate_left(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::rotate_left(x, A {}); @@ -1873,7 +1873,7 @@ namespace xsimd * @return rotated batch. */ template - inline batch rotate_right(batch const& x) noexcept + XSIMD_INLINE batch rotate_right(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::rotate_right(x, A {}); @@ -1889,13 +1889,13 @@ namespace xsimd * @return rotated \c x. */ template - inline batch rotl(batch const& x, int shift) noexcept + XSIMD_INLINE batch rotl(batch const& x, int shift) noexcept { detail::static_check_supported_config(); return kernel::rotl(x, shift, A {}); } template - inline batch rotl(batch const& x, batch const& shift) noexcept + XSIMD_INLINE batch rotl(batch const& x, batch const& shift) noexcept { detail::static_check_supported_config(); return kernel::rotl(x, shift, A {}); @@ -1911,13 +1911,13 @@ namespace xsimd * @return rotated \c x. */ template - inline batch rotr(batch const& x, int shift) noexcept + XSIMD_INLINE batch rotr(batch const& x, int shift) noexcept { detail::static_check_supported_config(); return kernel::rotr(x, shift, A {}); } template - inline batch rotr(batch const& x, batch const& shift) noexcept + XSIMD_INLINE batch rotr(batch const& x, batch const& shift) noexcept { detail::static_check_supported_config(); return kernel::rotr(x, shift, A {}); @@ -1933,7 +1933,7 @@ namespace xsimd * @return the batch of nearest integer values. 
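An illustrative horizontal-sum sketch built on reduce_add() from above: a vectorized accumulation loop, one reduction, and a scalar tail for the leftover elements:

#include <vector>
#include <xsimd/xsimd.hpp>

float sum(const std::vector<float>& v)
{
    using b_type = xsimd::batch<float>;
    constexpr std::size_t N = b_type::size;

    b_type acc(0.0f);
    std::size_t i = 0;
    for (; i + N <= v.size(); i += N)
        acc += b_type::load_unaligned(&v[i]);   // vectorized body

    float total = xsimd::reduce_add(acc);       // collapse the accumulator batch to a scalar
    for (; i < v.size(); ++i)                   // scalar tail for the remaining elements
        total += v[i];
    return total;
}

int main()
{
    std::vector<float> data(1000, 0.5f);
    return sum(data) > 0.0f ? 0 : 1;
}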
*/ template - inline batch round(batch const& x) noexcept + XSIMD_INLINE batch round(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::round(x, A {}); @@ -1951,7 +1951,7 @@ namespace xsimd * @return the inverse square root of \c x. */ template - inline batch rsqrt(batch const& x) noexcept + XSIMD_INLINE batch rsqrt(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::rsqrt(x, A {}); @@ -1968,7 +1968,7 @@ namespace xsimd * @return the result of the saturated addition. */ template - inline batch sadd(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch sadd(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::sadd(x, y, A {}); @@ -1989,7 +1989,7 @@ namespace xsimd * @return the result of the selection. */ template - inline batch select(batch_bool const& cond, batch const& true_br, batch const& false_br) noexcept + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br) noexcept { detail::static_check_supported_config(); return kernel::select(cond, true_br, false_br, A {}); @@ -2010,7 +2010,7 @@ namespace xsimd * @return the result of the selection. */ template - inline batch, A> select(batch_bool const& cond, batch, A> const& true_br, batch, A> const& false_br) noexcept + XSIMD_INLINE batch, A> select(batch_bool const& cond, batch, A> const& true_br, batch, A> const& false_br) noexcept { detail::static_check_supported_config(); return kernel::select(cond, true_br, false_br, A {}); @@ -2031,7 +2031,7 @@ namespace xsimd * @return the result of the selection. */ template - inline batch select(batch_bool_constant const& cond, batch const& true_br, batch const& false_br) noexcept + XSIMD_INLINE batch select(batch_bool_constant const& cond, batch const& true_br, batch const& false_br) noexcept { detail::static_check_supported_config(); return kernel::select(cond, true_br, false_br, A {}); @@ -2054,7 +2054,7 @@ namespace xsimd * @return combined batch */ template - inline typename std::enable_if::value, batch>::type + XSIMD_INLINE typename std::enable_if::value, batch>::type shuffle(batch const& x, batch const& y, batch_constant mask) noexcept { static_assert(sizeof(T) == sizeof(Vt), "consistent mask"); @@ -2070,7 +2070,7 @@ namespace xsimd * @return -1 for each negative element, -1 or +1 for each null element and +1 for each element */ template - inline batch sign(batch const& x) noexcept + XSIMD_INLINE batch sign(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::sign(x, A {}); @@ -2084,7 +2084,7 @@ namespace xsimd * @return -1 for each negative element, -1 or +1 for each null element and +1 for each element */ template - inline batch signnz(batch const& x) noexcept + XSIMD_INLINE batch signnz(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::signnz(x, A {}); @@ -2098,7 +2098,7 @@ namespace xsimd * @return the sine of \c x. 
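A short sketch of select(), the lane-wise conditional documented above; for this particular pattern it coincides with max():

#include <xsimd/xsimd.hpp>

int main()
{
    xsimd::batch<float> x(1.0f), y(2.0f);

    auto cond   = x > y;                      // batch_bool from the lane-wise comparison
    auto larger = xsimd::select(cond, x, y);  // lane-wise: cond ? x : y
    auto same   = xsimd::max(x, y);           // equivalent result for this pattern

    return xsimd::all(larger == same) ? 0 : 1;
}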
*/ template - inline batch sin(batch const& x) noexcept + XSIMD_INLINE batch sin(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::sin(x, A {}); @@ -2113,7 +2113,7 @@ namespace xsimd * @return a pair containing the sine then the cosine of batch \c x */ template - inline std::pair, batch> sincos(batch const& x) noexcept + XSIMD_INLINE std::pair, batch> sincos(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::sincos(x, A {}); @@ -2127,7 +2127,7 @@ namespace xsimd * @return the hyperbolic sine of \c x. */ template - inline batch sinh(batch const& x) noexcept + XSIMD_INLINE batch sinh(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::sinh(x, A {}); @@ -2144,7 +2144,7 @@ namespace xsimd * @return slided batch. */ template - inline batch slide_left(batch const& x) noexcept + XSIMD_INLINE batch slide_left(batch const& x) noexcept { static_assert(std::is_integral::value, "can only slide batch of integers"); detail::static_check_supported_config(); @@ -2162,7 +2162,7 @@ namespace xsimd * @return slided batch. */ template - inline batch slide_right(batch const& x) noexcept + XSIMD_INLINE batch slide_right(batch const& x) noexcept { static_assert(std::is_integral::value, "can only slide batch of integers"); detail::static_check_supported_config(); @@ -2177,7 +2177,7 @@ namespace xsimd * @return the square root of \c x. */ template - inline batch sqrt(batch const& x) noexcept + XSIMD_INLINE batch sqrt(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::sqrt(x, A {}); @@ -2193,7 +2193,7 @@ namespace xsimd * @return the result of the saturated difference. */ template - inline batch ssub(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch ssub(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::ssub(x, y, A {}); @@ -2208,21 +2208,21 @@ namespace xsimd * @param src the batch to copy */ template - inline void store_as(To* dst, batch const& src, aligned_mode) noexcept + XSIMD_INLINE void store_as(To* dst, batch const& src, aligned_mode) noexcept { detail::static_check_supported_config(); kernel::store_aligned(dst, src, A {}); } template - inline void store_as(bool* dst, batch_bool const& src, aligned_mode) noexcept + XSIMD_INLINE void store_as(bool* dst, batch_bool const& src, aligned_mode) noexcept { detail::static_check_supported_config(); kernel::store(src, dst, A {}); } template - inline void store_as(std::complex* dst, batch, A> const& src, aligned_mode) noexcept + XSIMD_INLINE void store_as(std::complex* dst, batch, A> const& src, aligned_mode) noexcept { detail::static_check_supported_config, A>(); kernel::store_complex_aligned(dst, src, A {}); @@ -2230,7 +2230,7 @@ namespace xsimd #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline void store_as(xtl::xcomplex* dst, batch, A> const& src, aligned_mode) noexcept + XSIMD_INLINE void store_as(xtl::xcomplex* dst, batch, A> const& src, aligned_mode) noexcept { store_as(reinterpret_cast*>(dst), src, aligned_mode()); } @@ -2245,21 +2245,21 @@ namespace xsimd * @param src the batch to copy */ template - inline void store_as(To* dst, batch const& src, unaligned_mode) noexcept + XSIMD_INLINE void store_as(To* dst, batch const& src, unaligned_mode) noexcept { detail::static_check_supported_config(); kernel::store_unaligned(dst, src, A {}); } template - inline void store_as(bool* dst, batch_bool const& src, unaligned_mode) noexcept + XSIMD_INLINE void store_as(bool* dst, batch_bool 
const& src, unaligned_mode) noexcept { detail::static_check_supported_config(); kernel::store(src, dst, A {}); } template - inline void store_as(std::complex* dst, batch, A> const& src, unaligned_mode) noexcept + XSIMD_INLINE void store_as(std::complex* dst, batch, A> const& src, unaligned_mode) noexcept { detail::static_check_supported_config, A>(); kernel::store_complex_unaligned(dst, src, A {}); @@ -2267,7 +2267,7 @@ namespace xsimd #ifdef XSIMD_ENABLE_XTL_COMPLEX template - inline void store_as(xtl::xcomplex* dst, batch, A> const& src, unaligned_mode) noexcept + XSIMD_INLINE void store_as(xtl::xcomplex* dst, batch, A> const& src, unaligned_mode) noexcept { detail::static_check_supported_config, A>(); store_as(reinterpret_cast*>(dst), src, unaligned_mode()); @@ -2283,7 +2283,7 @@ namespace xsimd * @param val the batch to copy from */ template - inline void store(T* mem, batch const& val, aligned_mode = {}) noexcept + XSIMD_INLINE void store(T* mem, batch const& val, aligned_mode = {}) noexcept { store_as(mem, val, aligned_mode {}); } @@ -2297,7 +2297,7 @@ namespace xsimd * @param val the batch to copy from */ template - inline void store(T* mem, batch const& val, unaligned_mode) noexcept + XSIMD_INLINE void store(T* mem, batch const& val, unaligned_mode) noexcept { store_as(mem, val, unaligned_mode {}); } @@ -2311,7 +2311,7 @@ namespace xsimd * @param val the batch to copy from */ template - inline void store_aligned(T* mem, batch const& val) noexcept + XSIMD_INLINE void store_aligned(T* mem, batch const& val) noexcept { store_as(mem, val, aligned_mode {}); } @@ -2325,7 +2325,7 @@ namespace xsimd * @param val the batch to copy */ template - inline void store_unaligned(T* mem, batch const& val) noexcept + XSIMD_INLINE void store_unaligned(T* mem, batch const& val) noexcept { store_as(mem, val, unaligned_mode {}); } @@ -2340,7 +2340,7 @@ namespace xsimd * @return the difference between \c x and \c y */ template - inline auto sub(batch const& x, batch const& y) noexcept -> decltype(x - y) + XSIMD_INLINE auto sub(batch const& x, batch const& y) noexcept -> decltype(x - y) { detail::static_check_supported_config(); return x - y; @@ -2356,7 +2356,7 @@ namespace xsimd * @return swizzled batch */ template - inline typename std::enable_if::value, batch>::type + XSIMD_INLINE typename std::enable_if::value, batch>::type swizzle(batch const& x, batch_constant mask) noexcept { static_assert(sizeof(T) == sizeof(Vt), "consistent mask"); @@ -2364,7 +2364,7 @@ namespace xsimd return kernel::swizzle(x, mask, A {}); } template - inline batch, A> swizzle(batch, A> const& x, batch_constant mask) noexcept + XSIMD_INLINE batch, A> swizzle(batch, A> const& x, batch_constant mask) noexcept { static_assert(sizeof(T) == sizeof(Vt), "consistent mask"); detail::static_check_supported_config(); @@ -2381,7 +2381,7 @@ namespace xsimd * @return swizzled batch */ template - inline typename std::enable_if::value, batch>::type + XSIMD_INLINE typename std::enable_if::value, batch>::type swizzle(batch const& x, batch mask) noexcept { static_assert(sizeof(T) == sizeof(Vt), "consistent mask"); @@ -2390,7 +2390,7 @@ namespace xsimd } template - inline batch, A> swizzle(batch, A> const& x, batch mask) noexcept + XSIMD_INLINE batch, A> swizzle(batch, A> const& x, batch mask) noexcept { static_assert(sizeof(T) == sizeof(Vt), "consistent mask"); detail::static_check_supported_config(); @@ -2405,7 +2405,7 @@ namespace xsimd * @return the tangent of \c x. 
*/ template - inline batch tan(batch const& x) noexcept + XSIMD_INLINE batch tan(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::tan(x, A {}); @@ -2419,7 +2419,7 @@ namespace xsimd * @return the hyperbolic tangent of \c x. */ template - inline batch tanh(batch const& x) noexcept + XSIMD_INLINE batch tanh(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::tanh(x, A {}); @@ -2433,7 +2433,7 @@ namespace xsimd * @return the gamma function of \c x. */ template - inline batch tgamma(batch const& x) noexcept + XSIMD_INLINE batch tgamma(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::tgamma(x, A {}); @@ -2448,7 +2448,7 @@ namespace xsimd * @return \c i converted to a value of an floating point type of the same size as \c T */ template - inline batch, A> to_float(batch const& i) noexcept + XSIMD_INLINE batch, A> to_float(batch const& i) noexcept { detail::static_check_supported_config(); return batch_cast>(i); @@ -2463,7 +2463,7 @@ namespace xsimd * @return \c x converted to a value of an integer type of the same size as \c T */ template - inline batch, A> to_int(batch const& x) noexcept + XSIMD_INLINE batch, A> to_int(batch const& x) noexcept { detail::static_check_supported_config(); return batch_cast>(x); @@ -2478,7 +2478,7 @@ namespace xsimd * @return the batch of nearest integer values not greater in magnitude than \c x. */ template - inline batch trunc(batch const& x) noexcept + XSIMD_INLINE batch trunc(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::trunc(x, A {}); @@ -2494,7 +2494,7 @@ namespace xsimd * @return a batch of the high part of shuffled values. */ template - inline batch zip_hi(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch zip_hi(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::zip_hi(x, y, A {}); @@ -2510,7 +2510,7 @@ namespace xsimd * @return a batch of the low part of shuffled values. */ template - inline batch zip_lo(batch const& x, batch const& y) noexcept + XSIMD_INLINE batch zip_lo(batch const& x, batch const& y) noexcept { detail::static_check_supported_config(); return kernel::zip_lo(x, y, A {}); @@ -2527,7 +2527,7 @@ namespace xsimd * @return \c self cast to a \c batch of \c T */ template ::value, int>::type = 3> - inline batch bitwise_cast(batch_bool const& self) noexcept + XSIMD_INLINE batch bitwise_cast(batch_bool const& self) noexcept { T z(0); detail::static_check_supported_config(); @@ -2535,7 +2535,7 @@ namespace xsimd } template ::value, int>::type = 3> - inline batch bitwise_cast(batch_bool const& self) noexcept + XSIMD_INLINE batch bitwise_cast(batch_bool const& self) noexcept { T z0(0), z1(0); using int_type = as_unsigned_integer_t; @@ -2554,7 +2554,7 @@ namespace xsimd * @return a boolean scalar. */ template - inline bool all(batch_bool const& x) noexcept + XSIMD_INLINE bool all(batch_bool const& x) noexcept { detail::static_check_supported_config(); return kernel::all(x, A {}); @@ -2569,7 +2569,7 @@ namespace xsimd * @return a boolean scalar. */ template - inline bool any(batch_bool const& x) noexcept + XSIMD_INLINE bool any(batch_bool const& x) noexcept { detail::static_check_supported_config(); return kernel::any(x, A {}); @@ -2584,7 +2584,7 @@ namespace xsimd * @return a boolean scalar. 
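A small sketch of the boolean reductions declared above, which collapse a batch_bool to a single scalar:

#include <xsimd/xsimd.hpp>

int main()
{
    xsimd::batch<float> x(0.5f);

    bool every_lane = xsimd::all(x > xsimd::batch<float>(0.0f));   // true: every lane is positive
    bool some_lane  = xsimd::any(x < xsimd::batch<float>(0.0f));   // false: no lane is negative
    bool no_lane    = xsimd::none(xsimd::isnan(x));                // true: no lane is NaN

    return (every_lane && !some_lane && no_lane) ? 0 : 1;
}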
*/ template - inline bool none(batch_bool const& x) noexcept + XSIMD_INLINE bool none(batch_bool const& x) noexcept { detail::static_check_supported_config(); return !xsimd::any(x); @@ -2599,7 +2599,7 @@ namespace xsimd * @return a reference to \c o */ template - inline std::ostream& operator<<(std::ostream& o, batch const& x) noexcept + XSIMD_INLINE std::ostream& operator<<(std::ostream& o, batch const& x) noexcept { detail::static_check_supported_config(); constexpr auto size = batch::size; @@ -2620,7 +2620,7 @@ namespace xsimd * @return a reference to \c o */ template - inline std::ostream& operator<<(std::ostream& o, batch_bool const& x) noexcept + XSIMD_INLINE std::ostream& operator<<(std::ostream& o, batch_bool const& x) noexcept { detail::static_check_supported_config(); constexpr auto size = batch_bool::size; diff --git a/include/xsimd/types/xsimd_batch.hpp b/include/xsimd/types/xsimd_batch.hpp index d9108823a..898f7b5a4 100644 --- a/include/xsimd/types/xsimd_batch.hpp +++ b/include/xsimd/types/xsimd_batch.hpp @@ -29,38 +29,38 @@ namespace xsimd template struct integral_only_operators { - inline batch& operator%=(batch const& other) noexcept; - inline batch& operator>>=(int32_t other) noexcept; - inline batch& operator>>=(batch const& other) noexcept; - inline batch& operator<<=(int32_t other) noexcept; - inline batch& operator<<=(batch const& other) noexcept; + XSIMD_INLINE batch& operator%=(batch const& other) noexcept; + XSIMD_INLINE batch& operator>>=(int32_t other) noexcept; + XSIMD_INLINE batch& operator>>=(batch const& other) noexcept; + XSIMD_INLINE batch& operator<<=(int32_t other) noexcept; + XSIMD_INLINE batch& operator<<=(batch const& other) noexcept; /** Shorthand for xsimd::mod() */ - friend inline batch operator%(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator%(batch const& self, batch const& other) noexcept { return batch(self) %= other; } /** Shorthand for xsimd::bitwise_rshift() */ - friend inline batch operator>>(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator>>(batch const& self, batch const& other) noexcept { return batch(self) >>= other; } /** Shorthand for xsimd::bitwise_lshift() */ - friend inline batch operator<<(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator<<(batch const& self, batch const& other) noexcept { return batch(self) <<= other; } /** Shorthand for xsimd::bitwise_rshift() */ - friend inline batch operator>>(batch const& self, int32_t other) noexcept + friend XSIMD_INLINE batch operator>>(batch const& self, int32_t other) noexcept { return batch(self) >>= other; } /** Shorthand for xsimd::bitwise_lshift() */ - friend inline batch operator<<(batch const& self, int32_t other) noexcept + friend XSIMD_INLINE batch operator<<(batch const& self, int32_t other) noexcept { return batch(self) <<= other; } @@ -82,22 +82,22 @@ namespace xsimd // with batch. Their implementation must appear only once the // kernel implementations have been included. 
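// Usage sketch (not part of the patch): the reductions documented above collapse
// a batch_bool into a single scalar, and the stream operator prints the lanes.
// The helper name below is illustrative only.
#include "xsimd/xsimd.hpp"
#include <iostream>

inline bool any_negative(xsimd::batch<float> const& v)
{
    auto negative = v < xsimd::batch<float>(0.f); // per-lane comparison yields a batch_bool
    std::cout << v << '\n';                       // stream output documented in this hunk
    // xsimd::all(negative) / xsimd::none(negative) behave analogously,
    // returning true when every lane / no lane is set.
    return xsimd::any(negative);                  // true if at least one lane is set
}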
template - inline batch_bool eq(batch const& self, batch const& other) noexcept; + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other) noexcept; template - inline batch_bool neq(batch const& self, batch const& other) noexcept; + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other) noexcept; template - inline batch_bool ge(batch const& self, batch const& other) noexcept; + XSIMD_INLINE batch_bool ge(batch const& self, batch const& other) noexcept; template - inline batch_bool le(batch const& self, batch const& other) noexcept; + XSIMD_INLINE batch_bool le(batch const& self, batch const& other) noexcept; template - inline batch_bool gt(batch const& self, batch const& other) noexcept; + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other) noexcept; template - inline batch_bool lt(batch const& self, batch const& other) noexcept; + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other) noexcept; } /** @@ -123,152 +123,152 @@ namespace xsimd using batch_bool_type = batch_bool; ///< Associated batch type used to represented logical operations on this batch. // constructors - inline batch() = default; ///< Create a batch initialized with undefined values. - inline batch(T val) noexcept; + XSIMD_INLINE batch() = default; ///< Create a batch initialized with undefined values. + XSIMD_INLINE batch(T val) noexcept; template - inline batch(T val0, T val1, Ts... vals) noexcept; - inline explicit batch(batch_bool_type const& b) noexcept; - inline batch(register_type reg) noexcept; + XSIMD_INLINE batch(T val0, T val1, Ts... vals) noexcept; + XSIMD_INLINE explicit batch(batch_bool_type const& b) noexcept; + XSIMD_INLINE batch(register_type reg) noexcept; template - XSIMD_NO_DISCARD static inline batch broadcast(U val) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch broadcast(U val) noexcept; // memory operators template - inline void store_aligned(U* mem) const noexcept; + XSIMD_INLINE void store_aligned(U* mem) const noexcept; template - inline void store_unaligned(U* mem) const noexcept; + XSIMD_INLINE void store_unaligned(U* mem) const noexcept; template - inline void store(U* mem, aligned_mode) const noexcept; + XSIMD_INLINE void store(U* mem, aligned_mode) const noexcept; template - inline void store(U* mem, unaligned_mode) const noexcept; + XSIMD_INLINE void store(U* mem, unaligned_mode) const noexcept; template - XSIMD_NO_DISCARD static inline batch load_aligned(U const* mem) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch load_aligned(U const* mem) noexcept; template - XSIMD_NO_DISCARD static inline batch load_unaligned(U const* mem) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch load_unaligned(U const* mem) noexcept; template - XSIMD_NO_DISCARD static inline batch load(U const* mem, aligned_mode) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch load(U const* mem, aligned_mode) noexcept; template - XSIMD_NO_DISCARD static inline batch load(U const* mem, unaligned_mode) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch load(U const* mem, unaligned_mode) noexcept; template - XSIMD_NO_DISCARD static inline batch gather(U const* src, batch const& index) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch gather(U const* src, batch const& index) noexcept; template - inline void scatter(U* dst, batch const& index) const noexcept; + XSIMD_INLINE void scatter(U* dst, batch const& index) const noexcept; - inline T get(std::size_t i) const noexcept; + XSIMD_INLINE T get(std::size_t i) const noexcept; // comparison 
operators. Defined as friend to enable automatic // conversion of parameters from scalar to batch, at the cost of using a // proxy implementation from details::. - friend inline batch_bool operator==(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch_bool operator==(batch const& self, batch const& other) noexcept { return details::eq(self, other); } - friend inline batch_bool operator!=(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch_bool operator!=(batch const& self, batch const& other) noexcept { return details::neq(self, other); } - friend inline batch_bool operator>=(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch_bool operator>=(batch const& self, batch const& other) noexcept { return details::ge(self, other); } - friend inline batch_bool operator<=(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch_bool operator<=(batch const& self, batch const& other) noexcept { return details::le(self, other); } - friend inline batch_bool operator>(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch_bool operator>(batch const& self, batch const& other) noexcept { return details::gt(self, other); } - friend inline batch_bool operator<(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch_bool operator<(batch const& self, batch const& other) noexcept { return details::lt(self, other); } // Update operators - inline batch& operator+=(batch const& other) noexcept; - inline batch& operator-=(batch const& other) noexcept; - inline batch& operator*=(batch const& other) noexcept; - inline batch& operator/=(batch const& other) noexcept; - inline batch& operator&=(batch const& other) noexcept; - inline batch& operator|=(batch const& other) noexcept; - inline batch& operator^=(batch const& other) noexcept; + XSIMD_INLINE batch& operator+=(batch const& other) noexcept; + XSIMD_INLINE batch& operator-=(batch const& other) noexcept; + XSIMD_INLINE batch& operator*=(batch const& other) noexcept; + XSIMD_INLINE batch& operator/=(batch const& other) noexcept; + XSIMD_INLINE batch& operator&=(batch const& other) noexcept; + XSIMD_INLINE batch& operator|=(batch const& other) noexcept; + XSIMD_INLINE batch& operator^=(batch const& other) noexcept; // incr/decr operators - inline batch& operator++() noexcept; - inline batch& operator--() noexcept; - inline batch operator++(int) noexcept; - inline batch operator--(int) noexcept; + XSIMD_INLINE batch& operator++() noexcept; + XSIMD_INLINE batch& operator--() noexcept; + XSIMD_INLINE batch operator++(int) noexcept; + XSIMD_INLINE batch operator--(int) noexcept; // unary operators - inline batch_bool_type operator!() const noexcept; - inline batch operator~() const noexcept; - inline batch operator-() const noexcept; - inline batch operator+() const noexcept; + XSIMD_INLINE batch_bool_type operator!() const noexcept; + XSIMD_INLINE batch operator~() const noexcept; + XSIMD_INLINE batch operator-() const noexcept; + XSIMD_INLINE batch operator+() const noexcept; // arithmetic operators. They are defined as friend to enable automatic // conversion of parameters from scalar to batch. Inline implementation // is required to avoid warnings. 
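// Illustration (not part of the patch): because the comparison and arithmetic
// operators are declared as friends taking batch parameters, a scalar operand
// is implicitly converted through the broadcasting constructor, so mixed
// scalar/batch expressions compile without explicit casts. The helper name
// below is illustrative only.
#include "xsimd/xsimd.hpp"

inline xsimd::batch<double> scale_and_shift(xsimd::batch<double> const& x)
{
    return 2.0 * x + 1.0; // 2.0 and 1.0 are broadcast to batch<double>
}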
/** Shorthand for xsimd::add() */ - friend inline batch operator+(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator+(batch const& self, batch const& other) noexcept { return batch(self) += other; } /** Shorthand for xsimd::sub() */ - friend inline batch operator-(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator-(batch const& self, batch const& other) noexcept { return batch(self) -= other; } /** Shorthand for xsimd::mul() */ - friend inline batch operator*(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator*(batch const& self, batch const& other) noexcept { return batch(self) *= other; } /** Shorthand for xsimd::div() */ - friend inline batch operator/(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator/(batch const& self, batch const& other) noexcept { return batch(self) /= other; } /** Shorthand for xsimd::bitwise_and() */ - friend inline batch operator&(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator&(batch const& self, batch const& other) noexcept { return batch(self) &= other; } /** Shorthand for xsimd::bitwise_or() */ - friend inline batch operator|(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator|(batch const& self, batch const& other) noexcept { return batch(self) |= other; } /** Shorthand for xsimd::bitwise_xor() */ - friend inline batch operator^(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator^(batch const& self, batch const& other) noexcept { return batch(self) ^= other; } /** Shorthand for xsimd::logical_and() */ - friend inline batch operator&&(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator&&(batch const& self, batch const& other) noexcept { return batch(self).logical_and(other); } /** Shorthand for xsimd::logical_or() */ - friend inline batch operator||(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator||(batch const& self, batch const& other) noexcept { return batch(self).logical_or(other); } private: - inline batch logical_and(batch const& other) const noexcept; - inline batch logical_or(batch const& other) const noexcept; + XSIMD_INLINE batch logical_and(batch const& other) const noexcept; + XSIMD_INLINE batch logical_or(batch const& other) const noexcept; }; template @@ -297,51 +297,51 @@ namespace xsimd using batch_type = batch; ///< Associated batch type this batch represents logical operations for. // constructors - inline batch_bool() = default; ///< Create a batch initialized with undefined values. - inline batch_bool(bool val) noexcept; - inline batch_bool(register_type reg) noexcept; + XSIMD_INLINE batch_bool() = default; ///< Create a batch initialized with undefined values. + XSIMD_INLINE batch_bool(bool val) noexcept; + XSIMD_INLINE batch_bool(register_type reg) noexcept; template - inline batch_bool(bool val0, bool val1, Ts... vals) noexcept; + XSIMD_INLINE batch_bool(bool val0, bool val1, Ts... 
vals) noexcept; template - inline batch_bool(Tp const*) = delete; + XSIMD_INLINE batch_bool(Tp const*) = delete; // memory operators - inline void store_aligned(bool* mem) const noexcept; - inline void store_unaligned(bool* mem) const noexcept; - XSIMD_NO_DISCARD static inline batch_bool load_aligned(bool const* mem) noexcept; - XSIMD_NO_DISCARD static inline batch_bool load_unaligned(bool const* mem) noexcept; + XSIMD_INLINE void store_aligned(bool* mem) const noexcept; + XSIMD_INLINE void store_unaligned(bool* mem) const noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch_bool load_aligned(bool const* mem) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch_bool load_unaligned(bool const* mem) noexcept; - inline bool get(std::size_t i) const noexcept; + XSIMD_INLINE bool get(std::size_t i) const noexcept; // mask operations - inline uint64_t mask() const noexcept; - inline static batch_bool from_mask(uint64_t mask) noexcept; + XSIMD_INLINE uint64_t mask() const noexcept; + XSIMD_INLINE static batch_bool from_mask(uint64_t mask) noexcept; // comparison operators - inline batch_bool operator==(batch_bool const& other) const noexcept; - inline batch_bool operator!=(batch_bool const& other) const noexcept; + XSIMD_INLINE batch_bool operator==(batch_bool const& other) const noexcept; + XSIMD_INLINE batch_bool operator!=(batch_bool const& other) const noexcept; // logical operators - inline batch_bool operator~() const noexcept; - inline batch_bool operator!() const noexcept; - inline batch_bool operator&(batch_bool const& other) const noexcept; - inline batch_bool operator|(batch_bool const& other) const noexcept; - inline batch_bool operator^(batch_bool const& other) const noexcept; - inline batch_bool operator&&(batch_bool const& other) const noexcept; - inline batch_bool operator||(batch_bool const& other) const noexcept; + XSIMD_INLINE batch_bool operator~() const noexcept; + XSIMD_INLINE batch_bool operator!() const noexcept; + XSIMD_INLINE batch_bool operator&(batch_bool const& other) const noexcept; + XSIMD_INLINE batch_bool operator|(batch_bool const& other) const noexcept; + XSIMD_INLINE batch_bool operator^(batch_bool const& other) const noexcept; + XSIMD_INLINE batch_bool operator&&(batch_bool const& other) const noexcept; + XSIMD_INLINE batch_bool operator||(batch_bool const& other) const noexcept; // update operators - inline batch_bool& operator&=(batch_bool const& other) noexcept { return (*this) = (*this) & other; } - inline batch_bool& operator|=(batch_bool const& other) noexcept { return (*this) = (*this) | other; } - inline batch_bool& operator^=(batch_bool const& other) noexcept { return (*this) = (*this) ^ other; } + XSIMD_INLINE batch_bool& operator&=(batch_bool const& other) noexcept { return (*this) = (*this) & other; } + XSIMD_INLINE batch_bool& operator|=(batch_bool const& other) noexcept { return (*this) = (*this) | other; } + XSIMD_INLINE batch_bool& operator^=(batch_bool const& other) noexcept { return (*this) = (*this) ^ other; } private: template - static inline register_type make_register(detail::index_sequence, U u, V... v) noexcept; + static XSIMD_INLINE register_type make_register(detail::index_sequence, U u, V... v) noexcept; template - static inline register_type make_register(detail::index_sequence<>, V... v) noexcept; + static XSIMD_INLINE register_type make_register(detail::index_sequence<>, V... 
v) noexcept; }; template @@ -367,106 +367,106 @@ namespace xsimd static constexpr std::size_t size = real_batch::size; ///< Number of complex elements in this batch. // constructors - inline batch() = default; ///< Create a batch initialized with undefined values. - inline batch(value_type const& val) noexcept; - inline batch(real_batch const& real, real_batch const& imag) noexcept; + XSIMD_INLINE batch() = default; ///< Create a batch initialized with undefined values. + XSIMD_INLINE batch(value_type const& val) noexcept; + XSIMD_INLINE batch(real_batch const& real, real_batch const& imag) noexcept; - inline batch(real_batch const& real) noexcept; - inline batch(T val) noexcept; + XSIMD_INLINE batch(real_batch const& real) noexcept; + XSIMD_INLINE batch(T val) noexcept; template - inline batch(value_type val0, value_type val1, Ts... vals) noexcept; - inline explicit batch(batch_bool_type const& b) noexcept; + XSIMD_INLINE batch(value_type val0, value_type val1, Ts... vals) noexcept; + XSIMD_INLINE explicit batch(batch_bool_type const& b) noexcept; template - XSIMD_NO_DISCARD static inline batch broadcast(U val) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch broadcast(U val) noexcept; // memory operators - XSIMD_NO_DISCARD static inline batch load_aligned(const T* real_src, const T* imag_src = nullptr) noexcept; - XSIMD_NO_DISCARD static inline batch load_unaligned(const T* real_src, const T* imag_src = nullptr) noexcept; - inline void store_aligned(T* real_dst, T* imag_dst) const noexcept; - inline void store_unaligned(T* real_dst, T* imag_dst) const noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch load_aligned(const T* real_src, const T* imag_src = nullptr) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch load_unaligned(const T* real_src, const T* imag_src = nullptr) noexcept; + XSIMD_INLINE void store_aligned(T* real_dst, T* imag_dst) const noexcept; + XSIMD_INLINE void store_unaligned(T* real_dst, T* imag_dst) const noexcept; - XSIMD_NO_DISCARD static inline batch load_aligned(const value_type* src) noexcept; - XSIMD_NO_DISCARD static inline batch load_unaligned(const value_type* src) noexcept; - inline void store_aligned(value_type* dst) const noexcept; - inline void store_unaligned(value_type* dst) const noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch load_aligned(const value_type* src) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch load_unaligned(const value_type* src) noexcept; + XSIMD_INLINE void store_aligned(value_type* dst) const noexcept; + XSIMD_INLINE void store_unaligned(value_type* dst) const noexcept; template - XSIMD_NO_DISCARD static inline batch load(U const* mem, aligned_mode) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch load(U const* mem, aligned_mode) noexcept; template - XSIMD_NO_DISCARD static inline batch load(U const* mem, unaligned_mode) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch load(U const* mem, unaligned_mode) noexcept; template - inline void store(U* mem, aligned_mode) const noexcept; + XSIMD_INLINE void store(U* mem, aligned_mode) const noexcept; template - inline void store(U* mem, unaligned_mode) const noexcept; + XSIMD_INLINE void store(U* mem, unaligned_mode) const noexcept; - inline real_batch real() const noexcept; - inline real_batch imag() const noexcept; + XSIMD_INLINE real_batch real() const noexcept; + XSIMD_INLINE real_batch imag() const noexcept; - inline value_type get(std::size_t i) const noexcept; + XSIMD_INLINE value_type get(std::size_t i) const noexcept; #ifdef 
XSIMD_ENABLE_XTL_COMPLEX // xtl-related methods template - inline batch(xtl::xcomplex const& val) noexcept; + XSIMD_INLINE batch(xtl::xcomplex const& val) noexcept; template - inline batch(xtl::xcomplex val0, xtl::xcomplex val1, Ts... vals) noexcept; + XSIMD_INLINE batch(xtl::xcomplex val0, xtl::xcomplex val1, Ts... vals) noexcept; template - XSIMD_NO_DISCARD static inline batch load_aligned(const xtl::xcomplex* src) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch load_aligned(const xtl::xcomplex* src) noexcept; template - XSIMD_NO_DISCARD static inline batch load_unaligned(const xtl::xcomplex* src) noexcept; + XSIMD_NO_DISCARD static XSIMD_INLINE batch load_unaligned(const xtl::xcomplex* src) noexcept; template - inline void store_aligned(xtl::xcomplex* dst) const noexcept; + XSIMD_INLINE void store_aligned(xtl::xcomplex* dst) const noexcept; template - inline void store_unaligned(xtl::xcomplex* dst) const noexcept; + XSIMD_INLINE void store_unaligned(xtl::xcomplex* dst) const noexcept; #endif // comparison operators - inline batch_bool operator==(batch const& other) const noexcept; - inline batch_bool operator!=(batch const& other) const noexcept; + XSIMD_INLINE batch_bool operator==(batch const& other) const noexcept; + XSIMD_INLINE batch_bool operator!=(batch const& other) const noexcept; // Update operators - inline batch& operator+=(batch const& other) noexcept; - inline batch& operator-=(batch const& other) noexcept; - inline batch& operator*=(batch const& other) noexcept; - inline batch& operator/=(batch const& other) noexcept; + XSIMD_INLINE batch& operator+=(batch const& other) noexcept; + XSIMD_INLINE batch& operator-=(batch const& other) noexcept; + XSIMD_INLINE batch& operator*=(batch const& other) noexcept; + XSIMD_INLINE batch& operator/=(batch const& other) noexcept; // incr/decr operators - inline batch& operator++() noexcept; - inline batch& operator--() noexcept; - inline batch operator++(int) noexcept; - inline batch operator--(int) noexcept; + XSIMD_INLINE batch& operator++() noexcept; + XSIMD_INLINE batch& operator--() noexcept; + XSIMD_INLINE batch operator++(int) noexcept; + XSIMD_INLINE batch operator--(int) noexcept; // unary operators - inline batch_bool_type operator!() const noexcept; - inline batch operator~() const noexcept; - inline batch operator-() const noexcept; - inline batch operator+() const noexcept; + XSIMD_INLINE batch_bool_type operator!() const noexcept; + XSIMD_INLINE batch operator~() const noexcept; + XSIMD_INLINE batch operator-() const noexcept; + XSIMD_INLINE batch operator+() const noexcept; // arithmetic operators. 
They are defined as friend to enable automatic // conversion of parameters from scalar to batch /** Shorthand for xsimd::add() */ - friend inline batch operator+(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator+(batch const& self, batch const& other) noexcept { return batch(self) += other; } /** Shorthand for xsimd::sub() */ - friend inline batch operator-(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator-(batch const& self, batch const& other) noexcept { return batch(self) -= other; } /** Shorthand for xsimd::mul() */ - friend inline batch operator*(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator*(batch const& self, batch const& other) noexcept { return batch(self) *= other; } /** Shorthand for xsimd::div() */ - friend inline batch operator/(batch const& self, batch const& other) noexcept + friend XSIMD_INLINE batch operator/(batch const& self, batch const& other) noexcept { return batch(self) /= other; } @@ -500,7 +500,7 @@ namespace xsimd * Create a batch with all element initialized to \c val. */ template - inline batch::batch(T val) noexcept + XSIMD_INLINE batch::batch(T val) noexcept : types::simd_register(kernel::broadcast(val, A {})) { detail::static_check_supported_config(); @@ -512,7 +512,7 @@ namespace xsimd */ template template - inline batch::batch(T val0, T val1, Ts... vals) noexcept + XSIMD_INLINE batch::batch(T val0, T val1, Ts... vals) noexcept : batch(kernel::set(batch {}, A {}, val0, val1, static_cast(vals)...)) { detail::static_check_supported_config(); @@ -525,7 +525,7 @@ namespace xsimd * (resp. `false`). */ template - inline batch::batch(batch_bool const& b) noexcept + XSIMD_INLINE batch::batch(batch_bool const& b) noexcept : batch(kernel::from_bool(b, A {})) { } @@ -535,7 +535,7 @@ namespace xsimd * becomes handy when doing architecture-specific operations. 
*/ template - inline batch::batch(register_type reg) noexcept + XSIMD_INLINE batch::batch(register_type reg) noexcept : types::simd_register({ reg }) { detail::static_check_supported_config(); @@ -546,7 +546,7 @@ namespace xsimd */ template template - XSIMD_NO_DISCARD inline batch batch::broadcast(U val) noexcept + XSIMD_NO_DISCARD XSIMD_INLINE batch batch::broadcast(U val) noexcept { detail::static_check_supported_config(); return batch(static_cast(val)); @@ -562,7 +562,7 @@ namespace xsimd */ template template - inline void batch::store_aligned(U* mem) const noexcept + XSIMD_INLINE void batch::store_aligned(U* mem) const noexcept { detail::static_check_supported_config(); assert(((reinterpret_cast(mem) % A::alignment()) == 0) @@ -576,7 +576,7 @@ namespace xsimd */ template template - inline void batch::store_unaligned(U* mem) const noexcept + XSIMD_INLINE void batch::store_unaligned(U* mem) const noexcept { detail::static_check_supported_config(); kernel::store_unaligned(mem, *this, A {}); @@ -587,7 +587,7 @@ namespace xsimd */ template template - inline void batch::store(U* mem, aligned_mode) const noexcept + XSIMD_INLINE void batch::store(U* mem, aligned_mode) const noexcept { detail::static_check_supported_config(); return store_aligned(mem); @@ -598,7 +598,7 @@ namespace xsimd */ template template - inline void batch::store(U* mem, unaligned_mode) const noexcept + XSIMD_INLINE void batch::store(U* mem, unaligned_mode) const noexcept { detail::static_check_supported_config(); return store_unaligned(mem); @@ -610,7 +610,7 @@ namespace xsimd */ template template - inline batch batch::load_aligned(U const* mem) noexcept + XSIMD_INLINE batch batch::load_aligned(U const* mem) noexcept { assert(((reinterpret_cast(mem) % A::alignment()) == 0) && "loaded pointer is not properly aligned"); @@ -624,7 +624,7 @@ namespace xsimd */ template template - inline batch batch::load_unaligned(U const* mem) noexcept + XSIMD_INLINE batch batch::load_unaligned(U const* mem) noexcept { detail::static_check_supported_config(); return kernel::load_unaligned(mem, kernel::convert {}, A {}); @@ -635,7 +635,7 @@ namespace xsimd */ template template - inline batch batch::load(U const* mem, aligned_mode) noexcept + XSIMD_INLINE batch batch::load(U const* mem, aligned_mode) noexcept { detail::static_check_supported_config(); return load_aligned(mem); @@ -646,7 +646,7 @@ namespace xsimd */ template template - inline batch batch::load(U const* mem, unaligned_mode) noexcept + XSIMD_INLINE batch batch::load(U const* mem, unaligned_mode) noexcept { detail::static_check_supported_config(); return load_unaligned(mem); @@ -660,7 +660,7 @@ namespace xsimd */ template template - inline batch batch::gather(U const* src, batch const& index) noexcept + XSIMD_INLINE batch batch::gather(U const* src, batch const& index) noexcept { detail::static_check_supported_config(); static_assert(std::is_convertible::value, "Can't convert from src to this batch's type!"); @@ -675,7 +675,7 @@ namespace xsimd */ template template - inline void batch::scatter(U* dst, batch const& index) const noexcept + XSIMD_INLINE void batch::scatter(U* dst, batch const& index) const noexcept { detail::static_check_supported_config(); static_assert(std::is_convertible::value, "Can't convert from this batch's type to dst!"); @@ -688,7 +688,7 @@ namespace xsimd * \c warning This is very inefficient and should only be used for debugging purpose. 
*/ template - inline T batch::get(std::size_t i) const noexcept + XSIMD_INLINE T batch::get(std::size_t i) const noexcept { return kernel::get(*this, i, A {}); } @@ -702,7 +702,7 @@ namespace xsimd * Shorthand for xsimd::eq() */ template - inline batch_bool eq(batch const& self, batch const& other) noexcept + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other) noexcept { detail::static_check_supported_config(); return kernel::eq(self, other, A {}); @@ -712,7 +712,7 @@ namespace xsimd * Shorthand for xsimd::neq() */ template - inline batch_bool neq(batch const& self, batch const& other) noexcept + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other) noexcept { detail::static_check_supported_config(); return kernel::neq(self, other, A {}); @@ -722,7 +722,7 @@ namespace xsimd * Shorthand for xsimd::ge() */ template - inline batch_bool ge(batch const& self, batch const& other) noexcept + XSIMD_INLINE batch_bool ge(batch const& self, batch const& other) noexcept { detail::static_check_supported_config(); return kernel::ge(self, other, A {}); @@ -732,7 +732,7 @@ namespace xsimd * Shorthand for xsimd::le() */ template - inline batch_bool le(batch const& self, batch const& other) noexcept + XSIMD_INLINE batch_bool le(batch const& self, batch const& other) noexcept { detail::static_check_supported_config(); return kernel::le(self, other, A {}); @@ -742,7 +742,7 @@ namespace xsimd * Shorthand for xsimd::gt() */ template - inline batch_bool gt(batch const& self, batch const& other) noexcept + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other) noexcept { detail::static_check_supported_config(); return kernel::gt(self, other, A {}); @@ -752,7 +752,7 @@ namespace xsimd * Shorthand for xsimd::lt() */ template - inline batch_bool lt(batch const& self, batch const& other) noexcept + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other) noexcept { detail::static_check_supported_config(); return kernel::lt(self, other, A {}); @@ -764,84 +764,84 @@ namespace xsimd **************************/ template - inline batch& batch::operator+=(batch const& other) noexcept + XSIMD_INLINE batch& batch::operator+=(batch const& other) noexcept { detail::static_check_supported_config(); return *this = kernel::add(*this, other, A {}); } template - inline batch& batch::operator-=(batch const& other) noexcept + XSIMD_INLINE batch& batch::operator-=(batch const& other) noexcept { detail::static_check_supported_config(); return *this = kernel::sub(*this, other, A {}); } template - inline batch& batch::operator*=(batch const& other) noexcept + XSIMD_INLINE batch& batch::operator*=(batch const& other) noexcept { detail::static_check_supported_config(); return *this = kernel::mul(*this, other, A {}); } template - inline batch& batch::operator/=(batch const& other) noexcept + XSIMD_INLINE batch& batch::operator/=(batch const& other) noexcept { detail::static_check_supported_config(); return *this = kernel::div(*this, other, A {}); } template - inline batch& types::integral_only_operators::operator%=(batch const& other) noexcept + XSIMD_INLINE batch& types::integral_only_operators::operator%=(batch const& other) noexcept { ::xsimd::detail::static_check_supported_config(); return *static_cast*>(this) = kernel::mod(*static_cast*>(this), other, A {}); } template - inline batch& batch::operator&=(batch const& other) noexcept + XSIMD_INLINE batch& batch::operator&=(batch const& other) noexcept { detail::static_check_supported_config(); return *this = kernel::bitwise_and(*this, other, 
A {}); } template - inline batch& batch::operator|=(batch const& other) noexcept + XSIMD_INLINE batch& batch::operator|=(batch const& other) noexcept { detail::static_check_supported_config(); return *this = kernel::bitwise_or(*this, other, A {}); } template - inline batch& batch::operator^=(batch const& other) noexcept + XSIMD_INLINE batch& batch::operator^=(batch const& other) noexcept { detail::static_check_supported_config(); return *this = kernel::bitwise_xor(*this, other, A {}); } template - inline batch& kernel::integral_only_operators::operator>>=(batch const& other) noexcept + XSIMD_INLINE batch& kernel::integral_only_operators::operator>>=(batch const& other) noexcept { ::xsimd::detail::static_check_supported_config(); return *static_cast*>(this) = kernel::bitwise_rshift(*static_cast*>(this), other, A {}); } template - inline batch& kernel::integral_only_operators::operator<<=(batch const& other) noexcept + XSIMD_INLINE batch& kernel::integral_only_operators::operator<<=(batch const& other) noexcept { ::xsimd::detail::static_check_supported_config(); return *static_cast*>(this) = kernel::bitwise_lshift(*static_cast*>(this), other, A {}); } template - inline batch& kernel::integral_only_operators::operator>>=(int32_t other) noexcept + XSIMD_INLINE batch& kernel::integral_only_operators::operator>>=(int32_t other) noexcept { ::xsimd::detail::static_check_supported_config(); return *static_cast*>(this) = kernel::bitwise_rshift(*static_cast*>(this), other, A {}); } template - inline batch& kernel::integral_only_operators::operator<<=(int32_t other) noexcept + XSIMD_INLINE batch& kernel::integral_only_operators::operator<<=(int32_t other) noexcept { ::xsimd::detail::static_check_supported_config(); return *static_cast*>(this) = kernel::bitwise_lshift(*static_cast*>(this), other, A {}); @@ -852,21 +852,21 @@ namespace xsimd *****************************/ template - inline batch& batch::operator++() noexcept + XSIMD_INLINE batch& batch::operator++() noexcept { detail::static_check_supported_config(); return operator+=(1); } template - inline batch& batch::operator--() noexcept + XSIMD_INLINE batch& batch::operator--() noexcept { detail::static_check_supported_config(); return operator-=(1); } template - inline batch batch::operator++(int) noexcept + XSIMD_INLINE batch batch::operator++(int) noexcept { detail::static_check_supported_config(); batch copy(*this); @@ -875,7 +875,7 @@ namespace xsimd } template - inline batch batch::operator--(int) noexcept + XSIMD_INLINE batch batch::operator--(int) noexcept { detail::static_check_supported_config(); batch copy(*this); @@ -888,28 +888,28 @@ namespace xsimd *************************/ template - inline batch_bool batch::operator!() const noexcept + XSIMD_INLINE batch_bool batch::operator!() const noexcept { detail::static_check_supported_config(); return kernel::eq(*this, batch(0), A {}); } template - inline batch batch::operator~() const noexcept + XSIMD_INLINE batch batch::operator~() const noexcept { detail::static_check_supported_config(); return kernel::bitwise_not(*this, A {}); } template - inline batch batch::operator-() const noexcept + XSIMD_INLINE batch batch::operator-() const noexcept { detail::static_check_supported_config(); return kernel::neg(*this, A {}); } template - inline batch batch::operator+() const noexcept + XSIMD_INLINE batch batch::operator+() const noexcept { detail::static_check_supported_config(); return *this; @@ -920,13 +920,13 @@ namespace xsimd ************************/ template - inline batch 
batch::logical_and(batch const& other) const noexcept + XSIMD_INLINE batch batch::logical_and(batch const& other) const noexcept { return kernel::logical_and(*this, other, A()); } template - inline batch batch::logical_or(batch const& other) const noexcept + XSIMD_INLINE batch batch::logical_or(batch const& other) const noexcept { return kernel::logical_or(*this, other, A()); } @@ -936,14 +936,14 @@ namespace xsimd ***************************/ template - inline batch_bool::batch_bool(register_type reg) noexcept + XSIMD_INLINE batch_bool::batch_bool(register_type reg) noexcept : types::get_bool_simd_register_t({ reg }) { } template template - inline batch_bool::batch_bool(bool val0, bool val1, Ts... vals) noexcept + XSIMD_INLINE batch_bool::batch_bool(bool val0, bool val1, Ts... vals) noexcept : batch_bool(kernel::set(batch_bool {}, A {}, val0, val1, static_cast(vals)...)) { static_assert(sizeof...(Ts) + 2 == size, "The constructor requires as many arguments as batch elements."); @@ -954,19 +954,19 @@ namespace xsimd *******************************/ template - inline void batch_bool::store_aligned(bool* mem) const noexcept + XSIMD_INLINE void batch_bool::store_aligned(bool* mem) const noexcept { kernel::store(*this, mem, A {}); } template - inline void batch_bool::store_unaligned(bool* mem) const noexcept + XSIMD_INLINE void batch_bool::store_unaligned(bool* mem) const noexcept { store_aligned(mem); } template - inline batch_bool batch_bool::load_aligned(bool const* mem) noexcept + XSIMD_INLINE batch_bool batch_bool::load_aligned(bool const* mem) noexcept { batch_type ref(0); alignas(A::alignment()) T buffer[size]; @@ -976,7 +976,7 @@ namespace xsimd } template - inline batch_bool batch_bool::load_unaligned(bool const* mem) noexcept + XSIMD_INLINE batch_bool batch_bool::load_unaligned(bool const* mem) noexcept { return load_aligned(mem); } @@ -987,7 +987,7 @@ namespace xsimd * @return bit mask */ template - inline uint64_t batch_bool::mask() const noexcept + XSIMD_INLINE uint64_t batch_bool::mask() const noexcept { return kernel::mask(*this, A {}); } @@ -998,13 +998,13 @@ namespace xsimd * @return bit mask */ template - inline batch_bool batch_bool::from_mask(uint64_t mask) noexcept + XSIMD_INLINE batch_bool batch_bool::from_mask(uint64_t mask) noexcept { return kernel::from_mask(batch_bool(), mask, A {}); } template - inline bool batch_bool::get(std::size_t i) const noexcept + XSIMD_INLINE bool batch_bool::get(std::size_t i) const noexcept { return kernel::get(*this, i, A {}); } @@ -1014,13 +1014,13 @@ namespace xsimd ***********************************/ template - inline batch_bool batch_bool::operator==(batch_bool const& other) const noexcept + XSIMD_INLINE batch_bool batch_bool::operator==(batch_bool const& other) const noexcept { return kernel::eq(*this, other, A {}).data; } template - inline batch_bool batch_bool::operator!=(batch_bool const& other) const noexcept + XSIMD_INLINE batch_bool batch_bool::operator!=(batch_bool const& other) const noexcept { return kernel::neq(*this, other, A {}).data; } @@ -1030,43 +1030,43 @@ namespace xsimd ********************************/ template - inline batch_bool batch_bool::operator~() const noexcept + XSIMD_INLINE batch_bool batch_bool::operator~() const noexcept { return kernel::bitwise_not(*this, A {}).data; } template - inline batch_bool batch_bool::operator!() const noexcept + XSIMD_INLINE batch_bool batch_bool::operator!() const noexcept { return operator==(batch_bool(false)); } template - inline batch_bool batch_bool::operator&(batch_bool 
const& other) const noexcept + XSIMD_INLINE batch_bool batch_bool::operator&(batch_bool const& other) const noexcept { return kernel::bitwise_and(*this, other, A {}).data; } template - inline batch_bool batch_bool::operator|(batch_bool const& other) const noexcept + XSIMD_INLINE batch_bool batch_bool::operator|(batch_bool const& other) const noexcept { return kernel::bitwise_or(*this, other, A {}).data; } template - inline batch_bool batch_bool::operator^(batch_bool const& other) const noexcept + XSIMD_INLINE batch_bool batch_bool::operator^(batch_bool const& other) const noexcept { return kernel::bitwise_xor(*this, other, A {}).data; } template - inline batch_bool batch_bool::operator&&(batch_bool const& other) const noexcept + XSIMD_INLINE batch_bool batch_bool::operator&&(batch_bool const& other) const noexcept { return operator&(other); } template - inline batch_bool batch_bool::operator||(batch_bool const& other) const noexcept + XSIMD_INLINE batch_bool batch_bool::operator||(batch_bool const& other) const noexcept { return operator|(other); } @@ -1076,21 +1076,21 @@ namespace xsimd ******************************/ template - inline batch_bool::batch_bool(bool val) noexcept + XSIMD_INLINE batch_bool::batch_bool(bool val) noexcept : base_type { make_register(detail::make_index_sequence(), val) } { } template template - inline auto batch_bool::make_register(detail::index_sequence, U u, V... v) noexcept -> register_type + XSIMD_INLINE auto batch_bool::make_register(detail::index_sequence, U u, V... v) noexcept -> register_type { return make_register(detail::index_sequence(), u, u, v...); } template template - inline auto batch_bool::make_register(detail::index_sequence<>, V... v) noexcept -> register_type + XSIMD_INLINE auto batch_bool::make_register(detail::index_sequence<>, V... v) noexcept -> register_type { return kernel::set(batch_bool(), A {}, v...).data; } @@ -1100,28 +1100,28 @@ namespace xsimd *******************************/ template - inline batch, A>::batch(value_type const& val) noexcept + XSIMD_INLINE batch, A>::batch(value_type const& val) noexcept : m_real(val.real()) , m_imag(val.imag()) { } template - inline batch, A>::batch(real_batch const& real, real_batch const& imag) noexcept + XSIMD_INLINE batch, A>::batch(real_batch const& real, real_batch const& imag) noexcept : m_real(real) , m_imag(imag) { } template - inline batch, A>::batch(real_batch const& real) noexcept + XSIMD_INLINE batch, A>::batch(real_batch const& real) noexcept : m_real(real) , m_imag(0) { } template - inline batch, A>::batch(T val) noexcept + XSIMD_INLINE batch, A>::batch(T val) noexcept : m_real(val) , m_imag(0) { @@ -1129,14 +1129,14 @@ namespace xsimd template template - inline batch, A>::batch(value_type val0, value_type val1, Ts... vals) noexcept + XSIMD_INLINE batch, A>::batch(value_type val0, value_type val1, Ts... 
vals) noexcept : batch(kernel::set(batch {}, A {}, val0, val1, static_cast(vals)...)) { static_assert(sizeof...(Ts) + 2 == size, "as many arguments as batch elements"); } template - inline batch, A>::batch(batch_bool_type const& b) noexcept + XSIMD_INLINE batch, A>::batch(batch_bool_type const& b) noexcept : m_real(b) , m_imag(0) { @@ -1144,7 +1144,7 @@ namespace xsimd template template - XSIMD_NO_DISCARD inline batch, A> batch, A>::broadcast(U val) noexcept + XSIMD_NO_DISCARD XSIMD_INLINE batch, A> batch, A>::broadcast(U val) noexcept { return batch(static_cast>(val)); } @@ -1154,18 +1154,18 @@ namespace xsimd ***********************************/ template - inline batch, A> batch, A>::load_aligned(const T* real_src, const T* imag_src) noexcept + XSIMD_INLINE batch, A> batch, A>::load_aligned(const T* real_src, const T* imag_src) noexcept { return { batch::load_aligned(real_src), imag_src ? batch::load_aligned(imag_src) : batch(0) }; } template - inline batch, A> batch, A>::load_unaligned(const T* real_src, const T* imag_src) noexcept + XSIMD_INLINE batch, A> batch, A>::load_unaligned(const T* real_src, const T* imag_src) noexcept { return { batch::load_unaligned(real_src), imag_src ? batch::load_unaligned(imag_src) : batch(0) }; } template - inline batch, A> batch, A>::load_aligned(const value_type* src) noexcept + XSIMD_INLINE batch, A> batch, A>::load_aligned(const value_type* src) noexcept { assert(((reinterpret_cast(src) % A::alignment()) == 0) && "loaded pointer is not properly aligned"); @@ -1173,13 +1173,13 @@ namespace xsimd } template - inline batch, A> batch, A>::load_unaligned(const value_type* src) noexcept + XSIMD_INLINE batch, A> batch, A>::load_unaligned(const value_type* src) noexcept { return kernel::load_complex_unaligned(src, kernel::convert {}, A {}); } template - inline void batch, A>::store_aligned(value_type* dst) const noexcept + XSIMD_INLINE void batch, A>::store_aligned(value_type* dst) const noexcept { assert(((reinterpret_cast(dst) % A::alignment()) == 0) && "store location is not properly aligned"); @@ -1187,20 +1187,20 @@ namespace xsimd } template - inline void batch, A>::store_unaligned(value_type* dst) const noexcept + XSIMD_INLINE void batch, A>::store_unaligned(value_type* dst) const noexcept { return kernel::store_complex_unaligned(dst, *this, A {}); } template - inline void batch, A>::store_aligned(T* real_dst, T* imag_dst) const noexcept + XSIMD_INLINE void batch, A>::store_aligned(T* real_dst, T* imag_dst) const noexcept { m_real.store_aligned(real_dst); m_imag.store_aligned(imag_dst); } template - inline void batch, A>::store_unaligned(T* real_dst, T* imag_dst) const noexcept + XSIMD_INLINE void batch, A>::store_unaligned(T* real_dst, T* imag_dst) const noexcept { m_real.store_unaligned(real_dst); m_imag.store_unaligned(imag_dst); @@ -1208,46 +1208,46 @@ namespace xsimd template template - inline batch, A> batch, A>::load(U const* mem, aligned_mode) noexcept + XSIMD_INLINE batch, A> batch, A>::load(U const* mem, aligned_mode) noexcept { return load_aligned(mem); } template template - inline batch, A> batch, A>::load(U const* mem, unaligned_mode) noexcept + XSIMD_INLINE batch, A> batch, A>::load(U const* mem, unaligned_mode) noexcept { return load_unaligned(mem); } template template - inline void batch, A>::store(U* mem, aligned_mode) const noexcept + XSIMD_INLINE void batch, A>::store(U* mem, aligned_mode) const noexcept { return store_aligned(mem); } template template - inline void batch, A>::store(U* mem, unaligned_mode) const noexcept + 
XSIMD_INLINE void batch, A>::store(U* mem, unaligned_mode) const noexcept { return store_unaligned(mem); } template - inline auto batch, A>::real() const noexcept -> real_batch + XSIMD_INLINE auto batch, A>::real() const noexcept -> real_batch { return m_real; } template - inline auto batch, A>::imag() const noexcept -> real_batch + XSIMD_INLINE auto batch, A>::imag() const noexcept -> real_batch { return m_imag; } template - inline auto batch, A>::get(std::size_t i) const noexcept -> value_type + XSIMD_INLINE auto batch, A>::get(std::size_t i) const noexcept -> value_type { return kernel::get(*this, i, A {}); } @@ -1260,7 +1260,7 @@ namespace xsimd template template - inline batch, A>::batch(xtl::xcomplex const& val) noexcept + XSIMD_INLINE batch, A>::batch(xtl::xcomplex const& val) noexcept : m_real(val.real()) , m_imag(val.imag()) { @@ -1268,7 +1268,7 @@ namespace xsimd template template - inline batch, A>::batch(xtl::xcomplex val0, xtl::xcomplex val1, Ts... vals) noexcept + XSIMD_INLINE batch, A>::batch(xtl::xcomplex val0, xtl::xcomplex val1, Ts... vals) noexcept : batch(kernel::set(batch {}, A {}, val0, val1, static_cast>(vals)...)) { static_assert(sizeof...(Ts) + 2 == size, "as many arguments as batch elements"); @@ -1280,28 +1280,28 @@ namespace xsimd template template - inline batch, A> batch, A>::load_aligned(const xtl::xcomplex* src) noexcept + XSIMD_INLINE batch, A> batch, A>::load_aligned(const xtl::xcomplex* src) noexcept { return load_aligned(reinterpret_cast const*>(src)); } template template - inline batch, A> batch, A>::load_unaligned(const xtl::xcomplex* src) noexcept + XSIMD_INLINE batch, A> batch, A>::load_unaligned(const xtl::xcomplex* src) noexcept { return load_unaligned(reinterpret_cast const*>(src)); } template template - inline void batch, A>::store_aligned(xtl::xcomplex* dst) const noexcept + XSIMD_INLINE void batch, A>::store_aligned(xtl::xcomplex* dst) const noexcept { store_aligned(reinterpret_cast*>(dst)); } template template - inline void batch, A>::store_unaligned(xtl::xcomplex* dst) const noexcept + XSIMD_INLINE void batch, A>::store_unaligned(xtl::xcomplex* dst) const noexcept { store_unaligned(reinterpret_cast*>(dst)); } @@ -1313,13 +1313,13 @@ namespace xsimd ***************************************/ template - inline batch_bool batch, A>::operator==(batch const& other) const noexcept + XSIMD_INLINE batch_bool batch, A>::operator==(batch const& other) const noexcept { return m_real == other.m_real && m_imag == other.m_imag; } template - inline batch_bool batch, A>::operator!=(batch const& other) const noexcept + XSIMD_INLINE batch_bool batch, A>::operator!=(batch const& other) const noexcept { return m_real != other.m_real || m_imag != other.m_imag; } @@ -1329,7 +1329,7 @@ namespace xsimd ***********************************/ template - inline batch, A>& batch, A>::operator+=(batch const& other) noexcept + XSIMD_INLINE batch, A>& batch, A>::operator+=(batch const& other) noexcept { m_real += other.m_real; m_imag += other.m_imag; @@ -1337,7 +1337,7 @@ namespace xsimd } template - inline batch, A>& batch, A>::operator-=(batch const& other) noexcept + XSIMD_INLINE batch, A>& batch, A>::operator-=(batch const& other) noexcept { m_real -= other.m_real; m_imag -= other.m_imag; @@ -1345,7 +1345,7 @@ namespace xsimd } template - inline batch, A>& batch, A>::operator*=(batch const& other) noexcept + XSIMD_INLINE batch, A>& batch, A>::operator*=(batch const& other) noexcept { real_batch new_real = fms(real(), other.real(), imag() * other.imag()); real_batch 
new_imag = fma(real(), other.imag(), imag() * other.real()); @@ -1355,7 +1355,7 @@ namespace xsimd } template - inline batch, A>& batch, A>::operator/=(batch const& other) noexcept + XSIMD_INLINE batch, A>& batch, A>::operator/=(batch const& other) noexcept { real_batch a = real(); real_batch b = imag(); @@ -1372,19 +1372,19 @@ namespace xsimd **************************************/ template - inline batch, A>& batch, A>::operator++() noexcept + XSIMD_INLINE batch, A>& batch, A>::operator++() noexcept { return operator+=(1); } template - inline batch, A>& batch, A>::operator--() noexcept + XSIMD_INLINE batch, A>& batch, A>::operator--() noexcept { return operator-=(1); } template - inline batch, A> batch, A>::operator++(int) noexcept + XSIMD_INLINE batch, A> batch, A>::operator++(int) noexcept { batch copy(*this); operator+=(1); @@ -1392,7 +1392,7 @@ namespace xsimd } template - inline batch, A> batch, A>::operator--(int) noexcept + XSIMD_INLINE batch, A> batch, A>::operator--(int) noexcept { batch copy(*this); operator-=(1); @@ -1404,25 +1404,25 @@ namespace xsimd **********************************/ template - inline batch_bool batch, A>::operator!() const noexcept + XSIMD_INLINE batch_bool batch, A>::operator!() const noexcept { return operator==(batch(0)); } template - inline batch, A> batch, A>::operator~() const noexcept + XSIMD_INLINE batch, A> batch, A>::operator~() const noexcept { return { ~m_real, ~m_imag }; } template - inline batch, A> batch, A>::operator-() const noexcept + XSIMD_INLINE batch, A> batch, A>::operator-() const noexcept { return { -m_real, -m_imag }; } template - inline batch, A> batch, A>::operator+() const noexcept + XSIMD_INLINE batch, A> batch, A>::operator+() const noexcept { return { +m_real, +m_imag }; } diff --git a/include/xsimd/types/xsimd_batch_constant.hpp b/include/xsimd/types/xsimd_batch_constant.hpp index cb2822044..3d9603277 100644 --- a/include/xsimd/types/xsimd_batch_constant.hpp +++ b/include/xsimd/types/xsimd_batch_constant.hpp @@ -138,12 +138,12 @@ namespace xsimd /** * @brief Generate a batch of @p batch_type from this @p batch_constant */ - inline batch_type as_batch() const noexcept { return { Values... }; } + XSIMD_INLINE batch_type as_batch() const noexcept { return { Values... 
}; } /** * @brief Generate a batch of @p batch_type from this @p batch_constant */ - inline operator batch_type() const noexcept { return as_batch(); } + XSIMD_INLINE operator batch_type() const noexcept { return as_batch(); } /** * @brief Get the @p i th element of this @p batch_constant @@ -246,13 +246,13 @@ namespace xsimd namespace detail { template - inline constexpr auto make_batch_constant(detail::index_sequence) noexcept + XSIMD_INLINE constexpr auto make_batch_constant(detail::index_sequence) noexcept -> batch_constant { return {}; } template - inline constexpr auto make_batch_bool_constant(detail::index_sequence) noexcept + XSIMD_INLINE constexpr auto make_batch_bool_constant(detail::index_sequence) noexcept -> batch_bool_constant { return {}; @@ -281,13 +281,13 @@ namespace xsimd * @endcode */ template - inline constexpr auto make_batch_constant() noexcept -> decltype(detail::make_batch_constant(detail::make_index_sequence::size>())) + XSIMD_INLINE constexpr auto make_batch_constant() noexcept -> decltype(detail::make_batch_constant(detail::make_index_sequence::size>())) { return detail::make_batch_constant(detail::make_index_sequence::size>()); } template - inline constexpr auto make_batch_bool_constant() noexcept + XSIMD_INLINE constexpr auto make_batch_bool_constant() noexcept -> decltype(detail::make_batch_bool_constant( detail::make_index_sequence::size>())) { diff --git a/include/xsimd/types/xsimd_emulated_register.hpp b/include/xsimd/types/xsimd_emulated_register.hpp index b05d71814..6e0d659bd 100644 --- a/include/xsimd/types/xsimd_emulated_register.hpp +++ b/include/xsimd/types/xsimd_emulated_register.hpp @@ -55,7 +55,7 @@ namespace xsimd static_assert(N % (8 * sizeof(T)) == 0, "bit width must be a multiple of scalar width"); using register_type = std::array; register_type data; - inline operator register_type() const noexcept + XSIMD_INLINE operator register_type() const noexcept { return data; } diff --git a/include/xsimd/types/xsimd_register.hpp b/include/xsimd/types/xsimd_register.hpp index 4fe4f3f13..a838f8786 100644 --- a/include/xsimd/types/xsimd_register.hpp +++ b/include/xsimd/types/xsimd_register.hpp @@ -37,7 +37,7 @@ namespace xsimd { \ using register_type = VECTOR_TYPE; \ register_type data; \ - inline operator register_type() const noexcept \ + XSIMD_INLINE operator register_type() const noexcept \ { \ return data; \ } \ diff --git a/include/xsimd/types/xsimd_rvv_register.hpp b/include/xsimd/types/xsimd_rvv_register.hpp index ff03b6508..8887f8a0b 100644 --- a/include/xsimd/types/xsimd_rvv_register.hpp +++ b/include/xsimd/types/xsimd_rvv_register.hpp @@ -88,14 +88,14 @@ namespace xsimd using byte_type = XSIMD_RVV_TYPE(u, 8, vmul); \ using fixed_type = type __attribute__((riscv_rvv_vector_bits(width))); \ template \ - static inline type bitcast(U x) noexcept \ + static XSIMD_INLINE type bitcast(U x) noexcept \ { \ const auto words = XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, s, m, vmul)(x); \ return XSIMD_RVV_JOINT5(__riscv_vreinterpret_, t, s, m, vmul)(words); \ } \ template <> \ - inline type bitcast(type x) noexcept { return x; } \ - static inline byte_type as_bytes(type x) noexcept \ + XSIMD_INLINE type bitcast(type x) noexcept { return x; } \ + static XSIMD_INLINE byte_type as_bytes(type x) noexcept \ { \ const auto words = XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, s, m, vmul)(x); \ return XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, 8, m, vmul)(words); \ @@ -267,17 +267,17 @@ namespace xsimd // template struct rvv_bool_info; -#define 
XSIMD_RVV_MAKE_BOOL_TYPE(i) \ - template <> \ - struct rvv_bool_info \ - { \ - using type = XSIMD_RVV_JOINT(vbool, i, _t); \ - template \ - static inline type bitcast(T value) noexcept \ - { \ - return XSIMD_RVV_JOINT(__riscv_vreinterpret_b, i, )(value); \ - } \ - /*template <> static inline type bitcast(type value) noexcept { return value; }*/ \ +#define XSIMD_RVV_MAKE_BOOL_TYPE(i) \ + template <> \ + struct rvv_bool_info \ + { \ + using type = XSIMD_RVV_JOINT(vbool, i, _t); \ + template \ + static XSIMD_INLINE type bitcast(T value) noexcept \ + { \ + return XSIMD_RVV_JOINT(__riscv_vreinterpret_b, i, )(value); \ + } \ + /*template <> static XSIMD_INLINE type bitcast(type value) noexcept { return value; }*/ \ }; XSIMD_RVV_MAKE_BOOL_TYPE(1); XSIMD_RVV_MAKE_BOOL_TYPE(2); diff --git a/include/xsimd/types/xsimd_traits.hpp b/include/xsimd/types/xsimd_traits.hpp index f848aab1f..471e979a4 100644 --- a/include/xsimd/types/xsimd_traits.hpp +++ b/include/xsimd/types/xsimd_traits.hpp @@ -86,7 +86,7 @@ namespace xsimd // consistency checker template - inline void static_check_supported_config() + XSIMD_INLINE void static_check_supported_config() { (void)static_check_supported_config_emitter(); } diff --git a/include/xsimd/xsimd.hpp b/include/xsimd/xsimd.hpp index 8d76a5f91..b5548e7ac 100644 --- a/include/xsimd/xsimd.hpp +++ b/include/xsimd/xsimd.hpp @@ -51,6 +51,7 @@ #endif #include "config/xsimd_config.hpp" +#include "config/xsimd_inline.hpp" #include "arch/xsimd_scalar.hpp" #include "memory/xsimd_aligned_allocator.hpp"
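// A minimal sketch of what the newly included config/xsimd_inline.hpp could
// define (the header's body is not shown in this patch, so this is an
// assumption): XSIMD_INLINE is expected to behave like `inline`, optionally
// strengthened with a compiler-specific always-inline hint.
#ifndef XSIMD_INLINE_HPP
#define XSIMD_INLINE_HPP

#if defined(__GNUC__) || defined(__clang__)
#define XSIMD_INLINE inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#define XSIMD_INLINE __forceinline
#else
#define XSIMD_INLINE inline
#endif

#endif // XSIMD_INLINE_HPP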