From 6a5007c1c4fc0feeb457bd6a8826b4a7241e2f7e Mon Sep 17 00:00:00 2001 From: jatin Date: Sat, 16 Dec 2023 14:25:38 -0800 Subject: [PATCH] Using bit_cast, and making more things constexpr --- .github/workflows/run_tests.yml | 6 +++ include/math_approx/src/asinh_approx.hpp | 2 +- include/math_approx/src/basic_math.hpp | 2 + include/math_approx/src/log_approx.hpp | 30 ++++++------ .../math_approx/src/polylogarithm_approx.hpp | 8 ++-- include/math_approx/src/pow_approx.hpp | 18 +++---- include/math_approx/src/sigmoid_approx.hpp | 8 ++-- include/math_approx/src/sinh_cosh_approx.hpp | 6 +-- include/math_approx/src/tanh_approx.hpp | 10 ++-- include/math_approx/src/trig_approx.hpp | 48 +++++++++---------- .../math_approx/src/wright_omega_approx.hpp | 6 +-- 11 files changed, 76 insertions(+), 68 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 6ea5321..0d2597a 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -27,6 +27,12 @@ jobs: # sudo apt-get update # sudo apt install libasound2-dev libcurl4-openssl-dev libx11-dev libxinerama-dev libxext-dev libfreetype6-dev libwebkit2gtk-4.0-dev libglu1-mesa-dev libsamplerate-dev + - name: Install Xcode + if: runner.os == 'MacOS' + uses: maxim-lobanov/setup-xcode@v1 + with: + xcode-version: '14.3.1' + - name: Get latest CMake uses: lukka/get-cmake@latest diff --git a/include/math_approx/src/asinh_approx.hpp b/include/math_approx/src/asinh_approx.hpp index bcc67a6..c2db5e3 100644 --- a/include/math_approx/src/asinh_approx.hpp +++ b/include/math_approx/src/asinh_approx.hpp @@ -52,7 +52,7 @@ struct AsinhLog2Provider * accuracy achieved by the STL implementation). */ template -T asinh (T x) +constexpr T asinh (T x) { using S = scalar_of_t; using std::abs; diff --git a/include/math_approx/src/basic_math.hpp b/include/math_approx/src/basic_math.hpp index 9a1f2a5..f38ca6c 100644 --- a/include/math_approx/src/basic_math.hpp +++ b/include/math_approx/src/basic_math.hpp @@ -24,6 +24,8 @@ using scalar_of_t = typename scalar_of::type; template T rsqrt (T x) { + // @TODO: figure out a way that we can make this method constexpr + // sqrtss followed by divss... this seems to measure a bit faster than the rsqrtss plus NR iteration below return (T) 1 / std::sqrt (x); diff --git a/include/math_approx/src/log_approx.hpp b/include/math_approx/src/log_approx.hpp index 0f19b3c..11be37b 100644 --- a/include/math_approx/src/log_approx.hpp +++ b/include/math_approx/src/log_approx.hpp @@ -103,29 +103,29 @@ namespace log_detail /** approximation for log(Base, x) (32-bit) */ template -float log (float x) +constexpr float log (float x) { - const auto vi = reinterpret_cast (x); + const auto vi = std::bit_cast (x); const auto ex = vi & 0x7f800000; const auto e = (ex >> 23) - 127; const auto vfi = (vi - ex) | 0x3f800000; - const auto vf = reinterpret_cast (vfi); + const auto vf = std::bit_cast (vfi); - static constexpr auto log2_base_r = 1.0f / Base::log2_base; + constexpr auto log2_base_r = 1.0f / Base::log2_base; return log2_base_r * ((float) e + Log2ProviderType::template log2_approx (vf)); } /** approximation for log(x) (64-bit) */ template -double log (double x) +constexpr double log (double x) { - const auto vi = reinterpret_cast (x); + const auto vi = std::bit_cast (x); const auto ex = vi & 0x7ff0000000000000; const auto e = (ex >> 52) - 1023; const auto vfi = (vi - ex) | 0x3ff0000000000000; - const auto vf = reinterpret_cast (vfi); + const auto vf = std::bit_cast (vfi); - static constexpr auto log2_base_r = 1.0 / Base::log2_base; + constexpr auto log2_base_r = 1.0 / Base::log2_base; return log2_base_r * ((double) e + Log2ProviderType::template log2_approx (vf)); } @@ -134,11 +134,11 @@ double log (double x) template xsimd::batch log (xsimd::batch x) { - const auto vi = reinterpret_cast&> (x); // NOSONAR + const auto vi = xsimd::bit_cast> (x); const auto ex = vi & 0x7f800000; const auto e = (ex >> 23) - 127; const auto vfi = (vi - ex) | 0x3f800000; - const auto vf = reinterpret_cast&> (vfi); // NOSONAR + const auto vf = xsimd::bit_cast> (vfi); static constexpr auto log2_base_r = 1.0f / Base::log2_base; return log2_base_r * (xsimd::to_float (e) + Log2ProviderType::template log2_approx, order, C1_continuous> (vf)); @@ -148,11 +148,11 @@ xsimd::batch log (xsimd::batch x) template xsimd::batch log (xsimd::batch x) { - const auto vi = reinterpret_cast&> (x); // NOSONAR + const auto vi = xsimd::bit_cast> (x); const auto ex = vi & 0x7ff0000000000000; const auto e = (ex >> 52) - 1023; const auto vfi = (vi - ex) | 0x3ff0000000000000; - const auto vf = reinterpret_cast&> (vfi); // NOSONAR + const auto vf = xsimd::bit_cast> (vfi); static constexpr auto log2_base_r = 1.0 / Base::log2_base; return log2_base_r * (xsimd::to_float (e) + Log2ProviderType::template log2_approx, order, C1_continuous> (vf)); @@ -164,19 +164,19 @@ xsimd::batch log (xsimd::batch x) #endif template -T log (T x) +constexpr T log (T x) { return log>, order, C1_continuous> (x); } template -T log2 (T x) +constexpr T log2 (T x) { return log>, order, C1_continuous> (x); } template -T log10 (T x) +constexpr T log10 (T x) { return log>, order, C1_continuous> (x); } diff --git a/include/math_approx/src/polylogarithm_approx.hpp b/include/math_approx/src/polylogarithm_approx.hpp index c3dbe82..c28b894 100644 --- a/include/math_approx/src/polylogarithm_approx.hpp +++ b/include/math_approx/src/polylogarithm_approx.hpp @@ -14,7 +14,7 @@ namespace math_approx * improve the accuracy very much. */ template -T li2_0_half (T x) +constexpr T li2_0_half (T x) { static_assert (order >= 1 && order <= 6); using S = scalar_of_t; @@ -104,13 +104,13 @@ T li2_0_half (T x) * improve the accuracy very much. */ template = 5), typename T> -T li2 (T x) +constexpr T li2 (T x) { const auto x_r = (T) 1 / x; const auto x_r1 = (T) 1 / (x - (T) 1); - static constexpr auto pisq_o_6 = (T) M_PI * (T) M_PI / (T) 6; - static constexpr auto pisq_o_3 = (T) M_PI * (T) M_PI / (T) 3; + constexpr auto pisq_o_6 = (T) M_PI * (T) M_PI / (T) 6; + constexpr auto pisq_o_3 = (T) M_PI * (T) M_PI / (T) 3; T y, r; bool sign = true; diff --git a/include/math_approx/src/pow_approx.hpp b/include/math_approx/src/pow_approx.hpp index 08ba50c..b66b60b 100644 --- a/include/math_approx/src/pow_approx.hpp +++ b/include/math_approx/src/pow_approx.hpp @@ -139,7 +139,7 @@ namespace pow_detail /** approximation for pow(Base, x) (32-bit) */ template -float pow (float x) +constexpr float pow (float x) { x = std::max (-126.0f, Base::log2_base * x); @@ -148,12 +148,12 @@ float pow (float x) const auto f = x - (float) l; const auto vi = (l + 127) << 23; - return reinterpret_cast (vi) * pow_detail::pow2_approx (f); + return std::bit_cast (vi) * pow_detail::pow2_approx (f); } /** approximation for pow(Base, x) (64-bit) */ template -double pow (double x) +constexpr double pow (double x) { x = std::max (-1022.0, Base::log2_base * x); @@ -162,7 +162,7 @@ double pow (double x) const auto d = x - (double) l; const auto vi = (l + 1023) << 52; - return reinterpret_cast (vi) * pow_detail::pow2_approx (d); + return std::bit_cast (vi) * pow_detail::pow2_approx (d); } #if defined(XSIMD_HPP) @@ -177,7 +177,7 @@ xsimd::batch pow (xsimd::batch x) const auto f = x - xsimd::to_float (l); const auto vi = (l + 127) << 23; - return reinterpret_cast&> (vi) * pow_detail::pow2_approx, order, C1_continuous> (f); + return xsimd::bit_cast> (vi) * pow_detail::pow2_approx, order, C1_continuous> (f); } /** approximation for pow(Base, x) (64-bit SIMD) */ @@ -191,7 +191,7 @@ xsimd::batch pow (xsimd::batch x) const auto d = x - xsimd::to_float (l); const auto vi = (l + 1023) << 52; - return reinterpret_cast&> (vi) * pow_detail::pow2_approx, order, C1_continuous> (d); + return xsimd::bit_cast> (vi) * pow_detail::pow2_approx, order, C1_continuous> (d); } #endif @@ -200,19 +200,19 @@ xsimd::batch pow (xsimd::batch x) #endif template -T exp (T x) +constexpr T exp (T x) { return pow>, order, C1_continuous> (x); } template -T exp2 (T x) +constexpr T exp2 (T x) { return pow>, order, C1_continuous> (x); } template -T exp10 (T x) +constexpr T exp10 (T x) { return pow>, order, C1_continuous> (x); } diff --git a/include/math_approx/src/sigmoid_approx.hpp b/include/math_approx/src/sigmoid_approx.hpp index 4e84e28..ac5252d 100644 --- a/include/math_approx/src/sigmoid_approx.hpp +++ b/include/math_approx/src/sigmoid_approx.hpp @@ -9,7 +9,7 @@ namespace sigmoid_detail // These polynomial fits were generated from: https://www.wolframcloud.com/obj/chowdsp/Published/sigmoid_approx.nb template - T sig_poly_9 (T x) + constexpr T sig_poly_9 (T x) { using S = scalar_of_t; const auto x_sq = x * x; @@ -21,7 +21,7 @@ namespace sigmoid_detail } template - T sig_poly_7 (T x) + constexpr T sig_poly_7 (T x) { using S = scalar_of_t; const auto x_sq = x * x; @@ -32,7 +32,7 @@ namespace sigmoid_detail } template - T sig_poly_5 (T x) + constexpr T sig_poly_5 (T x) { using S = scalar_of_t; const auto x_sq = x * x; @@ -42,7 +42,7 @@ namespace sigmoid_detail } template - T sig_poly_3 (T x) + constexpr T sig_poly_3 (T x) { using S = scalar_of_t; const auto x_sq = x * x; diff --git a/include/math_approx/src/sinh_cosh_approx.hpp b/include/math_approx/src/sinh_cosh_approx.hpp index 651da48..6dab976 100644 --- a/include/math_approx/src/sinh_cosh_approx.hpp +++ b/include/math_approx/src/sinh_cosh_approx.hpp @@ -11,7 +11,7 @@ namespace math_approx /** Approximation of sinh(x), using exp(x) internally */ template -T sinh (T x) +constexpr T sinh (T x) { using S = scalar_of_t; auto B = exp (x); @@ -22,7 +22,7 @@ T sinh (T x) /** Approximation of cosh(x), using exp(x) internally */ template -T cosh (T x) +constexpr T cosh (T x) { using S = scalar_of_t; auto B = exp (x); @@ -38,7 +38,7 @@ T cosh (T x) * For more information see the comments above. */ template -auto sinh_cosh (T x) +constexpr auto sinh_cosh (T x) { using S = scalar_of_t; auto B = exp (x); diff --git a/include/math_approx/src/tanh_approx.hpp b/include/math_approx/src/tanh_approx.hpp index e4c8283..092a00d 100644 --- a/include/math_approx/src/tanh_approx.hpp +++ b/include/math_approx/src/tanh_approx.hpp @@ -9,7 +9,7 @@ namespace tanh_detail // These polynomial fits were generated from: https://www.wolframcloud.com/obj/chowdsp/Published/tanh_approx.nb template - T tanh_poly_11 (T x) + constexpr T tanh_poly_11 (T x) { using S = scalar_of_t; const auto x_sq = x * x; @@ -22,7 +22,7 @@ namespace tanh_detail } template - T tanh_poly_9 (T x) + constexpr T tanh_poly_9 (T x) { using S = scalar_of_t; const auto x_sq = x * x; @@ -34,7 +34,7 @@ namespace tanh_detail } template - T tanh_poly_7 (T x) + constexpr T tanh_poly_7 (T x) { using S = scalar_of_t; const auto x_sq = x * x; @@ -45,7 +45,7 @@ namespace tanh_detail } template - T tanh_poly_5 (T x) + constexpr T tanh_poly_5 (T x) { using S = scalar_of_t; const auto x_sq = x * x; @@ -55,7 +55,7 @@ namespace tanh_detail } template - T tanh_poly_3 (T x) + constexpr T tanh_poly_3 (T x) { using S = scalar_of_t; const auto x_sq = x * x; diff --git a/include/math_approx/src/trig_approx.hpp b/include/math_approx/src/trig_approx.hpp index d263a83..d74db9e 100644 --- a/include/math_approx/src/trig_approx.hpp +++ b/include/math_approx/src/trig_approx.hpp @@ -7,7 +7,7 @@ namespace math_approx namespace trig_detail { template - T truncate (T x) + constexpr T truncate (T x) { return static_cast (static_cast (x)); } @@ -22,12 +22,12 @@ namespace trig_detail /** Fast method to wrap a value into the range [-pi, pi] */ template - T fast_mod_mpi_pi (T x) + constexpr T fast_mod_mpi_pi (T x) { using S = scalar_of_t; - static constexpr auto pi = static_cast (M_PI); - static constexpr auto two_pi = static_cast (2.0 * M_PI); - static constexpr auto recip_two_pi = static_cast (1) / two_pi; + constexpr auto pi = static_cast (M_PI); + constexpr auto two_pi = static_cast (2.0 * M_PI); + constexpr auto recip_two_pi = static_cast (1) / two_pi; x += pi; const auto mod = x - two_pi * truncate (x * recip_two_pi); @@ -36,12 +36,12 @@ namespace trig_detail /** Fast method to wrap a value into the range [-pi/2, pi/2] */ template - T fast_mod_mhalfpi_halfpi (T x) + constexpr T fast_mod_mhalfpi_halfpi (T x) { using S = scalar_of_t; - static constexpr auto half_pi = static_cast (M_PI) * (S) 0.5; - static constexpr auto pi = static_cast (M_PI); - static constexpr auto recip_pi = (S) 1 / pi; + constexpr auto half_pi = static_cast (M_PI) * (S) 0.5; + constexpr auto pi = static_cast (M_PI); + constexpr auto recip_pi = (S) 1 / pi; x += half_pi; const auto mod = x - pi * truncate (x * recip_pi); @@ -49,7 +49,7 @@ namespace trig_detail } template - T sin_poly_9 (T x, T x_sq) + constexpr T sin_poly_9 (T x, T x_sq) { using S = scalar_of_t; const auto x_7_9 = (S) -2.49397084313e-6 + (S) 2.00382818811e-8 * x_sq; @@ -60,7 +60,7 @@ namespace trig_detail } template - T sin_poly_7 (T x, T x_sq) + constexpr T sin_poly_7 (T x, T x_sq) { using S = scalar_of_t; const auto x_5_7 = (S) 0.000170965340046 + (S) -2.09843101304e-6 * x_sq; @@ -70,7 +70,7 @@ namespace trig_detail } template - T sin_poly_5 (T x, T x_sq) + constexpr T sin_poly_5 (T x, T x_sq) { using S = scalar_of_t; const auto x_3_5 = (S) -0.00650096169550 + (S) 0.000139899314103 * x_sq; @@ -80,13 +80,13 @@ namespace trig_detail } // namespace sin_detail template -T sin_mpi_pi (T x) +constexpr T sin_mpi_pi (T x) { static_assert (order % 2 == 1 && order <= 9 && order >= 5, "Order must be an odd number within [5, 9]"); using S = scalar_of_t; - static constexpr auto pi = static_cast (M_PI); - static constexpr auto pi_sq = pi * pi; + constexpr auto pi = static_cast (M_PI); + constexpr auto pi_sq = pi * pi; const auto x_sq = x * x; T x_poly {}; @@ -101,20 +101,20 @@ T sin_mpi_pi (T x) } template -T sin (T x) +constexpr T sin (T x) { return sin_mpi_pi (trig_detail::fast_mod_mpi_pi (x)); } template -T cos_mpi_pi (T x) +constexpr T cos_mpi_pi (T x) { static_assert (order % 2 == 1 && order <= 9 && order >= 5, "Order must be an odd number within [5, 9]"); using S = scalar_of_t; - static constexpr auto pi = static_cast (M_PI); - static constexpr auto pi_sq = pi * pi; - static constexpr auto pi_o_2 = pi * (S) 0.5; + constexpr auto pi = static_cast (M_PI); + constexpr auto pi_sq = pi * pi; + constexpr auto pi_o_2 = pi * (S) 0.5; using std::abs; #if defined(XSIMD_HPP) @@ -137,14 +137,14 @@ T cos_mpi_pi (T x) } template -T cos (T x) +constexpr T cos (T x) { return cos_mpi_pi (trig_detail::fast_mod_mpi_pi (x)); } /** Approximation of tan(x) on the range [-pi/4, pi/4] */ template -T tan_mquarterpi_quarterpi (T x) +constexpr T tan_mquarterpi_quarterpi (T x) { static_assert (order % 2 == 1 && order >= 3 && order <= 15, "Order must be an odd number within [3, 15]"); @@ -222,7 +222,7 @@ T tan_mquarterpi_quarterpi (T x) * Accuracy may suffer as x approaches ±pi/2. */ template -T tan_mhalfpi_halfpi (T x) +constexpr T tan_mhalfpi_halfpi (T x) { using S = scalar_of_t; const auto h_x = tan_mquarterpi_quarterpi ((S) 0.5 * x); @@ -235,7 +235,7 @@ T tan_mhalfpi_halfpi (T x) * Accuracy may suffer as x approaches values for which tan(x) approaches ±Inf. */ template -T tan (T x) +constexpr T tan (T x) { return tan_mhalfpi_halfpi (trig_detail::fast_mod_mhalfpi_halfpi (x)); } diff --git a/include/math_approx/src/wright_omega_approx.hpp b/include/math_approx/src/wright_omega_approx.hpp index 287e17e..b14c4f0 100644 --- a/include/math_approx/src/wright_omega_approx.hpp +++ b/include/math_approx/src/wright_omega_approx.hpp @@ -5,12 +5,12 @@ namespace math_approx { template -T wright_omega (T x) +constexpr T wright_omega (T x) { static_assert (poly_order == 3 || poly_order == 5); using S = scalar_of_t; - static constexpr auto E = (S) 2.7182818284590452354; + constexpr auto E = (S) 2.7182818284590452354; const auto x1 = [] (T _x) { @@ -56,7 +56,7 @@ T wright_omega (T x) * For more accuracy, use the other implementation with at least 1 NR iteration. */ template -T wright_omega_dangelo (T x) +constexpr T wright_omega_dangelo (T x) { using S = scalar_of_t;