diff --git a/include/math_approx/math_approx.hpp b/include/math_approx/math_approx.hpp index aa1b396..a1d49b0 100644 --- a/include/math_approx/math_approx.hpp +++ b/include/math_approx/math_approx.hpp @@ -11,6 +11,7 @@ namespace math_approx #include "src/log_approx.hpp" #include "src/tanh_approx.hpp" #include "src/sinh_cosh_approx.hpp" +#include "src/asinh_approx.hpp" #include "src/sigmoid_approx.hpp" #include "src/wright_omega_approx.hpp" #include "src/polylogarithm_approx.hpp" diff --git a/include/math_approx/src/asinh_approx.hpp b/include/math_approx/src/asinh_approx.hpp new file mode 100644 index 0000000..bcc67a6 --- /dev/null +++ b/include/math_approx/src/asinh_approx.hpp @@ -0,0 +1,79 @@ +#pragma once + +#include "log_approx.hpp" + +namespace math_approx +{ +struct AsinhLog2Provider +{ + /** approximation for log2(x), optimized on the range [1, 2], to be used within an asinh(x) computation */ + template + static constexpr T log2_approx (T x) + { + static_assert (order >= 3 && order <= 5); + using S = scalar_of_t; + + const auto x_sq = x * x; + if constexpr (order == 3) + { + const auto x_2_3 = (S) -1.21535595794871 + (S) 0.194363894384581 * x; + const auto x_0_1 = (S) -2.26452854958994 + (S) 3.28552061315407 * x; + return x_0_1 + x_2_3 * x_sq; + } + else if constexpr (order == 4) + { + const auto x_3_4 = (S) 0.770443387059628 + (S) -0.102652345633016 * x; + const auto x_1_2 = (S) 4.33013912645867 + (S) -2.39448588379361 * x; + const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; + return (S) -2.60344428409168 + x_1_2_3_4 * x; + } + else if constexpr (order == 5) + { + const auto x_4_5 = (S) -0.511946284688366 + (S) 0.0578217518982235 * x; + const auto x_2_3 = (S) -3.94632584968643 + (S) 1.90796087279737 * x; + const auto x_0_1 = (S) -2.87748189127908 + (S) 5.36997140095829 * x; + const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; + return x_0_1 + x_2_3_4_5 * x_sq; + } + else + { + return {}; + } + } +}; + +/** + * Approximation of asinh(x) in the full range, using identity + * asinh(x) = log(x + sqrt(x^2 + 1)). + * + * Orders 6 and 7 use an additional Newton-Raphson iteration, + * but for most cases the accuracy improvement is not worth + * the additional cost (when compared to the performance and + * accuracy achieved by the STL implementation). + */ +template +T asinh (T x) +{ + using S = scalar_of_t; + using std::abs; + using std::sqrt; +#if defined(XSIMD_HPP) + using xsimd::abs; + using xsimd::sqrt; +#endif + + const auto sign = select (x > (S) 0, (T) (S) 1, select (x < (S) 0, (T) (S) -1, (T) (S) 0)); + x = abs (x); + + const auto log_arg = x + sqrt (x * x + (S) 1); + auto y = log>, std::min (order, 5), false, AsinhLog2Provider> (log_arg); + + if constexpr (order > 5) + { + const auto exp_y = math_approx::exp (y); + y -= (exp_y - log_arg) / exp_y; + } + + return sign * y; +} +} // namespace math_approx diff --git a/include/math_approx/src/log_approx.hpp b/include/math_approx/src/log_approx.hpp index 8571638..0f19b3c 100644 --- a/include/math_approx/src/log_approx.hpp +++ b/include/math_approx/src/log_approx.hpp @@ -7,89 +7,92 @@ namespace math_approx { namespace log_detail { - /** approximation for log2(x), optimized on the range [1, 2] */ - template - constexpr T log2_approx (T x) + struct Log2Provider { - static_assert (order >= 3 && order <= 6); - using S = scalar_of_t; - - const auto x_sq = x * x; - if constexpr (C1_continuous) + /** approximation for log2(x), optimized on the range [1, 2] */ + template + static constexpr T log2_approx (T x) { - if constexpr (order == 3) - { - const auto x_2_3 = (S) -1.09886528622 + (S) 0.164042561333 * x; - const auto x_0_1 = (S) -2.21347520444 + (S) 3.14829792933 * x; - return x_0_1 + x_2_3 * x_sq; - } - else if constexpr (order == 4) - { - const auto x_3_4 = (S) 0.671618567027 + (S) -0.0845960009489 * x; - const auto x_1_2 = (S) 4.16344994072 + (S) -2.19861329856 * x; - const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; - return (S) -2.55185920824 + x_1_2_3_4 * x; - } - else if constexpr (order == 5) - { - const auto x_4_5 = (S) -0.432338320780 + (S) 0.0464481811023 * x; - const auto x_2_3 = (S) -3.65368350361 + (S) 1.68976432066 * x; - const auto x_0_1 = (S) -2.82807214111 + (S) 5.17788146374 * x; - const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; - return x_0_1 + x_2_3_4_5 * x_sq; - } - else if constexpr (order == 6) - { - const auto x_5_6 = (S) 0.284794437502 + (S) -0.0265448504094 * x; - const auto x_3_4 = (S) 3.38542517475 + (S) -1.31007090775 * x; - const auto x_1_2 = (S) 6.19242937536 + (S) -5.46521465640 * x; - const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq; - const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; - return (S) -3.06081857306 + x_1_2_3_4_5_6 * x; - } - else - { - return {}; - } - } - else - { - if constexpr (order == 3) - { - const auto x_2_3 = (S) -1.05974531422 + (S) 0.159220010975 * x; - const auto x_0_1 = (S) -2.16417056258 + (S) 3.06469586582 * x; - return x_0_1 + x_2_3 * x_sq; - } - else if constexpr (order == 4) - { - const auto x_3_4 = (S) 0.649709537672 + (S) -0.0821303550902 * x; - const auto x_1_2 = (S) 4.08637809379 + (S) -2.13412984371 * x; - const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; - return (S) -2.51982743265 + x_1_2_3_4 * x; - } - else if constexpr (order == 5) - { - const auto x_4_5 = (S) -0.419319345483 + (S) 0.0451488402558 * x; - const auto x_2_3 = (S) -3.56885211615 + (S) 1.64139451414 * x; - const auto x_0_1 = (S) -2.80534277658 + (S) 5.10697088382 * x; - const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; - return x_0_1 + x_2_3_4_5 * x_sq; - } - else if constexpr (order == 6) + static_assert (order >= 3 && order <= 6); + using S = scalar_of_t; + + const auto x_sq = x * x; + if constexpr (C1_continuous) { - const auto x_5_6 = (S) 0.276834061071 + (S) -0.0258400886535 * x; - const auto x_3_4 = (S) 3.30388341157 + (S) -1.27446900713 * x; - const auto x_1_2 = (S) 6.12708086513 + (S) -5.36371998242 * x; - const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq; - const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; - return (S) -3.04376925958 + x_1_2_3_4_5_6 * x; + if constexpr (order == 3) + { + const auto x_2_3 = (S) -1.09886528622 + (S) 0.164042561333 * x; + const auto x_0_1 = (S) -2.21347520444 + (S) 3.14829792933 * x; + return x_0_1 + x_2_3 * x_sq; + } + else if constexpr (order == 4) + { + const auto x_3_4 = (S) 0.671618567027 + (S) -0.0845960009489 * x; + const auto x_1_2 = (S) 4.16344994072 + (S) -2.19861329856 * x; + const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; + return (S) -2.55185920824 + x_1_2_3_4 * x; + } + else if constexpr (order == 5) + { + const auto x_4_5 = (S) -0.432338320780 + (S) 0.0464481811023 * x; + const auto x_2_3 = (S) -3.65368350361 + (S) 1.68976432066 * x; + const auto x_0_1 = (S) -2.82807214111 + (S) 5.17788146374 * x; + const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; + return x_0_1 + x_2_3_4_5 * x_sq; + } + else if constexpr (order == 6) + { + const auto x_5_6 = (S) 0.284794437502 + (S) -0.0265448504094 * x; + const auto x_3_4 = (S) 3.38542517475 + (S) -1.31007090775 * x; + const auto x_1_2 = (S) 6.19242937536 + (S) -5.46521465640 * x; + const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq; + const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; + return (S) -3.06081857306 + x_1_2_3_4_5_6 * x; + } + else + { + return {}; + } } else { - return {}; + if constexpr (order == 3) + { + const auto x_2_3 = (S) -1.05974531422 + (S) 0.159220010975 * x; + const auto x_0_1 = (S) -2.16417056258 + (S) 3.06469586582 * x; + return x_0_1 + x_2_3 * x_sq; + } + else if constexpr (order == 4) + { + const auto x_3_4 = (S) 0.649709537672 + (S) -0.0821303550902 * x; + const auto x_1_2 = (S) 4.08637809379 + (S) -2.13412984371 * x; + const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; + return (S) -2.51982743265 + x_1_2_3_4 * x; + } + else if constexpr (order == 5) + { + const auto x_4_5 = (S) -0.419319345483 + (S) 0.0451488402558 * x; + const auto x_2_3 = (S) -3.56885211615 + (S) 1.64139451414 * x; + const auto x_0_1 = (S) -2.80534277658 + (S) 5.10697088382 * x; + const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; + return x_0_1 + x_2_3_4_5 * x_sq; + } + else if constexpr (order == 6) + { + const auto x_5_6 = (S) 0.276834061071 + (S) -0.0258400886535 * x; + const auto x_3_4 = (S) 3.30388341157 + (S) -1.27446900713 * x; + const auto x_1_2 = (S) 6.12708086513 + (S) -5.36371998242 * x; + const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq; + const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; + return (S) -3.04376925958 + x_1_2_3_4_5_6 * x; + } + else + { + return {}; + } } } - } + }; } #if defined(__GNUC__) @@ -99,7 +102,7 @@ namespace log_detail #endif /** approximation for log(Base, x) (32-bit) */ -template +template float log (float x) { const auto vi = reinterpret_cast (x); @@ -109,11 +112,11 @@ float log (float x) const auto vf = reinterpret_cast (vfi); static constexpr auto log2_base_r = 1.0f / Base::log2_base; - return log2_base_r * ((float) e + log_detail::log2_approx (vf)); + return log2_base_r * ((float) e + Log2ProviderType::template log2_approx (vf)); } /** approximation for log(x) (64-bit) */ -template +template double log (double x) { const auto vi = reinterpret_cast (x); @@ -123,12 +126,12 @@ double log (double x) const auto vf = reinterpret_cast (vfi); static constexpr auto log2_base_r = 1.0 / Base::log2_base; - return log2_base_r * ((double) e + log_detail::log2_approx (vf)); + return log2_base_r * ((double) e + Log2ProviderType::template log2_approx (vf)); } #if defined(XSIMD_HPP) /** approximation for pow(Base, x) (32-bit SIMD) */ -template +template xsimd::batch log (xsimd::batch x) { const auto vi = reinterpret_cast&> (x); // NOSONAR @@ -138,11 +141,11 @@ xsimd::batch log (xsimd::batch x) const auto vf = reinterpret_cast&> (vfi); // NOSONAR static constexpr auto log2_base_r = 1.0f / Base::log2_base; - return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx, order, C1_continuous> (vf)); + return log2_base_r * (xsimd::to_float (e) + Log2ProviderType::template log2_approx, order, C1_continuous> (vf)); } /** approximation for pow(Base, x) (64-bit SIMD) */ -template +template xsimd::batch log (xsimd::batch x) { const auto vi = reinterpret_cast&> (x); // NOSONAR @@ -152,7 +155,7 @@ xsimd::batch log (xsimd::batch x) const auto vf = reinterpret_cast&> (vfi); // NOSONAR static constexpr auto log2_base_r = 1.0 / Base::log2_base; - return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx, order, C1_continuous> (vf)); + return log2_base_r * (xsimd::to_float (e) + Log2ProviderType::template log2_approx, order, C1_continuous> (vf)); } #endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 04d059f..a1221c7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -36,3 +36,4 @@ setup_catch_test(log_approx_test) setup_catch_test(wright_omega_approx_test) setup_catch_test(polylog_approx_test) setup_catch_test(sinh_cosh_approx_test) +setup_catch_test(asinh_approx_test) diff --git a/test/src/asinh_approx_test.cpp b/test/src/asinh_approx_test.cpp new file mode 100644 index 0000000..53320c4 --- /dev/null +++ b/test/src/asinh_approx_test.cpp @@ -0,0 +1,54 @@ +#include "test_helpers.hpp" +#include "catch2/catch_template_test_macros.hpp" + +#include +#include + +#include + +template +void test_approx (const auto& all_floats, const auto& y_exact, auto&& f_approx, float err_bound) +{ + const auto y_approx = test_helpers::compute_all (all_floats, f_approx); + const auto error = test_helpers::compute_error (y_exact, y_approx); + const auto max_error = test_helpers::abs_max (error); + + std::cout << max_error << std::endl; + REQUIRE (std::abs (max_error) < err_bound); +} + +TEMPLATE_TEST_CASE ("Asinh Approx Test", "", float, double) +{ +#if ! defined(WIN32) + const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-2f); +#else + const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); +#endif + const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) + { return std::asinh (x); }); + + SECTION ("6th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::asinh<6> (x); }, + 5.0e-7f); + } + SECTION ("5th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::asinh<5> (x); }, + 6.0e-5f); + } + SECTION ("4th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::asinh<4> (x); }, + 3.5e-4f); + } + SECTION ("3th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::asinh<3> (x); }, + 2.5e-3f); + } +} diff --git a/tools/bench/CMakeLists.txt b/tools/bench/CMakeLists.txt index 51df02a..dab6795 100644 --- a/tools/bench/CMakeLists.txt +++ b/tools/bench/CMakeLists.txt @@ -28,3 +28,6 @@ target_link_libraries(polylog_approx_bench PRIVATE benchmark::benchmark math_app add_executable(sinh_cosh_approx_bench sinh_cosh_bench.cpp) target_link_libraries(sinh_cosh_approx_bench PRIVATE benchmark::benchmark math_approx) + +add_executable(asinh_approx_bench asinh_bench.cpp) +target_link_libraries(asinh_approx_bench PRIVATE benchmark::benchmark math_approx) diff --git a/tools/bench/asinh_bench.cpp b/tools/bench/asinh_bench.cpp new file mode 100644 index 0000000..31c7685 --- /dev/null +++ b/tools/bench/asinh_bench.cpp @@ -0,0 +1,55 @@ +#include +#include + +static constexpr size_t N = 2000; +const auto data = [] +{ + std::vector x; + x.resize (N, 0.0f); + for (size_t i = 0; i < N; ++i) + x[i] = -10.0f + 20.0f * (float) i / (float) N; + return x; +}(); + +#define ASINH_BENCH(name, func) \ +void name (benchmark::State& state) \ +{ \ +for (auto _ : state) \ +{ \ +for (auto& x : data) \ +{ \ +auto y = func (x); \ +benchmark::DoNotOptimize (y); \ +} \ +} \ +} \ +BENCHMARK (name); +ASINH_BENCH (asinh_std, std::asinh) +ASINH_BENCH (asinh_approx7, math_approx::asinh<7>) +ASINH_BENCH (asinh_approx6, math_approx::asinh<6>) +ASINH_BENCH (asinh_approx5, math_approx::asinh<5>) +ASINH_BENCH (asinh_approx4, math_approx::asinh<4>) +ASINH_BENCH (asinh_approx3, math_approx::asinh<3>) + +#define ASINH_SIMD_BENCH(name, func) \ +void name (benchmark::State& state) \ +{ \ +for (auto _ : state) \ +{ \ +for (auto& x : data) \ +{ \ +auto y = func (xsimd::broadcast (x)); \ +static_assert (std::is_same_v, decltype(y)>); \ +benchmark::DoNotOptimize (y); \ +} \ +} \ +} \ +BENCHMARK (name); +ASINH_SIMD_BENCH (asinh_xsimd, xsimd::asinh) +ASINH_SIMD_BENCH (asinh_simd_approx7, math_approx::asinh<7>) +ASINH_SIMD_BENCH (asinh_simd_approx6, math_approx::asinh<6>) +ASINH_SIMD_BENCH (asinh_simd_approx5, math_approx::asinh<5>) +ASINH_SIMD_BENCH (asinh_simd_approx4, math_approx::asinh<4>) +ASINH_SIMD_BENCH (asinh_simd_approx3, math_approx::asinh<3>) + +BENCHMARK_MAIN(); diff --git a/tools/plotter/plotter.cpp b/tools/plotter/plotter.cpp index 53cad1c..b1acf88 100644 --- a/tools/plotter/plotter.cpp +++ b/tools/plotter/plotter.cpp @@ -6,9 +6,9 @@ #include namespace plt = matplotlibcpp; -#include "../../test/src/test_helpers.hpp" -#include "../../test/src/reference/toms917.hpp" #include "../../test/src/reference/polylogarithm.hpp" +#include "../../test/src/reference/toms917.hpp" +#include "../../test/src/test_helpers.hpp" #include template @@ -61,13 +61,14 @@ void plot_function (std::span all_floats, int main() { plt::figure(); - const auto range = std::make_pair (-3.0f, 3.0f); + const auto range = std::make_pair (-1.0f, 1.0f); static constexpr auto tol = 1.0e-2f; const auto all_floats = test_helpers::all_32_bit_floats (range.first, range.second, tol); - const auto y_exact = test_helpers::compute_all (all_floats, FLOAT_FUNC(std::cosh)); - plot_ulp_error (all_floats, y_exact, FLOAT_FUNC((math_approx::cosh<5>)), "cosh-5"); - plot_ulp_error (all_floats, y_exact, FLOAT_FUNC((math_approx::cosh<6>)), "cosh-6"); + const auto y_exact = test_helpers::compute_all (all_floats, FLOAT_FUNC (std::asinh)); + // plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::asinh<5>)), "asinh-5"); + plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::asinh<6>) ), "asinh-6"); + plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::asinh<7>) ), "asinh-7"); plt::legend ({ { "loc", "upper right" } }); plt::xlim (range.first, range.second);