From 17bfd031d804d259cbf212eebed569f8fcfd574b Mon Sep 17 00:00:00 2001 From: jatin Date: Tue, 5 Dec 2023 00:47:38 -0800 Subject: [PATCH] Add implementations of sinh, cosh, and tan --- README.md | 4 +- include/math_approx/math_approx.hpp | 5 +- include/math_approx/src/pow_approx.hpp | 24 ++- include/math_approx/src/sinh_cosh_approx.hpp | 53 +++++++ include/math_approx/src/trig_approx.hpp | 134 +++++++++++++++-- test/CMakeLists.txt | 1 + test/src/sinh_cosh_approx_test.cpp | 147 +++++++++++++++++++ test/src/trig_approx_test.cpp | 79 ++++++++++ tools/bench/CMakeLists.txt | 3 + tools/bench/sinh_cosh_bench.cpp | 65 ++++++++ tools/bench/trig_bench.cpp | 16 ++ tools/plotter/plotter.cpp | 7 +- 12 files changed, 519 insertions(+), 19 deletions(-) create mode 100644 include/math_approx/src/sinh_cosh_approx.hpp create mode 100644 test/src/sinh_cosh_approx_test.cpp create mode 100644 tools/bench/sinh_cosh_bench.cpp diff --git a/README.md b/README.md index 8498fc4..5a1a0d8 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,10 @@ A C++ library for math approximations. Currently supported: -- sin/cos +- sin/cos/tan - exp/exp2/exp10 - log/log2/log10 -- tanh +- sinh/cosh/tanh - sigmoid - Wright-Omega function - Dilogarithm function diff --git a/include/math_approx/math_approx.hpp b/include/math_approx/math_approx.hpp index bcedaf2..aa1b396 100644 --- a/include/math_approx/math_approx.hpp +++ b/include/math_approx/math_approx.hpp @@ -6,10 +6,11 @@ namespace math_approx #include "src/basic_math.hpp" -#include "src/tanh_approx.hpp" -#include "src/sigmoid_approx.hpp" #include "src/trig_approx.hpp" #include "src/pow_approx.hpp" #include "src/log_approx.hpp" +#include "src/tanh_approx.hpp" +#include "src/sinh_cosh_approx.hpp" +#include "src/sigmoid_approx.hpp" #include "src/wright_omega_approx.hpp" #include "src/polylogarithm_approx.hpp" diff --git a/include/math_approx/src/pow_approx.hpp b/include/math_approx/src/pow_approx.hpp index c652be4..08ba50c 100644 --- a/include/math_approx/src/pow_approx.hpp +++ b/include/math_approx/src/pow_approx.hpp @@ -10,7 +10,7 @@ namespace pow_detail template constexpr T pow2_approx (T x) { - static_assert (order >= 3 && order <= 6); + static_assert (order >= 3 && order <= 7); using S = scalar_of_t; const auto x_sq = x * x; @@ -46,6 +46,17 @@ namespace pow_detail const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; return (S) 1 + x_1_2_3_4_5_6 * x; } + else if constexpr (order == 7) + { + // doesn't seem to help at single-precision + const auto x_6_7 = (S) 0.000133154170702612 + (S) 0.0000245778949916153 * x; + const auto x_4_5 = (S) 0.00960612128901630 + (S) 0.00135551454943593 * x; + const auto x_2_3 = (S) 0.240226202240181 + (S) 0.0555072492957270 * x; + const auto x_0_1 = (S) 1 + (S) 0.693147180559945 * x; + const auto x_4_5_6_7 = x_4_5 + x_6_7 * x_sq; + const auto x_0_1_2_3 = x_0_1 + x_2_3 * x_sq; + return x_0_1_2_3 + x_4_5_6_7 * x_sq * x_sq; + } else { return {}; @@ -83,6 +94,17 @@ namespace pow_detail const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; return (S) 1 + x_1_2_3_4_5_6 * x; } + else if constexpr (order == 7) + { + // doesn't seem to help at single-precision + const auto x_6_7 = (S) 0.000136898688977877 + (S) 0.0000234440812713967 * x; + const auto x_4_5 = (S) 0.00960825566419915 + (S) 0.00135107295099880 * x; + const auto x_2_3 = (S) 0.240226092549669 + (S) 0.0555070350342468 * x; + const auto x_0_1 = (S) 1 + (S) 0.693147201030637 * x; + const auto x_4_5_6_7 = x_4_5 + x_6_7 * x_sq; + const auto x_0_1_2_3 = x_0_1 + x_2_3 * x_sq; + return x_0_1_2_3 + x_4_5_6_7 * x_sq * x_sq; + } else { return {}; diff --git a/include/math_approx/src/sinh_cosh_approx.hpp b/include/math_approx/src/sinh_cosh_approx.hpp new file mode 100644 index 0000000..651da48 --- /dev/null +++ b/include/math_approx/src/sinh_cosh_approx.hpp @@ -0,0 +1,53 @@ +#pragma once + +#include "pow_approx.hpp" + +namespace math_approx +{ +// ref: https://en.wikipedia.org/wiki/Hyperbolic_functions#Definitions +// sinh = (e^(2x) - 1) / (2e^x), cosh = (e^(2x) + 1) / (2e^x) +// let B = e^x, then sinh = (B^2 - 1) / (2B), cosh = (B^2 + 1) / (2B) +// simplifying, we get: sinh = 0.5 (B - 1/B), cosh = 0.5 (B + 1/B) + +/** Approximation of sinh(x), using exp(x) internally */ +template +T sinh (T x) +{ + using S = scalar_of_t; + auto B = exp (x); + auto Br = (S) 0.5 / B; + B *= (S) 0.5; + return B - Br; +} + +/** Approximation of cosh(x), using exp(x) internally */ +template +T cosh (T x) +{ + using S = scalar_of_t; + auto B = exp (x); + auto Br = (S) 0.5 / B; + B *= (S) 0.5; + return B + Br; +} + +/** + * Simultaneous pproximation of sinh(x) and cosh(x), + * using exp(x) internally. + * + * For more information see the comments above. + */ +template +auto sinh_cosh (T x) +{ + using S = scalar_of_t; + auto B = exp (x); + auto Br = (S) 0.5 / B; + B *= (S) 0.5; + + auto sinh = B - Br; + auto cosh = B + Br; + + return std::make_pair (sinh, cosh); +} +} // namespace math_approx diff --git a/include/math_approx/src/trig_approx.hpp b/include/math_approx/src/trig_approx.hpp index 48809aa..d263a83 100644 --- a/include/math_approx/src/trig_approx.hpp +++ b/include/math_approx/src/trig_approx.hpp @@ -4,7 +4,7 @@ namespace math_approx { -namespace sin_detail +namespace trig_detail { template T truncate (T x) @@ -34,6 +34,20 @@ namespace sin_detail return select (x >= (T) 0, mod, mod + two_pi) - pi; } + /** Fast method to wrap a value into the range [-pi/2, pi/2] */ + template + T fast_mod_mhalfpi_halfpi (T x) + { + using S = scalar_of_t; + static constexpr auto half_pi = static_cast (M_PI) * (S) 0.5; + static constexpr auto pi = static_cast (M_PI); + static constexpr auto recip_pi = (S) 1 / pi; + + x += half_pi; + const auto mod = x - pi * truncate (x * recip_pi); + return select (x >= (T) 0, mod, mod + pi) - half_pi; + } + template T sin_poly_9 (T x, T x_sq) { @@ -68,7 +82,7 @@ namespace sin_detail template T sin_mpi_pi (T x) { - static_assert (order % 2 == 1 && order <= 9 && order >= 5, "Order must e an odd number within [5, 9]"); + static_assert (order % 2 == 1 && order <= 9 && order >= 5, "Order must be an odd number within [5, 9]"); using S = scalar_of_t; static constexpr auto pi = static_cast (M_PI); @@ -77,11 +91,11 @@ T sin_mpi_pi (T x) T x_poly {}; if constexpr (order == 9) - x_poly = sin_detail::sin_poly_9 (x, x_sq); + x_poly = trig_detail::sin_poly_9 (x, x_sq); else if constexpr (order == 7) - x_poly = sin_detail::sin_poly_7 (x, x_sq); + x_poly = trig_detail::sin_poly_7 (x, x_sq); else if constexpr (order == 5) - x_poly = sin_detail::sin_poly_5 (x, x_sq); + x_poly = trig_detail::sin_poly_5 (x, x_sq); return (pi_sq - x_sq) * x_poly; } @@ -89,13 +103,13 @@ T sin_mpi_pi (T x) template T sin (T x) { - return sin_mpi_pi (sin_detail::fast_mod_mpi_pi (x)); + return sin_mpi_pi (trig_detail::fast_mod_mpi_pi (x)); } template T cos_mpi_pi (T x) { - static_assert (order % 2 == 1 && order <= 9 && order >= 5, "Order must e an odd number within [5, 9]"); + static_assert (order % 2 == 1 && order <= 9 && order >= 5, "Order must be an odd number within [5, 9]"); using S = scalar_of_t; static constexpr auto pi = static_cast (M_PI); @@ -113,11 +127,11 @@ T cos_mpi_pi (T x) T x_poly {}; if constexpr (order == 9) - x_poly = sin_detail::sin_poly_9 (hpmx, hpmx_sq); + x_poly = trig_detail::sin_poly_9 (hpmx, hpmx_sq); else if constexpr (order == 7) - x_poly = sin_detail::sin_poly_7 (hpmx, hpmx_sq); + x_poly = trig_detail::sin_poly_7 (hpmx, hpmx_sq); else if constexpr (order == 5) - x_poly = sin_detail::sin_poly_5 (hpmx, hpmx_sq); + x_poly = trig_detail::sin_poly_5 (hpmx, hpmx_sq); return (pi_sq - hpmx_sq) * x_poly; } @@ -125,6 +139,104 @@ T cos_mpi_pi (T x) template T cos (T x) { - return cos_mpi_pi (sin_detail::fast_mod_mpi_pi (x)); + return cos_mpi_pi (trig_detail::fast_mod_mpi_pi (x)); +} + +/** Approximation of tan(x) on the range [-pi/4, pi/4] */ +template +T tan_mquarterpi_quarterpi (T x) +{ + static_assert (order % 2 == 1 && order >= 3 && order <= 15, "Order must be an odd number within [3, 15]"); + + using S = scalar_of_t; + const auto x_sq = x * x; + if constexpr (order == 3) + { + const auto x_1_3 = (S) 1 + (S) 0.442959265447 * x_sq; + return x * x_1_3; + } + else if constexpr (order == 5) + { + const auto x_3_5 = (S) 0.317574684334 + (S) 0.203265826702 * x_sq; + const auto x_1_3_5 = (S) 1 + x_3_5 * x_sq; + return x * x_1_3_5; + } + else if constexpr (order == 7) + { + const auto x_5_7 = (S) 0.116406244996 + (S) 0.0944480566104 * x_sq; + const auto x_1_3 = (S) 1 + (S) 0.335216153138 * x_sq; + const auto x_1_3_5_7 = x_1_3 + x_5_7 * x_sq * x_sq; + return x * x_1_3_5_7; + } + else if constexpr (order == 9) + { + const auto x_7_9 = (S) 0.0405232529373 + (S) 0.0439292071029 * x_sq; + const auto x_3_5 = (S) 0.333131667276 + (S) 0.136333765649 * x_sq; + const auto x_3_5_7_9 = x_3_5 + x_7_9 * x_sq * x_sq; + return x * ((S) 1 + x_3_5_7_9 * x_sq); + } + else if constexpr (order == 11) + { + const auto x_q = x_sq * x_sq; + const auto x_9_11 = (S) 0.0126603694551 + (S) 0.0203633469693 * x_sq; + const auto x_5_7 = (S) 0.132897195017 + (S) 0.0570525279731 * x_sq; + const auto x_1_3 = (S) 1 + (S) 0.333353019629 * x_sq; + const auto x_5_7_9_11 = x_5_7 + x_9_11 * x_q; + const auto x_1_3_5_7_9_11 = x_1_3 + x_5_7_9_11 * x_q; + return x * x_1_3_5_7_9_11; + } + else if constexpr (order == 13) + { + const auto x_q = x_sq * x_sq; + const auto x_6 = x_q * x_sq; + const auto x_11_13 = (S) 0.00343732283737 + (S) 0.00921082294855 * x_sq; + const auto x_7_9 = (S) 0.0534743904687 + (S) 0.0242183751709 * x_sq; + const auto x_3_5 = (S) 0.333331890901 + (S) 0.133379954680 * x_sq; + const auto x_7_9_11_13 = x_7_9 + x_11_13 * x_q; + const auto x_1_3_5 = (S) 1 + x_3_5 * x_sq; + return x * (x_1_3_5 + x_7_9_11_13 * x_6); + } + else if constexpr (order == 15) + { + // doesn't seem to help much at single-precision, but here it is: + const auto x_q = x_sq * x_sq; + const auto x_8 = x_q * x_q; + const auto x_13_15 = (S) 0.000292958045126 + (S) 0.00427933470414 * x_sq; + const auto x_9_11 = (S) 0.0213477960960 + (S) 0.0106702896251 * x_sq; + const auto x_5_7 = (S) 0.133327796402 + (S) 0.0540469276103* x_sq; + const auto x_1_3 = (S) 1 + (S) 0.333333463757 * x_sq; + const auto x_9_11_13_15 = x_9_11 + x_13_15 * x_q; + const auto x_1_3_5_7 = x_1_3 + x_5_7 * x_q; + const auto x_1_3_5_7_9_11_13_15 = x_1_3_5_7 + x_9_11_13_15 * x_8; + return x * x_1_3_5_7_9_11_13_15; + } + else + { + return {}; + } +} + +/** + * Approximation of tan(x) on the range [-pi/2, pi/2] + * + * Accuracy may suffer as x approaches ±pi/2. + */ +template +T tan_mhalfpi_halfpi (T x) +{ + using S = scalar_of_t; + const auto h_x = tan_mquarterpi_quarterpi ((S) 0.5 * x); + return (S) 2 * h_x / ((S) 1 - h_x * h_x); +} + +/** + * Full-range approximation of tan(x) + * + * Accuracy may suffer as x approaches values for which tan(x) approaches ±Inf. + */ +template +T tan (T x) +{ + return tan_mhalfpi_halfpi (trig_detail::fast_mod_mhalfpi_halfpi (x)); } } // namespace math_approx diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fff15d2..04d059f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -35,3 +35,4 @@ setup_catch_test(pow_approx_test) setup_catch_test(log_approx_test) setup_catch_test(wright_omega_approx_test) setup_catch_test(polylog_approx_test) +setup_catch_test(sinh_cosh_approx_test) diff --git a/test/src/sinh_cosh_approx_test.cpp b/test/src/sinh_cosh_approx_test.cpp new file mode 100644 index 0000000..4e40043 --- /dev/null +++ b/test/src/sinh_cosh_approx_test.cpp @@ -0,0 +1,147 @@ +#include "test_helpers.hpp" +#include +#include + +#include + +TEST_CASE ("Sinh Approx Test") +{ +#if ! defined(WIN32) + const auto all_floats = test_helpers::all_32_bit_floats (-3.5f, 3.5f, 1.0e-3f); +#else + const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); +#endif + const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) + { return std::sinh (x); }); + + const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound) + { + const auto y_approx = test_helpers::compute_all (all_floats, f_approx); + + const auto error = test_helpers::compute_error (y_exact, y_approx); + const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); + const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); + + const auto max_error = test_helpers::abs_max (error); + const auto max_rel_error = test_helpers::abs_max (rel_error); + const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); + + std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl; + REQUIRE (std::abs (max_error) < err_bound); + REQUIRE (std::abs (max_rel_error) < rel_err_bound); + if (ulp_bound > 0) + REQUIRE (max_ulp_error < ulp_bound); + }; + + SECTION ("6th-Order simul.") + { + test_approx ([] (auto x) + { return math_approx::sinh_cosh<6> (x).first; }, + 6.0e-6f, + 0.00011f, + 1050); + } + SECTION ("6th-Order") + { + test_approx ([] (auto x) + { return math_approx::sinh<6> (x); }, + 6.0e-6f, + 0.00011f, + 1050); + } + SECTION ("5th-Order") + { + test_approx ([] (auto x) + { return math_approx::sinh<5> (x); }, + 8.0e-6f, + 0.00015f, + 0); + } + SECTION ("4th-Order") + { + test_approx ([] (auto x) + { return math_approx::sinh<4> (x); }, + 6.0e-5f, + 0.00025f, + 0); + } + SECTION ("3rd-Order") + { + test_approx ([] (auto x) + { return math_approx::sinh<3> (x); }, + 0.002f, + 0.0035f, + 0); + } +} + +TEST_CASE ("Cosh Approx Test") +{ +#if ! defined(WIN32) + const auto all_floats = test_helpers::all_32_bit_floats (-5.0f, 5.0f, 1.0e-3f); +#else + const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); +#endif + const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) + { return std::cosh (x); }); + + const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound) + { + const auto y_approx = test_helpers::compute_all (all_floats, f_approx); + + const auto error = test_helpers::compute_error (y_exact, y_approx); + const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); + const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); + + const auto max_error = test_helpers::abs_max (error); + const auto max_rel_error = test_helpers::abs_max (rel_error); + const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); + + std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl; + REQUIRE (std::abs (max_error) < err_bound); + REQUIRE (std::abs (max_rel_error) < rel_err_bound); + if (ulp_bound > 0) + REQUIRE (max_ulp_error < ulp_bound); + }; + + SECTION ("6th-Order simul.") + { + test_approx ([] (auto x) + { return math_approx::sinh_cosh<6> (x).second; }, + 2.5e-5f, + 4.5e-7f, + 8); + } + SECTION ("6th-Order") + { + test_approx ([] (auto x) + { return math_approx::cosh<6> (x); }, + 2.5e-5f, + 4.5e-7f, + 8); + } + SECTION ("5th-Order") + { + test_approx ([] (auto x) + { return math_approx::cosh<5> (x); }, + 3.5e-5f, + 5.5e-7f, + 10); + } + SECTION ("4th-Order") + { + test_approx ([] (auto x) + { return math_approx::cosh<4> (x); }, + 0.0003f, + 4.0e-6f, + 60); + } + SECTION ("3rd-Order") + { + test_approx ([] (auto x) + { return math_approx::cosh<3> (x); }, + 0.0075f, + 0.00015f, + 0); + } +} diff --git a/test/src/trig_approx_test.cpp b/test/src/trig_approx_test.cpp index 71735c4..3d69009 100644 --- a/test/src/trig_approx_test.cpp +++ b/test/src/trig_approx_test.cpp @@ -85,3 +85,82 @@ TEST_CASE ("Cosine Approx Test") 7.5e-4f); } } + +TEST_CASE ("Tan Approx Test") +{ +#if ! defined(WIN32) + const auto all_floats = test_helpers::all_32_bit_floats (-1.5f, 1.5f, 1.0e-3f); +#else + const auto all_floats = test_helpers::all_32_bit_floats (-1.5f, 1.5f, 1.0e-1f); +#endif + const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) + { return std::tan (x); }); + + const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound) + { + const auto y_approx = test_helpers::compute_all (all_floats, f_approx); + + const auto error = test_helpers::compute_error (y_exact, y_approx); + const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); + const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); + + const auto max_error = test_helpers::abs_max (error); + const auto max_rel_error = test_helpers::abs_max (rel_error); + const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); + + std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl; + REQUIRE (std::abs (max_error) < err_bound); + REQUIRE (std::abs (max_rel_error) < rel_err_bound); + if (ulp_bound > 0) + REQUIRE (max_ulp_error < ulp_bound); + }; + + SECTION ("13th-Order") + { + test_approx ([] (auto x) + { return math_approx::tan<13> (x); }, + 5.5e-5f, + 6.0e-5f, + 520); + } + SECTION ("11th-Order") + { + test_approx ([] (auto x) + { return math_approx::tan<11> (x); }, + 9.5e-5f, + 6.0e-5f, + 520); + } + SECTION ("9th-Order") + { + test_approx ([] (auto x) + { return math_approx::tan<9> (x); }, + 0.0009f, + 6.0e-5f, + 900); + } + SECTION ("7th-Order") + { + test_approx ([] (auto x) + { return math_approx::tan<7> (x); }, + 0.015f, + 0.0009f, + 0); + } + SECTION ("5th-Order") + { + test_approx ([] (auto x) + { return math_approx::tan<5> (x); }, + 0.14f, + 0.01f, + 0); + } + SECTION ("3rd-Order") + { + test_approx ([] (auto x) + { return math_approx::tan<3> (x); }, + 1.5f, + 0.09f, + 0); + } +} diff --git a/tools/bench/CMakeLists.txt b/tools/bench/CMakeLists.txt index 0c06c5d..51df02a 100644 --- a/tools/bench/CMakeLists.txt +++ b/tools/bench/CMakeLists.txt @@ -25,3 +25,6 @@ target_link_libraries(wright_omega_approx_bench PRIVATE benchmark::benchmark mat add_executable(polylog_approx_bench polylog_bench.cpp) target_link_libraries(polylog_approx_bench PRIVATE benchmark::benchmark math_approx) + +add_executable(sinh_cosh_approx_bench sinh_cosh_bench.cpp) +target_link_libraries(sinh_cosh_approx_bench PRIVATE benchmark::benchmark math_approx) diff --git a/tools/bench/sinh_cosh_bench.cpp b/tools/bench/sinh_cosh_bench.cpp new file mode 100644 index 0000000..a32df9f --- /dev/null +++ b/tools/bench/sinh_cosh_bench.cpp @@ -0,0 +1,65 @@ +#include +#include + +static constexpr size_t N = 2000; +const auto data = [] +{ + std::vector x; + x.resize (N, 0.0f); + for (size_t i = 0; i < N; ++i) + x[i] = -10.0f + 20.0f * (float) i / (float) N; + return x; +}(); + +#define TANH_BENCH(name, func) \ +void name (benchmark::State& state) \ +{ \ +for (auto _ : state) \ +{ \ +for (auto& x : data) \ +{ \ +auto y = func (x); \ +benchmark::DoNotOptimize (y); \ +} \ +} \ +} \ +BENCHMARK (name); +TANH_BENCH (sinh_std, std::sinh) +TANH_BENCH (sinh_approx6, math_approx::sinh<6>) +TANH_BENCH (sinh_approx5, math_approx::sinh<5>) +TANH_BENCH (sinh_approx4, math_approx::sinh<4>) +TANH_BENCH (sinh_approx3, math_approx::sinh<3>) + +TANH_BENCH (cosh_std, std::sinh) +TANH_BENCH (cosh_approx6, math_approx::cosh<6>) +TANH_BENCH (cosh_approx5, math_approx::cosh<5>) +TANH_BENCH (cosh_approx4, math_approx::cosh<4>) +TANH_BENCH (cosh_approx3, math_approx::cosh<3>) + +#define TANH_SIMD_BENCH(name, func) \ +void name (benchmark::State& state) \ +{ \ +for (auto _ : state) \ +{ \ +for (auto& x : data) \ +{ \ +auto y = func (xsimd::broadcast (x)); \ +static_assert (std::is_same_v, decltype(y)>); \ +benchmark::DoNotOptimize (y); \ +} \ +} \ +} \ +BENCHMARK (name); +TANH_SIMD_BENCH (sinh_xsimd, xsimd::tanh) +TANH_SIMD_BENCH (sinh_simd_approx6, math_approx::sinh<6>) +TANH_SIMD_BENCH (sinh_simd_approx5, math_approx::sinh<5>) +TANH_SIMD_BENCH (sinh_simd_approx4, math_approx::sinh<4>) +TANH_SIMD_BENCH (sinh_simd_approx3, math_approx::sinh<3>) + +TANH_SIMD_BENCH (cosh_xsimd, xsimd::tanh) +TANH_SIMD_BENCH (cosh_simd_approx6, math_approx::cosh<6>) +TANH_SIMD_BENCH (cosh_simd_approx5, math_approx::cosh<5>) +TANH_SIMD_BENCH (cosh_simd_approx4, math_approx::cosh<4>) +TANH_SIMD_BENCH (cosh_simd_approx3, math_approx::cosh<3>) + +BENCHMARK_MAIN(); diff --git a/tools/bench/trig_bench.cpp b/tools/bench/trig_bench.cpp index 796bd27..fba83ba 100644 --- a/tools/bench/trig_bench.cpp +++ b/tools/bench/trig_bench.cpp @@ -35,6 +35,14 @@ TRIG_BENCH (sin_approx9, math_approx::sin<9>) TRIG_BENCH (sin_approx7, math_approx::sin<7>) TRIG_BENCH (sin_approx5, math_approx::sin<5>) +TRIG_BENCH (tan_std, std::tan) +TRIG_BENCH (tan_approx13, math_approx::tan<13>) +TRIG_BENCH (tan_approx11, math_approx::tan<11>) +TRIG_BENCH (tan_approx9, math_approx::tan<9>) +TRIG_BENCH (tan_approx7, math_approx::tan<7>) +TRIG_BENCH (tan_approx5, math_approx::tan<5>) +TRIG_BENCH (tan_approx3, math_approx::tan<3>) + #define TRIG_SIMD_BENCH(name, func) \ void name (benchmark::State& state) \ { \ @@ -60,4 +68,12 @@ TRIG_SIMD_BENCH (cos_simd_approx9, math_approx::cos<9>) TRIG_SIMD_BENCH (cos_simd_approx7, math_approx::cos<7>) TRIG_SIMD_BENCH (cos_simd_approx5, math_approx::cos<5>) +TRIG_SIMD_BENCH (tan_xsimd, xsimd::tan) +TRIG_SIMD_BENCH (tan_simd_approx13, math_approx::tan<13>) +TRIG_SIMD_BENCH (tan_simd_approx11, math_approx::tan<11>) +TRIG_SIMD_BENCH (tan_simd_approx9, math_approx::tan<9>) +TRIG_SIMD_BENCH (tan_simd_approx7, math_approx::tan<7>) +TRIG_SIMD_BENCH (tan_simd_approx5, math_approx::tan<5>) +TRIG_SIMD_BENCH (tan_simd_approx3, math_approx::tan<3>) + BENCHMARK_MAIN(); diff --git a/tools/plotter/plotter.cpp b/tools/plotter/plotter.cpp index 927da1b..53cad1c 100644 --- a/tools/plotter/plotter.cpp +++ b/tools/plotter/plotter.cpp @@ -61,12 +61,13 @@ void plot_function (std::span all_floats, int main() { plt::figure(); - const auto range = std::make_pair (-5.0f, 5.0f); + const auto range = std::make_pair (-3.0f, 3.0f); static constexpr auto tol = 1.0e-2f; const auto all_floats = test_helpers::all_32_bit_floats (range.first, range.second, tol); - const auto y_exact = test_helpers::compute_all (all_floats, FLOAT_FUNC(polylogarithm::Li2)); - plot_ulp_error (all_floats, y_exact, FLOAT_FUNC((math_approx::li2<3,6>)), "Li2-3"); + const auto y_exact = test_helpers::compute_all (all_floats, FLOAT_FUNC(std::cosh)); + plot_ulp_error (all_floats, y_exact, FLOAT_FUNC((math_approx::cosh<5>)), "cosh-5"); + plot_ulp_error (all_floats, y_exact, FLOAT_FUNC((math_approx::cosh<6>)), "cosh-6"); plt::legend ({ { "loc", "upper right" } }); plt::xlim (range.first, range.second);