diff --git a/include/math_approx/src/inverse_trig_approx.hpp b/include/math_approx/src/inverse_trig_approx.hpp index 0ee6ddc..2b07ae5 100644 --- a/include/math_approx/src/inverse_trig_approx.hpp +++ b/include/math_approx/src/inverse_trig_approx.hpp @@ -4,7 +4,7 @@ namespace math_approx { -namespace asin_detail +namespace inv_trig_detail { template constexpr T asin_kernel (T x) @@ -31,6 +31,36 @@ namespace asin_detail return {}; } + + template + constexpr T acos_kernel (T x) + { + using S = scalar_of_t; + static_assert (order >= 1 && order <= 5); + + if constexpr (order == 1) + { + return (S) 0.061454830783555181029 + x * (S) 0.50934149601134137697; + } + if constexpr (order == 2) + { + return (S) 0.18188825560430002537 + x * ((S) -0.092825628092384385170 + x * (S) 0.48173369928298098719); + } + if constexpr (order == 3) + { + return (S) 0.16480511788348814473 + x * ((S) 0.11286070199090997290 + x * ((S) -0.18795205899643871450 + x * (S) 0.48108256591693704385)); + } + if constexpr (order == 4) + { + return (S) 0.16687235373875186628 + x * ((S) 0.068412956842158992310 + x * ((S) 0.11466969910945928879 + x * ((S) -0.27433862418620241774 + x * (S) 0.49517994129072917531))); + } + if constexpr (order == 5) + { + return (S) 0.16664924406383360700 + x * ((S) 0.075837825275592588015 + x * ((S) 0.030665158374004904823 + x * ((S) 0.13572846625592635550 + x * ((S) -0.34609357317006372856 + x * (S) 0.50800920599560273061)))); + } + + return {}; + } } // namespace asin_detail template @@ -49,10 +79,33 @@ T asin (T x) auto z0 = select (reflect, (S) 0.5 * ((S) 1 - abs_x), abs_x * abs_x); auto x2 = select (reflect, sqrt (z0), abs_x); - auto z1 = asin_detail::asin_kernel (z0); + auto z1 = inv_trig_detail::asin_kernel (z0); - auto z2 = z1 = z1 * (z0 * x2) + x2; + auto z2 = z1 * (z0 * x2) + x2; auto res = select (reflect, (S) M_PI_2 - (z2 + z2), z2); return select (x > (S) 0, res, -res); } + +template +T acos (T x) +{ + using S = scalar_of_t; + + using std::abs, std::sqrt; +#if defined(XSIMD_HPP) + using xsimd::abs, xsimd::sqrt; +#endif + + const auto abs_x = abs (x); + + const auto reflect = abs_x > (S) 0.5; + auto z0 = select (reflect, (S) 0.5 * ((S) 1 - abs_x), abs_x * abs_x); + + auto x2 = select (reflect, sqrt (z0), abs_x); + auto z1 = inv_trig_detail::acos_kernel (z0); + + auto z2 = z1 * (z0 * x2) + x2; + auto res = select (reflect, (S) M_PI_2 - (z2 + z2), z2); + return (S) M_PI_2 - select (x > (S) 0, res, -res); +} } // namespace math_approx diff --git a/test/src/inverse_trig_approx_test.cpp b/test/src/inverse_trig_approx_test.cpp index 7272572..c197a8d 100644 --- a/test/src/inverse_trig_approx_test.cpp +++ b/test/src/inverse_trig_approx_test.cpp @@ -66,3 +66,57 @@ TEST_CASE ("Asin Approx Test") 0); } } + +TEST_CASE ("Acos Approx Test") +{ +#if ! defined(WIN32) + const auto all_floats = test_helpers::all_32_bit_floats (-1.0f, 1.0f, 1.0e-2f); +#else + const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); +#endif + const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) + { return std::acos (x); }); + + const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound) + { + const auto y_approx = test_helpers::compute_all (all_floats, f_approx); + + const auto error = test_helpers::compute_error (y_exact, y_approx); + + const auto max_error = test_helpers::abs_max (error); + + std::cout << max_error << std::endl; + REQUIRE (std::abs (max_error) < err_bound); + }; + + SECTION ("5th-Order") + { + test_approx ([] (auto x) + { return math_approx::acos<5> (x); }, + 5.0e-7f); + } + SECTION ("4th-Order") + { + test_approx ([] (auto x) + { return math_approx::acos<4> (x); }, + 1.0e-6f); + } + SECTION ("3rd-Order") + { + test_approx ([] (auto x) + { return math_approx::acos<3> (x); }, + 1.5e-5f); + } + SECTION ("2nd-Order") + { + test_approx ([] (auto x) + { return math_approx::acos<2> (x); }, + 2.5e-4f); + } + SECTION ("1st-Order") + { + test_approx ([] (auto x) + { return math_approx::acos<1> (x); }, + 5.0e-3f); + } +} diff --git a/tools/bench/inverse_trig_bench.cpp b/tools/bench/inverse_trig_bench.cpp index ac7c2ac..2e19032 100644 --- a/tools/bench/inverse_trig_bench.cpp +++ b/tools/bench/inverse_trig_bench.cpp @@ -31,6 +31,13 @@ TRIG_BENCH (asin_approx3, math_approx::asin<3>) TRIG_BENCH (asin_approx2, math_approx::asin<2>) TRIG_BENCH (asin_approx1, math_approx::asin<1>) +TRIG_BENCH (acos_std, std::acos) +TRIG_BENCH (acos_approx5, math_approx::acos<5>) +TRIG_BENCH (acos_approx4, math_approx::acos<4>) +TRIG_BENCH (acos_approx3, math_approx::acos<3>) +TRIG_BENCH (acos_approx2, math_approx::acos<2>) +TRIG_BENCH (acos_approx1, math_approx::acos<1>) + #define TRIG_SIMD_BENCH(name, func) \ void name (benchmark::State& state) \ { \ @@ -52,4 +59,11 @@ TRIG_SIMD_BENCH (asin_simd_approx3, math_approx::asin<3>) TRIG_SIMD_BENCH (asin_simd_approx2, math_approx::asin<2>) TRIG_SIMD_BENCH (asin_simd_approx1, math_approx::asin<1>) +TRIG_SIMD_BENCH (acos_xsimd, xsimd::acos) +TRIG_SIMD_BENCH (acos_simd_approx5, math_approx::acos<5>) +TRIG_SIMD_BENCH (acos_simd_approx4, math_approx::acos<4>) +TRIG_SIMD_BENCH (acos_simd_approx3, math_approx::acos<3>) +TRIG_SIMD_BENCH (acos_simd_approx2, math_approx::acos<2>) +TRIG_SIMD_BENCH (acos_simd_approx1, math_approx::acos<1>) + BENCHMARK_MAIN(); diff --git a/tools/plotter/plotter.cpp b/tools/plotter/plotter.cpp index aed4bb6..d249631 100644 --- a/tools/plotter/plotter.cpp +++ b/tools/plotter/plotter.cpp @@ -58,11 +58,6 @@ void plot_function (std::span all_floats, #define FLOAT_FUNC(func) [] (float x) { return func (x); } -float asin_xsimd (float x) -{ - return xsimd::asin (xsimd::broadcast (x)).get (0); -} - int main() { plt::figure(); @@ -70,10 +65,10 @@ int main() static constexpr auto tol = 1.0e-2f; const auto all_floats = test_helpers::all_32_bit_floats (range.first, range.second, tol); - const auto y_exact = test_helpers::compute_all (all_floats, FLOAT_FUNC (std::asin)); - plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((asin_xsimd) ), "asin-xsimd"); - plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::asin<4>) ), "asin-4"); - // plot_function (all_floats, FLOAT_FUNC ((math_approx::asin<4>) ), "asin-4"); + const auto y_exact = test_helpers::compute_all (all_floats, FLOAT_FUNC (std::acos)); + // plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((acos_xsimd) ), "acos-xsimd"); + plot_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::acos<4>) ), "acos-4"); + // plot_function (all_floats, FLOAT_FUNC ((math_approx::acos<4>) ), "acos-4"); plt::legend ({ { "loc", "upper right" } }); plt::xlim (range.first, range.second);