Skip to content

Commit

Permalink
Improved asin approximation
Browse files Browse the repository at this point in the history
  • Loading branch information
jatinchowdhury18 committed Jan 6, 2024
1 parent 14f2a67 commit c8977bd
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 170 deletions.
113 changes: 18 additions & 95 deletions include/math_approx/src/inverse_trig_approx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,129 +7,52 @@ namespace math_approx
namespace asin_detail
{
template <int order, typename T>
constexpr T asin_0_rsqrt2 (T x)
constexpr T asin_kernel (T x)
{
using S = scalar_of_t<T>;
static_assert (order >= 3 && order <= 11);
static_assert (order >= 1 && order <= 4);

const auto x_sq = x * x;
if constexpr (order == 3)
{
const auto y_2_3 = (S) -0.0536922932754174 + (S) 0.297373838424192 * x;
return x + x_sq * y_2_3;
}
if constexpr (order == 4)
if constexpr (order == 1)
{
const auto y_3_4 = (S) -0.00535062837316264 + (S) 0.257252341545375 * x;
const auto y_2_3_4 = (S) 0.0317400592553864 + x * y_3_4;
return x + x_sq * y_2_3_4;
return (S) 0.16443531037029196495 + x * (S) 0.097419577664394046979;
}
if constexpr (order == 5)
if constexpr (order == 2)
{
const auto y_4_5 = (S) -0.304640601352515 + (S) 0.353208342056560 * x;
const auto y_2_3 = (S) -0.0132122795426018 + (S) 0.278935718011026 * x;

const auto y_2_3_4_5 = y_2_3 + x_sq * y_4_5;
return x + x_sq * y_2_3_4_5;
return (S) 0.16687742065041710759 + x * ((S) 0.070980446338571381859 + x * (S) 0.066682760821292624831);
}
if constexpr (order == 6)
{
const auto y_5_6 = (S) -0.516068582317285 + (S) 0.437544978265334 * x;
const auto y_3_4 = (S) 0.0946375652126262 + (S) 0.313911974469437 * x;

const auto y_3_4_5_6 = y_3_4 + x_sq * y_5_6;

const auto y_2_3_4_5_6 = (S) 0.00577946556085762 + x * y_3_4_5_6;
return x + x_sq * y_2_3_4_5_6;
}
if constexpr (order == 7)
{
const auto y_6_7 = (S) -0.993023225129115 + (S) 0.604213345541030 * x;
const auto y_4_5 = (S) -0.242568404368623 + (S) 0.780715776826480 * x;
const auto y_2_3 = (S) -0.00231743294930714 + (S) 0.205916081200406 * x;

const auto y_4_5_6_7 = y_4_5 + x_sq * y_6_7;

const auto y_2_3_4_5_6_7 = y_2_3 + x_sq * y_4_5_6_7;
return x + x_sq * y_2_3_4_5_6_7;
}
if constexpr (order == 8)
{
const auto y_7_8 = (S) -1.68181413527251 + (S) 0.833569228384441 * x;
const auto y_5_6 = (S) -0.614138628435564 + (S) 1.51390471735914 * x;
const auto y_3_4 = (S) 0.146440161696543 + (S) 0.167766328527588 * x;

const auto y_5_6_7_8 = y_5_6 + x_sq * y_7_8;
const auto y_3_4_5_6_7_8 = y_3_4 + x_sq * y_5_6_7_8;

const auto y_2_3_4_5_6_7_8 = (S) 0.000914775210828589 + x * y_3_4_5_6_7_8;
return x + x_sq * y_2_3_4_5_6_7_8;
}
if constexpr (order == 9)
if constexpr (order == 3)
{
const auto y_8_9 = (S) -2.8729113246543627191 + (S) 1.1910880616677141930 * x;
const auto y_6_7 = (S) -1.7250043993765906691 + (S) 3.0742940024198017746 * x;
const auto y_4_5 = (S) -0.10358468520191396745 + (S) 0.63814601829123507315 * x;
const auto y_2_3 = (S) -0.00034869418257434217938 + (S) 0.17639243703620430259 * x;

const auto y_6_7_8_9 = y_6_7 + x_sq * y_8_9;
const auto y_2_3_4_5 = y_2_3 + x_sq * y_4_5;

const auto y_2_3_4_5_6_7_8_9 = y_2_3_4_5 + (x_sq * x_sq) * y_6_7_8_9;
return x + x_sq * y_2_3_4_5_6_7_8_9;
return (S) 0.16665080061757006624 + x * ((S) 0.075508850204912977833 + x * ((S) 0.039376231206556484843 + x * (S) 0.051275338699694958389));
}
if constexpr (order == 10)
if constexpr (order == 4)
{
const auto y_9_10 = (S) -4.7928604989214971255 + (S) 1.7203396621648587850 * x;
const auto y_7_8 = (S) -3.9882416242478972990 + (S) 5.9100541437570059955 * x;
const auto y_5_6 = (S) -0.33522250091818628359 + (S) 1.6520149306717599735 * x;
const auto y_3_4 = (S) 0.16219681678629885302 + (S) 0.059321118077451009953 * x;

const auto y_7_8_9_10 = y_7_8 + x_sq * y_9_10;
const auto y_3_4_5_6 = y_3_4 + x_sq * y_5_6;

const auto y_3_4_5_6_7_8_9_10 = y_3_4_5_6 + (x_sq * x_sq) * y_7_8_9_10;
const auto y_2_3_4_5_6_7_8_9_10 = (S) 0.00013025301412532343010 + x * y_3_4_5_6_7_8_9_10;
return x + x_sq * y_2_3_4_5_6_7_8_9_10;
return (S) 0.16666803275183153521 + x * ((S) 0.074936964020844071266 + x * ((S) 0.045640288439217274741 + x * ((S) 0.023435504410713306478 + x * (S) 0.043323710842752508055)));
}
if constexpr (order == 11)
{
const auto y_10_11 = (S) -7.8570177355488999282 + (S) 2.4911046380177723769 * x;
const auto y_8_9 = (S) -8.7527722722991097015 + (S) 11.027490029915364644 * x;
const auto y_6_7 = (S) -1.3541397019129590706 + (S) 4.3623048430828985644 * x;
const auto y_4_5 = (S) -0.031316768977302081312 + (S) 0.34169372825309551889 * x;
const auto y_2_3 = (S) -0.000047491279899706856284 + (S) 0.16861521810527382859 * x;

const auto y_8_9_10_11 = y_8_9 + x_sq * y_10_11;
const auto y_4_5_6_7 = y_4_5 + x_sq * y_6_7;

const auto y_4_5_6_7_8_9_10_11 = y_4_5_6_7 + (x_sq * x_sq) * y_8_9_10_11;
const auto y_2_3_4_5_6_7_8_9_10_11 = y_2_3 + x_sq * y_4_5_6_7_8_9_10_11;

return x + x_sq * y_2_3_4_5_6_7_8_9_10_11;
}
return {};
}
} // namespace asin_detail

/** Asin(x) approximation, valid on the range [-1, 1] */
template <int order, typename T>
T asin (T x)
{
using S = scalar_of_t<T>;
static constexpr auto rsqrt2 = (S) 0.707106781186547524400844362105;

using std::abs, std::sqrt;
#if defined(XSIMD_HPP)
using xsimd::abs, xsimd::sqrt;
#endif

const auto abs_x = abs (x);

const auto reflect = abs_x > rsqrt2;
const auto poly_arg = select (reflect, sqrt ((S) 1 - abs_x * abs_x), abs_x);
const auto poly_res = asin_detail::asin_0_rsqrt2<order> (poly_arg);
const auto res = select (reflect, (S) M_PI_2 - poly_res, poly_res);
const auto reflect = abs_x > (S) 0.5;
auto z0 = select (reflect, (S) 0.5 * ((S) 1 - abs_x), abs_x * abs_x);

auto x2 = select (reflect, sqrt (z0), abs_x);
auto z1 = asin_detail::asin_kernel<order> (z0);

auto z2 = z1 = z1 * (z0 * x2) + x2;
auto res = select (reflect, (S) M_PI_2 - (z2 + z2), z2);
return select (x > (S) 0, res, -res);
}
} // namespace math_approx
74 changes: 17 additions & 57 deletions test/src/inverse_trig_approx_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,76 +33,36 @@ TEST_CASE ("Asin Approx Test")
REQUIRE (max_ulp_error < ulp_bound);
};

SECTION ("11th-Order")
SECTION ("4th-Order")
{
test_approx ([] (auto x)
{ return math_approx::asin<11> (x); },
{ return math_approx::asin<4> (x); },
1.5e-7f,
2.5e-7f,
5.0e-7f,
8);
3);
}
SECTION ("10th-Order")
SECTION ("3rd-Order")
{
test_approx ([] (auto x)
{ return math_approx::asin<10> (x); },
{ return math_approx::asin<3> (x); },
2.5e-7f,
1.5e-6f,
22);
}
SECTION ("9th-Order")
{
test_approx ([] (auto x)
{ return math_approx::asin<9> (x); },
4.0e-7f,
4.5e-6f,
72);
3.0e-7f,
5);
}
SECTION ("8th-Order")
SECTION ("2nd-Order")
{
test_approx ([] (auto x)
{ return math_approx::asin<8> (x); },
8.0e-7f,
1.5e-5f,
250);
{ return math_approx::asin<2> (x); },
2.0e-6f,
4.0e-6f,
50);
}
SECTION ("7th-Order")
SECTION ("1st-Order")
{
test_approx ([] (auto x)
{ return math_approx::asin<7> (x); },
3.0e-6f,
5.0e-5f,
0);
}
SECTION ("6th-Order")
{
test_approx ([] (auto x)
{ return math_approx::asin<6> (x); },
1.5e-5f,
2.0e-4f,
0);
}
SECTION ("5th-Order")
{
test_approx ([] (auto x)
{ return math_approx::asin<5> (x); },
5.5e-5f,
5.0e-4f,
0);
}
SECTION ("4th-Order")
{
test_approx ([] (auto x)
{ return math_approx::asin<4> (x); },
3.0e-4f,
2.0e-3f,
0);
}
SECTION ("3rd-Order")
{
test_approx ([] (auto x)
{ return math_approx::asin<3> (x); },
2.0e-3f,
6.0e-3f,
{ return math_approx::asin<1> (x); },
4.0e-5f,
6.5e-5f,
0);
}
}
24 changes: 7 additions & 17 deletions tools/bench/inverse_trig_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,11 @@ benchmark::DoNotOptimize (y); \
} \
BENCHMARK (name);

// TRIG_BENCH (asin_std, std::asin)
// TRIG_BENCH (asin_approx11, math_approx::asin<11>)
// TRIG_BENCH (asin_approx10, math_approx::asin<10>)
// TRIG_BENCH (asin_approx9, math_approx::asin<9>)
// TRIG_BENCH (asin_approx8, math_approx::asin<8>)
// TRIG_BENCH (asin_approx7, math_approx::asin<7>)
// TRIG_BENCH (asin_approx6, math_approx::asin<6>)
// TRIG_BENCH (asin_approx5, math_approx::asin<5>)
// TRIG_BENCH (asin_approx4, math_approx::asin<4>)
// TRIG_BENCH (asin_approx3, math_approx::asin<3>)
TRIG_BENCH (asin_std, std::asin)
TRIG_BENCH (asin_approx4, math_approx::asin<4>)
TRIG_BENCH (asin_approx3, math_approx::asin<3>)
TRIG_BENCH (asin_approx2, math_approx::asin<2>)
TRIG_BENCH (asin_approx1, math_approx::asin<1>)

#define TRIG_SIMD_BENCH(name, func) \
void name (benchmark::State& state) \
Expand All @@ -52,14 +47,9 @@ benchmark::DoNotOptimize (y); \
BENCHMARK (name);

TRIG_SIMD_BENCH (asin_xsimd, xsimd::asin)
TRIG_SIMD_BENCH (asin_simd_approx11, math_approx::asin<11>)
TRIG_SIMD_BENCH (asin_simd_approx10, math_approx::asin<10>)
TRIG_SIMD_BENCH (asin_simd_approx9, math_approx::asin<9>)
TRIG_SIMD_BENCH (asin_simd_approx8, math_approx::asin<8>)
TRIG_SIMD_BENCH (asin_simd_approx7, math_approx::asin<7>)
TRIG_SIMD_BENCH (asin_simd_approx6, math_approx::asin<6>)
TRIG_SIMD_BENCH (asin_simd_approx5, math_approx::asin<5>)
TRIG_SIMD_BENCH (asin_simd_approx4, math_approx::asin<4>)
TRIG_SIMD_BENCH (asin_simd_approx3, math_approx::asin<3>)
TRIG_SIMD_BENCH (asin_simd_approx2, math_approx::asin<2>)
TRIG_SIMD_BENCH (asin_simd_approx1, math_approx::asin<1>)

BENCHMARK_MAIN();
9 changes: 8 additions & 1 deletion tools/plotter/plotter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ void plot_function (std::span<const float> all_floats,

#define FLOAT_FUNC(func) [] (float x) { return func (x); }

float asin_xsimd (float x)
{
return xsimd::asin (xsimd::broadcast (x)).get (0);
}

int main()
{
plt::figure();
Expand All @@ -66,7 +71,9 @@ int main()

const auto all_floats = test_helpers::all_32_bit_floats (range.first, range.second, tol);
const auto y_exact = test_helpers::compute_all<float> (all_floats, FLOAT_FUNC (std::asin));
plot_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::asin<3>) ), "asin-3");
plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((asin_xsimd) ), "asin-xsimd");
plot_ulp_error (all_floats, y_exact, FLOAT_FUNC ((math_approx::asin<4>) ), "asin-4");
// plot_function (all_floats, FLOAT_FUNC ((math_approx::asin<4>) ), "asin-4");

plt::legend ({ { "loc", "upper right" } });
plt::xlim (range.first, range.second);
Expand Down

0 comments on commit c8977bd

Please sign in to comment.