Skip to content

Commit

Permalink
Add asinh implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
jatinchowdhury18 committed Dec 8, 2023
1 parent 5653c28 commit 449a1c5
Show file tree
Hide file tree
Showing 8 changed files with 286 additions and 89 deletions.
1 change: 1 addition & 0 deletions include/math_approx/math_approx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ namespace math_approx
#include "src/log_approx.hpp"
#include "src/tanh_approx.hpp"
#include "src/sinh_cosh_approx.hpp"
#include "src/asinh_approx.hpp"
#include "src/sigmoid_approx.hpp"
#include "src/wright_omega_approx.hpp"
#include "src/polylogarithm_approx.hpp"
79 changes: 79 additions & 0 deletions include/math_approx/src/asinh_approx.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#pragma once

#include "log_approx.hpp"

namespace math_approx
{
struct AsinhLog2Provider
{
/** approximation for log2(x), optimized on the range [1, 2], to be used within an asinh(x) computation */
template <typename T, int order, bool /*C1_continuous*/>
static constexpr T log2_approx (T x)
{
static_assert (order >= 3 && order <= 5);
using S = scalar_of_t<T>;

const auto x_sq = x * x;
if constexpr (order == 3)
{
const auto x_2_3 = (S) -1.21535595794871 + (S) 0.194363894384581 * x;
const auto x_0_1 = (S) -2.26452854958994 + (S) 3.28552061315407 * x;
return x_0_1 + x_2_3 * x_sq;
}
else if constexpr (order == 4)
{
const auto x_3_4 = (S) 0.770443387059628 + (S) -0.102652345633016 * x;
const auto x_1_2 = (S) 4.33013912645867 + (S) -2.39448588379361 * x;
const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
return (S) -2.60344428409168 + x_1_2_3_4 * x;
}
else if constexpr (order == 5)
{
const auto x_4_5 = (S) -0.511946284688366 + (S) 0.0578217518982235 * x;
const auto x_2_3 = (S) -3.94632584968643 + (S) 1.90796087279737 * x;
const auto x_0_1 = (S) -2.87748189127908 + (S) 5.36997140095829 * x;
const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
return x_0_1 + x_2_3_4_5 * x_sq;
}
else
{
return {};
}
}
};

/**
* Approximation of asinh(x) in the full range, using identity
* asinh(x) = log(x + sqrt(x^2 + 1)).
*
* Orders 6 and 7 use an additional Newton-Raphson iteration,
* but for most cases the accuracy improvement is not worth
* the additional cost (when compared to the performance and
* accuracy achieved by the STL implementation).
*/
template <int order, typename T>
T asinh (T x)
{
using S = scalar_of_t<T>;
using std::abs;
using std::sqrt;
#if defined(XSIMD_HPP)
using xsimd::abs;
using xsimd::sqrt;
#endif

const auto sign = select (x > (S) 0, (T) (S) 1, select (x < (S) 0, (T) (S) -1, (T) (S) 0));
x = abs (x);

const auto log_arg = x + sqrt (x * x + (S) 1);
auto y = log<pow_detail::BaseE<scalar_of_t<T>>, std::min (order, 5), false, AsinhLog2Provider> (log_arg);

if constexpr (order > 5)
{
const auto exp_y = math_approx::exp<order - 1> (y);
y -= (exp_y - log_arg) / exp_y;
}

return sign * y;
}
} // namespace math_approx
169 changes: 86 additions & 83 deletions include/math_approx/src/log_approx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,89 +7,92 @@ namespace math_approx
{
namespace log_detail
{
/** approximation for log2(x), optimized on the range [1, 2] */
template <typename T, int order, bool C1_continuous>
constexpr T log2_approx (T x)
struct Log2Provider
{
static_assert (order >= 3 && order <= 6);
using S = scalar_of_t<T>;

const auto x_sq = x * x;
if constexpr (C1_continuous)
/** approximation for log2(x), optimized on the range [1, 2] */
template <typename T, int order, bool C1_continuous>
static constexpr T log2_approx (T x)
{
if constexpr (order == 3)
{
const auto x_2_3 = (S) -1.09886528622 + (S) 0.164042561333 * x;
const auto x_0_1 = (S) -2.21347520444 + (S) 3.14829792933 * x;
return x_0_1 + x_2_3 * x_sq;
}
else if constexpr (order == 4)
{
const auto x_3_4 = (S) 0.671618567027 + (S) -0.0845960009489 * x;
const auto x_1_2 = (S) 4.16344994072 + (S) -2.19861329856 * x;
const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
return (S) -2.55185920824 + x_1_2_3_4 * x;
}
else if constexpr (order == 5)
{
const auto x_4_5 = (S) -0.432338320780 + (S) 0.0464481811023 * x;
const auto x_2_3 = (S) -3.65368350361 + (S) 1.68976432066 * x;
const auto x_0_1 = (S) -2.82807214111 + (S) 5.17788146374 * x;
const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
return x_0_1 + x_2_3_4_5 * x_sq;
}
else if constexpr (order == 6)
{
const auto x_5_6 = (S) 0.284794437502 + (S) -0.0265448504094 * x;
const auto x_3_4 = (S) 3.38542517475 + (S) -1.31007090775 * x;
const auto x_1_2 = (S) 6.19242937536 + (S) -5.46521465640 * x;
const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
return (S) -3.06081857306 + x_1_2_3_4_5_6 * x;
}
else
{
return {};
}
}
else
{
if constexpr (order == 3)
{
const auto x_2_3 = (S) -1.05974531422 + (S) 0.159220010975 * x;
const auto x_0_1 = (S) -2.16417056258 + (S) 3.06469586582 * x;
return x_0_1 + x_2_3 * x_sq;
}
else if constexpr (order == 4)
{
const auto x_3_4 = (S) 0.649709537672 + (S) -0.0821303550902 * x;
const auto x_1_2 = (S) 4.08637809379 + (S) -2.13412984371 * x;
const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
return (S) -2.51982743265 + x_1_2_3_4 * x;
}
else if constexpr (order == 5)
{
const auto x_4_5 = (S) -0.419319345483 + (S) 0.0451488402558 * x;
const auto x_2_3 = (S) -3.56885211615 + (S) 1.64139451414 * x;
const auto x_0_1 = (S) -2.80534277658 + (S) 5.10697088382 * x;
const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
return x_0_1 + x_2_3_4_5 * x_sq;
}
else if constexpr (order == 6)
static_assert (order >= 3 && order <= 6);
using S = scalar_of_t<T>;

const auto x_sq = x * x;
if constexpr (C1_continuous)
{
const auto x_5_6 = (S) 0.276834061071 + (S) -0.0258400886535 * x;
const auto x_3_4 = (S) 3.30388341157 + (S) -1.27446900713 * x;
const auto x_1_2 = (S) 6.12708086513 + (S) -5.36371998242 * x;
const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
return (S) -3.04376925958 + x_1_2_3_4_5_6 * x;
if constexpr (order == 3)
{
const auto x_2_3 = (S) -1.09886528622 + (S) 0.164042561333 * x;
const auto x_0_1 = (S) -2.21347520444 + (S) 3.14829792933 * x;
return x_0_1 + x_2_3 * x_sq;
}
else if constexpr (order == 4)
{
const auto x_3_4 = (S) 0.671618567027 + (S) -0.0845960009489 * x;
const auto x_1_2 = (S) 4.16344994072 + (S) -2.19861329856 * x;
const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
return (S) -2.55185920824 + x_1_2_3_4 * x;
}
else if constexpr (order == 5)
{
const auto x_4_5 = (S) -0.432338320780 + (S) 0.0464481811023 * x;
const auto x_2_3 = (S) -3.65368350361 + (S) 1.68976432066 * x;
const auto x_0_1 = (S) -2.82807214111 + (S) 5.17788146374 * x;
const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
return x_0_1 + x_2_3_4_5 * x_sq;
}
else if constexpr (order == 6)
{
const auto x_5_6 = (S) 0.284794437502 + (S) -0.0265448504094 * x;
const auto x_3_4 = (S) 3.38542517475 + (S) -1.31007090775 * x;
const auto x_1_2 = (S) 6.19242937536 + (S) -5.46521465640 * x;
const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
return (S) -3.06081857306 + x_1_2_3_4_5_6 * x;
}
else
{
return {};
}
}
else
{
return {};
if constexpr (order == 3)
{
const auto x_2_3 = (S) -1.05974531422 + (S) 0.159220010975 * x;
const auto x_0_1 = (S) -2.16417056258 + (S) 3.06469586582 * x;
return x_0_1 + x_2_3 * x_sq;
}
else if constexpr (order == 4)
{
const auto x_3_4 = (S) 0.649709537672 + (S) -0.0821303550902 * x;
const auto x_1_2 = (S) 4.08637809379 + (S) -2.13412984371 * x;
const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
return (S) -2.51982743265 + x_1_2_3_4 * x;
}
else if constexpr (order == 5)
{
const auto x_4_5 = (S) -0.419319345483 + (S) 0.0451488402558 * x;
const auto x_2_3 = (S) -3.56885211615 + (S) 1.64139451414 * x;
const auto x_0_1 = (S) -2.80534277658 + (S) 5.10697088382 * x;
const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
return x_0_1 + x_2_3_4_5 * x_sq;
}
else if constexpr (order == 6)
{
const auto x_5_6 = (S) 0.276834061071 + (S) -0.0258400886535 * x;
const auto x_3_4 = (S) 3.30388341157 + (S) -1.27446900713 * x;
const auto x_1_2 = (S) 6.12708086513 + (S) -5.36371998242 * x;
const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
return (S) -3.04376925958 + x_1_2_3_4_5_6 * x;
}
else
{
return {};
}
}
}
}
};
}

#if defined(__GNUC__)
Expand All @@ -99,7 +102,7 @@ namespace log_detail
#endif

/** approximation for log(Base, x) (32-bit) */
template <typename Base, int order, bool C1_continuous>
template <typename Base, int order, bool C1_continuous, typename Log2ProviderType = log_detail::Log2Provider>
float log (float x)
{
const auto vi = reinterpret_cast<int32_t&> (x);
Expand All @@ -109,11 +112,11 @@ float log (float x)
const auto vf = reinterpret_cast<const float&> (vfi);

static constexpr auto log2_base_r = 1.0f / Base::log2_base;
return log2_base_r * ((float) e + log_detail::log2_approx<float, order, C1_continuous> (vf));
return log2_base_r * ((float) e + Log2ProviderType::template log2_approx<float, order, C1_continuous> (vf));
}

/** approximation for log(x) (64-bit) */
template <typename Base, int order, bool C1_continuous>
template <typename Base, int order, bool C1_continuous, typename Log2ProviderType = log_detail::Log2Provider>
double log (double x)
{
const auto vi = reinterpret_cast<int64_t&> (x);
Expand All @@ -123,12 +126,12 @@ double log (double x)
const auto vf = reinterpret_cast<const double&> (vfi);

static constexpr auto log2_base_r = 1.0 / Base::log2_base;
return log2_base_r * ((double) e + log_detail::log2_approx<double, order, C1_continuous> (vf));
return log2_base_r * ((double) e + Log2ProviderType::template log2_approx<double, order, C1_continuous> (vf));
}

#if defined(XSIMD_HPP)
/** approximation for pow(Base, x) (32-bit SIMD) */
template <typename Base, int order, bool C1_continuous>
template <typename Base, int order, bool C1_continuous, typename Log2ProviderType = log_detail::Log2Provider>
xsimd::batch<float> log (xsimd::batch<float> x)
{
const auto vi = reinterpret_cast<xsimd::batch<int32_t>&> (x); // NOSONAR
Expand All @@ -138,11 +141,11 @@ xsimd::batch<float> log (xsimd::batch<float> x)
const auto vf = reinterpret_cast<const xsimd::batch<float>&> (vfi); // NOSONAR

static constexpr auto log2_base_r = 1.0f / Base::log2_base;
return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx<xsimd::batch<float>, order, C1_continuous> (vf));
return log2_base_r * (xsimd::to_float (e) + Log2ProviderType::template log2_approx<xsimd::batch<float>, order, C1_continuous> (vf));
}

/** approximation for pow(Base, x) (64-bit SIMD) */
template <typename Base, int order, bool C1_continuous>
template <typename Base, int order, bool C1_continuous, typename Log2ProviderType = log_detail::Log2Provider>
xsimd::batch<double> log (xsimd::batch<double> x)
{
const auto vi = reinterpret_cast<xsimd::batch<int64_t>&> (x); // NOSONAR
Expand All @@ -152,7 +155,7 @@ xsimd::batch<double> log (xsimd::batch<double> x)
const auto vf = reinterpret_cast<const xsimd::batch<double>&> (vfi); // NOSONAR

static constexpr auto log2_base_r = 1.0 / Base::log2_base;
return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx<xsimd::batch<double>, order, C1_continuous> (vf));
return log2_base_r * (xsimd::to_float (e) + Log2ProviderType::template log2_approx<xsimd::batch<double>, order, C1_continuous> (vf));
}
#endif

Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@ setup_catch_test(log_approx_test)
setup_catch_test(wright_omega_approx_test)
setup_catch_test(polylog_approx_test)
setup_catch_test(sinh_cosh_approx_test)
setup_catch_test(asinh_approx_test)
54 changes: 54 additions & 0 deletions test/src/asinh_approx_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#include "test_helpers.hpp"
#include "catch2/catch_template_test_macros.hpp"

#include <catch2/catch_test_macros.hpp>
#include <iostream>

#include <math_approx/math_approx.hpp>

template <typename T = float>
void test_approx (const auto& all_floats, const auto& y_exact, auto&& f_approx, float err_bound)
{
const auto y_approx = test_helpers::compute_all<T> (all_floats, f_approx);
const auto error = test_helpers::compute_error<T> (y_exact, y_approx);
const auto max_error = test_helpers::abs_max<T> (error);

std::cout << max_error << std::endl;
REQUIRE (std::abs (max_error) < err_bound);
}

TEMPLATE_TEST_CASE ("Asinh Approx Test", "", float, double)
{
#if ! defined(WIN32)
const auto all_floats = test_helpers::all_32_bit_floats<TestType> (-10.0f, 10.0f, 1.0e-2f);
#else
const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f);
#endif
const auto y_exact = test_helpers::compute_all<TestType> (all_floats, [] (auto x)
{ return std::asinh (x); });

SECTION ("6th-Order")
{
test_approx<TestType> (all_floats, y_exact, [] (auto x)
{ return math_approx::asinh<6> (x); },
5.0e-7f);
}
SECTION ("5th-Order")
{
test_approx<TestType> (all_floats, y_exact, [] (auto x)
{ return math_approx::asinh<5> (x); },
6.0e-5f);
}
SECTION ("4th-Order")
{
test_approx<TestType> (all_floats, y_exact, [] (auto x)
{ return math_approx::asinh<4> (x); },
3.5e-4f);
}
SECTION ("3th-Order")
{
test_approx<TestType> (all_floats, y_exact, [] (auto x)
{ return math_approx::asinh<3> (x); },
2.5e-3f);
}
}
3 changes: 3 additions & 0 deletions tools/bench/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@ target_link_libraries(polylog_approx_bench PRIVATE benchmark::benchmark math_app

add_executable(sinh_cosh_approx_bench sinh_cosh_bench.cpp)
target_link_libraries(sinh_cosh_approx_bench PRIVATE benchmark::benchmark math_approx)

add_executable(asinh_approx_bench asinh_bench.cpp)
target_link_libraries(asinh_approx_bench PRIVATE benchmark::benchmark math_approx)
Loading

0 comments on commit 449a1c5

Please sign in to comment.