Skip to content

Commit

Permalink
Add C1-continuous variations for pow and log methods
Browse files Browse the repository at this point in the history
  • Loading branch information
jatinchowdhury18 committed Nov 25, 2023
1 parent 109bb58 commit 72f53bf
Show file tree
Hide file tree
Showing 4 changed files with 372 additions and 88 deletions.
128 changes: 84 additions & 44 deletions include/math_approx/src/log_approx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,46 +8,86 @@ namespace math_approx
namespace log_detail
{
/** approximation for log2(x), optimized on the range [1, 2] */
template <typename T, int order>
template <typename T, int order, bool C1_continuous>
constexpr T log2_approx (T x)
{
static_assert (order >= 3 && order <= 6);
using S = scalar_of_t<T>;

const auto x_sq = x * x;
if constexpr (order == 3)
if constexpr (C1_continuous)
{
const auto x_2_3 = (S) -1.05974531422 + (S) 0.159220010975 * x;
const auto x_0_1 = (S) -2.16417056258 + (S) 3.06469586582 * x;
return x_0_1 + x_2_3 * x_sq;
}
else if constexpr (order == 4)
{
const auto x_3_4 = (S) 0.649709537672 + (S) -0.0821303550902 * x;
const auto x_1_2 = (S) 4.08637809379 + (S) -2.13412984371 * x;
const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
return (S) -2.51982743265 + x_1_2_3_4 * x;
}
else if constexpr (order == 5)
{
const auto x_4_5 = (S) -0.419319345483 + (S) 0.0451488402558 * x;
const auto x_2_3 = (S) -3.56885211615 + (S) 1.64139451414 * x;
const auto x_0_1 = (S) -2.80534277658 + (S) 5.10697088382 * x;
const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
return x_0_1 + x_2_3_4_5 * x_sq;
}
else if constexpr (order == 6)
{
const auto x_5_6 = (S) 0.276834061071 + (S) -0.0258400886535 * x;
const auto x_3_4 = (S) 3.30388341157 + (S) -1.27446900713 * x;
const auto x_1_2 = (S) 6.12708086513 + (S) -5.36371998242 * x;
const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
return (S) -3.04376925958 + x_1_2_3_4_5_6 * x;
if constexpr (order == 3)
{
const auto x_2_3 = (S) -1.09886528622 + (S) 0.164042561333 * x;
const auto x_0_1 = (S) -2.21347520444 + (S) 3.14829792933 * x;
return x_0_1 + x_2_3 * x_sq;
}
else if constexpr (order == 4)
{
const auto x_3_4 = (S) 0.671618567027 + (S) -0.0845960009489 * x;
const auto x_1_2 = (S) 4.16344994072 + (S) -2.19861329856 * x;
const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
return (S) -2.55185920824 + x_1_2_3_4 * x;
}
else if constexpr (order == 5)
{
const auto x_4_5 = (S) -0.432338320780 + (S) 0.0464481811023 * x;
const auto x_2_3 = (S) -3.65368350361 + (S) 1.68976432066 * x;
const auto x_0_1 = (S) -2.82807214111 + (S) 5.17788146374 * x;
const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
return x_0_1 + x_2_3_4_5 * x_sq;
}
else if constexpr (order == 6)
{
const auto x_5_6 = (S) 0.284794437502 + (S) -0.0265448504094 * x;
const auto x_3_4 = (S) 3.38542517475 + (S) -1.31007090775 * x;
const auto x_1_2 = (S) 6.19242937536 + (S) -5.46521465640 * x;
const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
return (S) -3.06081857306 + x_1_2_3_4_5_6 * x;
}
else
{
return {};
}
}
else
{
return {};
if constexpr (order == 3)
{
const auto x_2_3 = (S) -1.05974531422 + (S) 0.159220010975 * x;
const auto x_0_1 = (S) -2.16417056258 + (S) 3.06469586582 * x;
return x_0_1 + x_2_3 * x_sq;
}
else if constexpr (order == 4)
{
const auto x_3_4 = (S) 0.649709537672 + (S) -0.0821303550902 * x;
const auto x_1_2 = (S) 4.08637809379 + (S) -2.13412984371 * x;
const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
return (S) -2.51982743265 + x_1_2_3_4 * x;
}
else if constexpr (order == 5)
{
const auto x_4_5 = (S) -0.419319345483 + (S) 0.0451488402558 * x;
const auto x_2_3 = (S) -3.56885211615 + (S) 1.64139451414 * x;
const auto x_0_1 = (S) -2.80534277658 + (S) 5.10697088382 * x;
const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
return x_0_1 + x_2_3_4_5 * x_sq;
}
else if constexpr (order == 6)
{
const auto x_5_6 = (S) 0.276834061071 + (S) -0.0258400886535 * x;
const auto x_3_4 = (S) 3.30388341157 + (S) -1.27446900713 * x;
const auto x_1_2 = (S) 6.12708086513 + (S) -5.36371998242 * x;
const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
return (S) -3.04376925958 + x_1_2_3_4_5_6 * x;
}
else
{
return {};
}
}
}
}
Expand All @@ -59,7 +99,7 @@ namespace log_detail
#endif

/** approximation for log(Base, x) (32-bit) */
template <typename Base, int order>
template <typename Base, int order, bool C1_continuous>
float log (float x)
{
const auto vi = reinterpret_cast<int32_t&> (x);
Expand All @@ -69,11 +109,11 @@ float log (float x)
const auto vf = reinterpret_cast<const float&> (vfi);

static constexpr auto log2_base_r = 1.0f / Base::log2_base;
return log2_base_r * ((float) e + log_detail::log2_approx<float, order> (vf));
return log2_base_r * ((float) e + log_detail::log2_approx<float, order, C1_continuous> (vf));
}

/** approximation for log(x) (64-bit) */
template <typename Base, int order>
template <typename Base, int order, bool C1_continuous>
double log (double x)
{
const auto vi = reinterpret_cast<int64_t&> (x);
Expand All @@ -83,12 +123,12 @@ double log (double x)
const auto vf = reinterpret_cast<const double&> (vfi);

static constexpr auto log2_base_r = 1.0 / Base::log2_base;
return log2_base_r * ((double) e + log_detail::log2_approx<double, order> (vf));
return log2_base_r * ((double) e + log_detail::log2_approx<double, order, C1_continuous> (vf));
}

#if defined(XSIMD_HPP)
/** approximation for pow(Base, x) (32-bit SIMD) */
template <typename Base, int order>
template <typename Base, int order, bool C1_continuous>
xsimd::batch<float> log (xsimd::batch<float> x)
{
const auto vi = reinterpret_cast<xsimd::batch<int32_t>&> (x); // NOSONAR
Expand All @@ -98,11 +138,11 @@ xsimd::batch<float> log (xsimd::batch<float> x)
const auto vf = reinterpret_cast<const xsimd::batch<float>&> (vfi); // NOSONAR

static constexpr auto log2_base_r = 1.0f / Base::log2_base;
return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx<xsimd::batch<float>, order> (vf));
return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx<xsimd::batch<float>, order, C1_continuous> (vf));
}

/** approximation for pow(Base, x) (64-bit SIMD) */
template <typename Base, int order>
template <typename Base, int order, bool C1_continuous>
xsimd::batch<double> log (xsimd::batch<double> x)
{
const auto vi = reinterpret_cast<xsimd::batch<int64_t>&> (x); // NOSONAR
Expand All @@ -112,29 +152,29 @@ xsimd::batch<double> log (xsimd::batch<double> x)
const auto vf = reinterpret_cast<const xsimd::batch<double>&> (vfi); // NOSONAR

static constexpr auto log2_base_r = 1.0 / Base::log2_base;
return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx<xsimd::batch<double>, order> (vf));
return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx<xsimd::batch<double>, order, C1_continuous> (vf));
}
#endif

#if defined(__GNUC__)
#pragma GCC diagnostic pop // end ignore strict-aliasing warnings
#endif

template <int order, typename T>
template <int order, bool C1_continuous = false, typename T>
T log (T x)
{
return log<pow_detail::BaseE<scalar_of_t<T>>, order> (x);
return log<pow_detail::BaseE<scalar_of_t<T>>, order, C1_continuous> (x);
}

template <int order, typename T>
template <int order, bool C1_continuous = false, typename T>
T log2 (T x)
{
return log<pow_detail::Base2<scalar_of_t<T>>, order> (x);
return log<pow_detail::Base2<scalar_of_t<T>>, order, C1_continuous> (x);
}

template <int order, typename T>
template <int order, bool C1_continuous = false, typename T>
T log10 (T x)
{
return log<pow_detail::Base10<scalar_of_t<T>>, order> (x);
return log<pow_detail::Base10<scalar_of_t<T>>, order, C1_continuous> (x);
}
}
Loading

0 comments on commit 72f53bf

Please sign in to comment.