Skip to content

Commit

Permalink
Power and Logarithm Approximations (#1)
Browse files Browse the repository at this point in the history
* Working pow functions, needs more tests and benchmarks

* Improved tests and started on benchmarks

* Test improvements

* First pass at log approximations

* More complete log implementations

* More benchmarks work

* Fixing test issues

* Fix warning ignores

* GCCCCCCC

* Ignoring uninitialized warnings

* Contending with memory issues on Linux

* More test fixing for Linux
  • Loading branch information
jatinchowdhury18 authored Nov 23, 2023
1 parent a1590f8 commit 109bb58
Show file tree
Hide file tree
Showing 14 changed files with 821 additions and 41 deletions.
2 changes: 2 additions & 0 deletions include/math_approx/math_approx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ namespace math_approx
#include "src/tanh_approx.hpp"
#include "src/sigmoid_approx.hpp"
#include "src/trig_approx.hpp"
#include "src/pow_approx.hpp"
#include "src/log_approx.hpp"
140 changes: 140 additions & 0 deletions include/math_approx/src/log_approx.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#pragma once

#include "basic_math.hpp"
#include "pow_approx.hpp"

namespace math_approx
{
namespace log_detail
{
/** approximation for log2(x), optimized on the range [1, 2] */
template <typename T, int order>
constexpr T log2_approx (T x)
{
static_assert (order >= 3 && order <= 6);
using S = scalar_of_t<T>;

const auto x_sq = x * x;
if constexpr (order == 3)
{
const auto x_2_3 = (S) -1.05974531422 + (S) 0.159220010975 * x;
const auto x_0_1 = (S) -2.16417056258 + (S) 3.06469586582 * x;
return x_0_1 + x_2_3 * x_sq;
}
else if constexpr (order == 4)
{
const auto x_3_4 = (S) 0.649709537672 + (S) -0.0821303550902 * x;
const auto x_1_2 = (S) 4.08637809379 + (S) -2.13412984371 * x;
const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
return (S) -2.51982743265 + x_1_2_3_4 * x;
}
else if constexpr (order == 5)
{
const auto x_4_5 = (S) -0.419319345483 + (S) 0.0451488402558 * x;
const auto x_2_3 = (S) -3.56885211615 + (S) 1.64139451414 * x;
const auto x_0_1 = (S) -2.80534277658 + (S) 5.10697088382 * x;
const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
return x_0_1 + x_2_3_4_5 * x_sq;
}
else if constexpr (order == 6)
{
const auto x_5_6 = (S) 0.276834061071 + (S) -0.0258400886535 * x;
const auto x_3_4 = (S) 3.30388341157 + (S) -1.27446900713 * x;
const auto x_1_2 = (S) 6.12708086513 + (S) -5.36371998242 * x;
const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
return (S) -3.04376925958 + x_1_2_3_4_5_6 * x;
}
else
{
return {};
}
}
}

#if defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing" // these methods require some type-punning
#pragma GCC diagnostic ignored "-Wuninitialized"
#endif

/** approximation for log(Base, x) (32-bit) */
template <typename Base, int order>
float log (float x)
{
const auto vi = reinterpret_cast<int32_t&> (x);
const auto ex = vi & 0x7f800000;
const auto e = (ex >> 23) - 127;
const auto vfi = (vi - ex) | 0x3f800000;
const auto vf = reinterpret_cast<const float&> (vfi);

static constexpr auto log2_base_r = 1.0f / Base::log2_base;
return log2_base_r * ((float) e + log_detail::log2_approx<float, order> (vf));
}

/** approximation for log(x) (64-bit) */
template <typename Base, int order>
double log (double x)
{
const auto vi = reinterpret_cast<int64_t&> (x);
const auto ex = vi & 0x7ff0000000000000;
const auto e = (ex >> 52) - 1023;
const auto vfi = (vi - ex) | 0x3ff0000000000000;
const auto vf = reinterpret_cast<const double&> (vfi);

static constexpr auto log2_base_r = 1.0 / Base::log2_base;
return log2_base_r * ((double) e + log_detail::log2_approx<double, order> (vf));
}

#if defined(XSIMD_HPP)
/** approximation for pow(Base, x) (32-bit SIMD) */
template <typename Base, int order>
xsimd::batch<float> log (xsimd::batch<float> x)
{
const auto vi = reinterpret_cast<xsimd::batch<int32_t>&> (x); // NOSONAR
const auto ex = vi & 0x7f800000;
const auto e = (ex >> 23) - 127;
const auto vfi = (vi - ex) | 0x3f800000;
const auto vf = reinterpret_cast<const xsimd::batch<float>&> (vfi); // NOSONAR

static constexpr auto log2_base_r = 1.0f / Base::log2_base;
return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx<xsimd::batch<float>, order> (vf));
}

/** approximation for pow(Base, x) (64-bit SIMD) */
template <typename Base, int order>
xsimd::batch<double> log (xsimd::batch<double> x)
{
const auto vi = reinterpret_cast<xsimd::batch<int64_t>&> (x); // NOSONAR
const auto ex = vi & 0x7ff0000000000000;
const auto e = (ex >> 52) - 1023;
const auto vfi = (vi - ex) | 0x3ff0000000000000;
const auto vf = reinterpret_cast<const xsimd::batch<double>&> (vfi); // NOSONAR

static constexpr auto log2_base_r = 1.0 / Base::log2_base;
return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx<xsimd::batch<double>, order> (vf));
}
#endif

#if defined(__GNUC__)
#pragma GCC diagnostic pop // end ignore strict-aliasing warnings
#endif

template <int order, typename T>
T log (T x)
{
return log<pow_detail::BaseE<scalar_of_t<T>>, order> (x);
}

template <int order, typename T>
T log2 (T x)
{
return log<pow_detail::Base2<scalar_of_t<T>>, order> (x);
}

template <int order, typename T>
T log10 (T x)
{
return log<pow_detail::Base10<scalar_of_t<T>>, order> (x);
}
}
157 changes: 157 additions & 0 deletions include/math_approx/src/pow_approx.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
#pragma once

#include "basic_math.hpp"

namespace math_approx
{
namespace pow_detail
{
/** approximation for 2^x, optimized on the range [0, 1] */
template <typename T, int order>
constexpr T pow2_approx (T x)
{
static_assert (order >= 3 && order <= 6);
using S = scalar_of_t<T>;

const auto x_sq = x * x;
if constexpr (order == 3)
{
const auto x_2_3 = (S) 0.226307586882 + (S) 0.0782680256330 * x;
const auto x_0_1 = (S) 1 + (S) 0.695424387485 * x;
return x_0_1 + x_2_3 * x_sq;
}
else if constexpr (order == 4)
{
const auto x_3_4 = (S) 0.0520324008177 + (S) 0.0135557244044 * x;
const auto x_1_2 = (S) 0.693032120001 + (S) 0.241379754777 * x;
const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq;
return (S) 1 + x_1_2_3_4 * x;
}
else if constexpr (order == 5)
{
const auto x_4_5 = (S) 0.00899009909264 + (S) 0.00187839071291 * x;
const auto x_2_3 = (S) 0.240156326598 + (S) 0.0558229130202 * x;
const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq;
const auto x_0_1 = (S) 1 + (S) 0.693152270576 * x;
return x_0_1 + x_2_3_4_5 * x_sq;
}
else if constexpr (order == 6)
{
const auto x_5_6 = (S) 0.00124359387839 + (S) 0.000217187820427 * x;
const auto x_3_4 = (S) 0.0554833098983 + (S) 0.00967911763840 * x;
const auto x_1_2 = (S) 0.693147003658 + (S) 0.240229787107 * x;
const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq;
const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq;
return (S) 1 + x_1_2_3_4_5_6 * x;
}
else
{
return {};
}
}

template <typename T>
struct BaseE
{
static constexpr auto log2_base = (T) 1.4426950408889634074;
};

template <typename T>
struct Base2
{
static constexpr auto log2_base = (T) 1;
};

template <typename T>
struct Base10
{
static constexpr auto log2_base = (T) 3.3219280948873623479;
};
}

#if defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing" // these methods require some type-punning
#pragma GCC diagnostic ignored "-Wuninitialized"
#endif

/** approximation for pow(Base, x) (32-bit) */
template <typename Base, int order>
float pow (float x)
{
x = std::max (-126.0f, Base::log2_base * x);

const auto xi = (int32_t) x;
const auto l = x < 0.0f ? xi - 1 : xi;
const auto f = x - (float) l;
const auto vi = (l + 127) << 23;

return reinterpret_cast<const float&> (vi) * pow_detail::pow2_approx<float, order> (f);
}

/** approximation for pow(Base, x) (64-bit) */
template <typename Base, int order>
double pow (double x)
{
x = std::max (-1022.0, Base::log2_base * x);

const auto xi = (int64_t) x;
const auto l = x < 0.0 ? xi - 1 : xi;
const auto d = x - (double) l;
const auto vi = (l + 1023) << 52;

return reinterpret_cast<const double&> (vi) * pow_detail::pow2_approx<double, order> (d);
}

#if defined(XSIMD_HPP)
/** approximation for pow(Base, x) (32-bit SIMD) */
template <typename Base, int order>
xsimd::batch<float> pow (xsimd::batch<float> x)
{
x = xsimd::max (xsimd::broadcast (-126.0f), Base::log2_base * x);

const auto xi = xsimd::to_int (x);
const auto l = xsimd::select (xsimd::batch_bool_cast<int32_t> (x < 0.0f), xi - 1, xi);
const auto f = x - xsimd::to_float (l);
const auto vi = (l + 127) << 23;

return reinterpret_cast<const xsimd::batch<float>&> (vi) * pow_detail::pow2_approx<xsimd::batch<float>, order> (f);
}

/** approximation for pow(Base, x) (64-bit SIMD) */
template <typename Base, int order>
xsimd::batch<double> pow (xsimd::batch<double> x)
{
x = xsimd::max (-1022.0, Base::log2_base * x);

const auto xi = xsimd::to_int (x);
const auto l = xsimd::select (xsimd::batch_bool_cast<int64_t> (x < 0.0), xi - 1, xi);
const auto d = x - xsimd::to_float (l);
const auto vi = (l + 1023) << 52;

return reinterpret_cast<const xsimd::batch<double>&> (vi) * pow_detail::pow2_approx<xsimd::batch<double>, order> (d);
}
#endif

#if defined(__GNUC__)
#pragma GCC diagnostic pop // end ignore strict-aliasing warnings
#endif

template <int order, typename T>
T exp (T x)
{
return pow<pow_detail::BaseE<scalar_of_t<T>>, order> (x);
}

template <int order, typename T>
T exp2 (T x)
{
return pow<pow_detail::Base2<scalar_of_t<T>>, order> (x);
}

template <int order, typename T>
T exp10 (T x)
{
return pow<pow_detail::Base10<scalar_of_t<T>>, order> (x);
}
}
2 changes: 2 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@ endfunction(setup_catch_test)
setup_catch_test(tanh_approx_test)
setup_catch_test(sigmoid_approx_test)
setup_catch_test(trig_approx_test)
setup_catch_test(pow_approx_test)
setup_catch_test(log_approx_test)
Loading

0 comments on commit 109bb58

Please sign in to comment.