Adding Wright-Omega function (#2)

* Add implementation of the Wright-Omega function with tests * Add benchmarks and some more testing for wright-omega
Chowdhury-DSP · Nov 28, 2023 · f4642b0 · f4642b0
1 parent 72f53bf
commit f4642b0
Show file tree

Hide file tree

Showing 10 changed files with 676 additions and 15 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,5 @@
 .vscode/
 
 build*/
+
+.DS_Store
diff --git a/README.md b/README.md
@@ -2,4 +2,11 @@
 
 A C++ library for math approximations.
 
-More info coming soon!
+Currently supported:
+
+- sin/cos
+- exp/exp2/exp10
+- log/log2/log10
+- tanh
+- sigmoid
+- Wright-Omega function
diff --git a/include/math_approx/math_approx.hpp b/include/math_approx/math_approx.hpp
@@ -11,3 +11,4 @@ namespace math_approx
 #include "src/trig_approx.hpp"
 #include "src/pow_approx.hpp"
 #include "src/log_approx.hpp"
+#include "src/wright_omega_approx.hpp"
diff --git a/include/math_approx/src/wright_omega_approx.hpp b/include/math_approx/src/wright_omega_approx.hpp
@@ -0,0 +1,84 @@
+#pragma once
+
+#include "basic_math.hpp"
+
+namespace math_approx
+{
+template <int num_nr_iters, int poly_order = 3, int log_order = (num_nr_iters <= 1 ? 3 : 4), int exp_order = log_order, typename T>
+T wright_omega (T x)
+{
+    static_assert (poly_order == 3 || poly_order == 5);
+
+    using S = scalar_of_t<T>;
+    static constexpr auto E = (S) 2.7182818284590452354;
+
+    const auto x1 = [] (T _x)
+    {
+        const auto x_sq = _x * _x;
+        if constexpr (poly_order == 3)
+        {
+            const auto y_2_3 = (S) 0.0534379648805832 + (S) -0.00251076420630778 * _x;
+            const auto y_0_1 = (S) 0.616522951065868 + (S) 0.388418422853809 * _x;
+            return y_0_1 + y_2_3 * x_sq;
+        }
+        else if constexpr (poly_order == 5)
+        {
+            const auto y_4_5 = (S) -0.00156418794118294 + (S) -0.00151562297325209 * _x;
+            const auto y_2_3 = (S) 0.0719291313363515 + (S) 0.0216881206167543 * _x;
+            const auto y_0_1 = (S) 0.569291529016010 + (S) 0.290890537885083 * _x;
+            const auto y_2_3_4_5 = y_2_3 + y_4_5 * x_sq;
+            return y_0_1 + y_2_3_4_5 * x_sq;
+        }
+        else
+        {
+            return T {};
+        }
+    }(x);
+    const auto x2 = x - log<log_order> (x) + (S) 0.32352057096397160124 * exp<exp_order> ((S) -0.029614177658043381316 * x);
+
+    auto y = select (x < (S) -3, T {}, select (x < (S) E, x1, x2));
+
+    const auto nr_update = [] (T _x, T _y)
+    {
+        return _y - (_y - exp<exp_order> (_x - _y)) / (_y + (S) 1);
+    };
+
+    for (int i = 0; i < num_nr_iters; ++i)
+        y = nr_update (x, y);
+
+    return y;
+}
+
+/**
+ * Wright-Omega function using Stephano D'Angelo's derivation (https://www.dafx.de/paper-archive/2019/DAFx2019_paper_5.pdf)
+ * With `num_nr_iters == 0`, this is the fastest implementation, but the least accurate.
+ * With `num_nr_iters == 1`, this is faster than the other implementation with 0 iterations, and little bit more accurate.
+ * For more accuracy, use the other implementation with at least 1 NR iteration.
+ */
+template <int num_nr_iters, int log_order = 3, int exp_order = log_order, typename T>
+T wright_omega_dangelo (T x)
+{
+    using S = scalar_of_t<T>;
+
+    const auto x1 = [] (T _x)
+    {
+        const auto x_sq = _x * _x;
+        const auto y_2_3 = (S) 4.775931364975583e-2 + (S) -1.314293149877800e-3 * _x;
+        const auto y_0_1 = (S) 6.313183464296682e-1 + (S) 3.631952663804445e-1 * _x;
+        return y_0_1 + y_2_3 * x_sq;
+    }(x);
+    const auto x2 = x - log<log_order> (x);
+
+    auto y = select (x < (S) -3.341459552768620, T {}, select (x < (S) 8, x1, x2));
+
+    const auto nr_update = [] (T _x, T _y)
+    {
+        return _y - (_y - exp<exp_order> (_x - _y)) / (_y + (S) 1);
+    };
+
+    for (int i = 0; i < num_nr_iters; ++i)
+        y = nr_update (x, y);
+
+    return y;
+}
+} // namespace math_approx
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -33,3 +33,4 @@ setup_catch_test(sigmoid_approx_test)
 setup_catch_test(trig_approx_test)
 setup_catch_test(pow_approx_test)
 setup_catch_test(log_approx_test)
+setup_catch_test(wright_omega_approx_test)