GH-44952: [C++][Python] Add Hyperbolic Trig functions (#44630)

### Rationale for this change Hyperbolic trigonometric functions are a common transformation for dealing with skewed data, and because they are built into the C++ standard library, supporting them requires minimal change. ### What changes are included in this PR? Adds the `a?(sin|cos|tan)h` functions (`sinh`, `cosh`, `tanh`, `asinh`, `acosh`, `atanh`) to the base C++ library and Substrait, and adds tests for these functions in pyarrow. ### Are these changes tested? Yes, in the same style as the trigonometric functions. ### Are there any user-facing changes? Yes. Additional compute functions are added. * GitHub Issue: #44952 Authored-by: Kevin H Wilson <[email protected]> Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
apache · Dec 9, 2024 · 104b040 · 104b040
1 parent f1b293d
commit 104b040
Show file tree

Hide file tree

Showing 7 changed files with 320 additions and 13 deletions.
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
@@ -732,20 +732,26 @@ void RegisterScalarOptions(FunctionRegistry* registry) {
 
 SCALAR_ARITHMETIC_UNARY(AbsoluteValue, "abs", "abs_checked")
 SCALAR_ARITHMETIC_UNARY(Acos, "acos", "acos_checked")
+SCALAR_ARITHMETIC_UNARY(Acosh, "acosh", "acosh_checked")
 SCALAR_ARITHMETIC_UNARY(Asin, "asin", "asin_checked")
+SCALAR_ARITHMETIC_UNARY(Atanh, "atanh", "atanh_checked")
 SCALAR_ARITHMETIC_UNARY(Cos, "cos", "cos_checked")
 SCALAR_ARITHMETIC_UNARY(Ln, "ln", "ln_checked")
 SCALAR_ARITHMETIC_UNARY(Log10, "log10", "log10_checked")
 SCALAR_ARITHMETIC_UNARY(Log1p, "log1p", "log1p_checked")
 SCALAR_ARITHMETIC_UNARY(Log2, "log2", "log2_checked")
-SCALAR_ARITHMETIC_UNARY(Sqrt, "sqrt", "sqrt_checked")
 SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked")
 SCALAR_ARITHMETIC_UNARY(Sin, "sin", "sin_checked")
+SCALAR_ARITHMETIC_UNARY(Sqrt, "sqrt", "sqrt_checked")
 SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked")
+SCALAR_EAGER_UNARY(Asinh, "asinh")
 SCALAR_EAGER_UNARY(Atan, "atan")
+SCALAR_EAGER_UNARY(Cosh, "cosh")
 SCALAR_EAGER_UNARY(Exp, "exp")
 SCALAR_EAGER_UNARY(Expm1, "expm1")
 SCALAR_EAGER_UNARY(Sign, "sign")
+SCALAR_EAGER_UNARY(Sinh, "sinh")
+SCALAR_EAGER_UNARY(Tanh, "tanh")
 
 Result<Datum> Round(const Datum& arg, RoundOptions options, ExecContext* ctx) {
   return CallFunction("round", {arg}, &options, ctx);

diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
@@ -784,6 +784,52 @@ Result<Datum> Atan(const Datum& arg, ExecContext* ctx = NULLPTR);
 ARROW_EXPORT
 Result<Datum> Atan2(const Datum& y, const Datum& x, ExecContext* ctx = NULLPTR);
 
+/// \brief Compute the hyperbolic sine of the array values.
+/// \param[in] arg the values to compute the hyperbolic sine for
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise hyperbolic sine of the values
+ARROW_EXPORT
+Result<Datum> Sinh(const Datum& arg, ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the hyperbolic cosine of the array values.
+/// \param[in] arg the values to compute the hyperbolic cosine for
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise hyperbolic cosine of the values
+ARROW_EXPORT
+Result<Datum> Cosh(const Datum& arg, ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the hyperbolic tangent of the array values.
+/// \param[in] arg the values to compute the hyperbolic tangent for
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise hyperbolic tangent of the values
+ARROW_EXPORT
+Result<Datum> Tanh(const Datum& arg, ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the inverse hyperbolic sine of the array values.
+/// \param[in] arg the values to compute the inverse hyperbolic sine for
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise inverse hyperbolic sine of the values
+ARROW_EXPORT
+Result<Datum> Asinh(const Datum& arg, ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the inverse hyperbolic cosine of the array values.
+/// \param[in] arg the values to compute the inverse hyperbolic cosine for
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise inverse hyperbolic cosine (NaN or an error for inputs < 1)
+ARROW_EXPORT
+Result<Datum> Acosh(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+                    ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the inverse hyperbolic tangent of the array values.
+/// \param[in] arg the values to compute the inverse hyperbolic tangent for
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise inverse hyperbolic tangent (finite only for |x| < 1)
+ARROW_EXPORT
+Result<Datum> Atanh(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+                    ExecContext* ctx = NULLPTR);
+
 /// \brief Get the natural log of a value.
 ///
 /// If argument is null the result will be null.

diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -178,6 +178,14 @@ struct SinChecked {
   }
 };
 
+// Hyperbolic sine of one value; T and Arg0 must be the same type.
+struct Sinh {
+  template <typename T, typename Arg0>
+  static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 x, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "output and input types must match");
+    return std::sinh(x);
+  }
+};
+
 struct Cos {
   template <typename T, typename Arg0>
   static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
@@ -198,6 +206,14 @@ struct CosChecked {
   }
 };
 
+// Hyperbolic cosine of one value; T and Arg0 must be the same type.
+struct Cosh {
+  template <typename T, typename Arg0>
+  static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 x, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "output and input types must match");
+    return std::cosh(x);
+  }
+};
+
 struct Tan {
   template <typename T, typename Arg0>
   static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
@@ -219,6 +235,14 @@ struct TanChecked {
   }
 };
 
+// Hyperbolic tangent of one value; T and Arg0 must be the same type.
+struct Tanh {
+  template <typename T, typename Arg0>
+  static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 x, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "output and input types must match");
+    return std::tanh(x);
+  }
+};
+
 struct Asin {
   template <typename T, typename Arg0>
   static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
@@ -242,6 +266,14 @@ struct AsinChecked {
   }
 };
 
+// Inverse hyperbolic sine of one value; T and Arg0 must be the same type.
+struct Asinh {
+  template <typename T, typename Arg0>
+  static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 x, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "output and input types must match");
+    return std::asinh(x);
+  }
+};
+
 struct Acos {
   template <typename T, typename Arg0>
   static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
@@ -265,6 +297,29 @@ struct AcosChecked {
   }
 };
 
+// Unchecked inverse hyperbolic cosine of one value; T and Arg0 must be the same type.
+// Inputs below 1 yield a quiet NaN rather than an error.
+struct Acosh {
+  template <typename T, typename Arg0>
+  static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 x, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "output and input types must match");
+    const bool out_of_domain = x < 1.0;
+    return ARROW_PREDICT_FALSE(out_of_domain) ? std::numeric_limits<T>::quiet_NaN()
+                                              : std::acosh(x);
+  }
+};
+
+// Checked inverse hyperbolic cosine of one value; T and Arg0 must be the same type.
+// Inputs below 1 set *st to Status::Invalid("domain error") and are returned as-is.
+struct AcoshChecked {
+  template <typename T, typename Arg0>
+  static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 x, Status* st) {
+    static_assert(std::is_same<T, Arg0>::value, "output and input types must match");
+    const bool out_of_domain = x < 1.0;
+    if (ARROW_PREDICT_FALSE(out_of_domain)) {
+      *st = Status::Invalid("domain error");
+      return x;
+    }
+    return std::acosh(x);
+  }
+};
+
 struct Atan {
   template <typename T, typename Arg0>
   static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
@@ -273,6 +328,35 @@ struct Atan {
   }
 };
 
+// Unchecked inverse hyperbolic tangent of one value; T and Arg0 must be the same type.
+struct Atanh {
+  template <typename T, typename Arg0>
+  static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 x, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "output and input types must match");
+    // N.B. The boundary is deliberately exclusive here, unlike in AtanhChecked: in
+    // GH-44630 it was decided that the checked version should error when asked for
+    // +/- 1 as an input and the unchecked version should return +/- oo.
+    if (ARROW_PREDICT_FALSE(std::fabs(x) > 1.0)) {
+      return std::numeric_limits<T>::quiet_NaN();
+    }
+    return std::atanh(x);
+  }
+};
+
+// Checked inverse hyperbolic tangent of one value; T and Arg0 must be the same type.
+// Inputs with |x| >= 1 set *st to Status::Invalid("domain error") and are returned as-is.
+struct AtanhChecked {
+  template <typename T, typename Arg0>
+  static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 x, Status* st) {
+    static_assert(std::is_same<T, Arg0>::value, "output and input types must match");
+    // N.B. The boundary is deliberately inclusive here, unlike in Atanh: in GH-44630
+    // it was decided that the checked version should error when asked for +/- 1 as an
+    // input and the unchecked version should return +/- oo.
+    if (ARROW_PREDICT_FALSE(std::fabs(x) >= 1.0)) {
+      *st = Status::Invalid("domain error");
+      return x;
+    }
+    return std::atanh(x);
+  }
+};
+
 struct Atan2 {
   template <typename T, typename Arg0, typename Arg1>
   static enable_if_floating_value<Arg0, T> Call(KernelContext*, Arg0 y, Arg1 x, Status*) {
@@ -1178,6 +1262,8 @@ const FunctionDoc sin_checked_doc{"Compute the sine",
                                    "to return NaN instead, see \"sin\"."),
                                   {"x"}};
 
+// Docs for "sinh"; the second (long-form) string is empty.
+const FunctionDoc sinh_doc{"Compute the hyperbolic sine", "", {"x"}};
+
 const FunctionDoc cos_doc{"Compute the cosine",
                           ("NaN is returned for invalid input values;\n"
                            "to raise an error instead, see \"cos_checked\"."),
@@ -1188,6 +1274,8 @@ const FunctionDoc cos_checked_doc{"Compute the cosine",
                                    "to return NaN instead, see \"cos\"."),
                                   {"x"}};
 
+// Docs for "cosh"; the second (long-form) string is empty.
+const FunctionDoc cosh_doc{"Compute the hyperbolic cosine", "", {"x"}};
+
 const FunctionDoc tan_doc{"Compute the tangent",
                           ("NaN is returned for invalid input values;\n"
                            "to raise an error instead, see \"tan_checked\"."),
@@ -1198,6 +1286,8 @@ const FunctionDoc tan_checked_doc{"Compute the tangent",
                                    "to return NaN instead, see \"tan\"."),
                                   {"x"}};
 
+// Docs for "tanh"; the second (long-form) string is empty.
+const FunctionDoc tanh_doc{"Compute the hyperbolic tangent", "", {"x"}};
+
 const FunctionDoc asin_doc{"Compute the inverse sine",
                            ("NaN is returned for invalid input values;\n"
                             "to raise an error instead, see \"asin_checked\"."),
@@ -1208,6 +1298,8 @@ const FunctionDoc asin_checked_doc{"Compute the inverse sine",
                                     "to return NaN instead, see \"asin\"."),
                                    {"x"}};
 
+// Docs for "asinh"; the second (long-form) string is empty.
+const FunctionDoc asinh_doc{"Compute the inverse hyperbolic sine", "", {"x"}};
+
 const FunctionDoc acos_doc{"Compute the inverse cosine",
                            ("NaN is returned for invalid input values;\n"
                             "to raise an error instead, see \"acos_checked\"."),
@@ -1218,6 +1310,16 @@ const FunctionDoc acos_checked_doc{"Compute the inverse cosine",
                                     "to return NaN instead, see \"acos\"."),
                                    {"x"}};
 
+// Docs for the unchecked "acosh" function.
+const FunctionDoc acosh_doc{
+    "Compute the inverse hyperbolic cosine",
+    "NaN is returned for input values < 1.0;\n"
+    "to raise an error instead, see \"acosh_checked\".",
+    {"x"}};
+
+// Docs for the checked "acosh_checked" function.
+const FunctionDoc acosh_checked_doc{
+    "Compute the inverse hyperbolic cosine",
+    "Input values < 1.0 raise an error;\n"
+    "to return NaN instead, see \"acosh\".",
+    {"x"}};
+
 const FunctionDoc atan_doc{"Compute the inverse tangent of x",
                            ("The return value is in the range [-pi/2, pi/2];\n"
                             "for a full return range [-pi, pi], see \"atan2\"."),
@@ -1227,6 +1329,17 @@ const FunctionDoc atan2_doc{"Compute the inverse tangent of y/x",
                             ("The return value is in the range [-pi, pi]."),
                             {"y", "x"}};
 
+// Docs for the unchecked "atanh" function.
+const FunctionDoc atanh_doc{
+    "Compute the inverse hyperbolic tangent",
+    "NaN is returned for input values x with |x| > 1.\n"
+    "At x = +/- 1, returns +/- infinity.\n"
+    "To raise an error instead, see \"atanh_checked\".",
+    {"x"}};
+
+// Docs for the checked "atanh_checked" function. The unchecked "atanh" kernel
+// returns NaN only for |x| > 1 and +/- infinity at x = +/- 1, so the text names both
+// outcomes as the alternative to the error raised here.
+const FunctionDoc atanh_checked_doc{
+    "Compute the inverse hyperbolic tangent",
+    ("Input values x with |x| >= 1.0 raise an error;\n"
+     "to return NaN (or +/- infinity at x = +/- 1) instead, see \"atanh\"."),
+    {"x"}};
+
 const FunctionDoc ln_doc{
     "Compute natural logarithm",
     ("Non-positive values return -inf or NaN. Null values return null.\n"
@@ -1691,40 +1804,66 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
       "sin_checked", sin_checked_doc);
   DCHECK_OK(registry->AddFunction(std::move(sin_checked)));
 
+  auto sinh = MakeUnaryArithmeticFunctionFloatingPoint<Sinh>("sinh", sinh_doc);
+  DCHECK_OK(registry->AddFunction(std::move(sinh)));
+
   auto cos = MakeUnaryArithmeticFunctionFloatingPoint<Cos>("cos", cos_doc);
   DCHECK_OK(registry->AddFunction(std::move(cos)));
 
   auto cos_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<CosChecked>(
       "cos_checked", cos_checked_doc);
   DCHECK_OK(registry->AddFunction(std::move(cos_checked)));
 
+  auto cosh = MakeUnaryArithmeticFunctionFloatingPoint<Cosh>("cosh", cosh_doc);
+  DCHECK_OK(registry->AddFunction(std::move(cosh)));
+
   auto tan = MakeUnaryArithmeticFunctionFloatingPoint<Tan>("tan", tan_doc);
   DCHECK_OK(registry->AddFunction(std::move(tan)));
 
   auto tan_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<TanChecked>(
       "tan_checked", tan_checked_doc);
   DCHECK_OK(registry->AddFunction(std::move(tan_checked)));
 
+  auto tanh = MakeUnaryArithmeticFunctionFloatingPoint<Tanh>("tanh", tanh_doc);
+  DCHECK_OK(registry->AddFunction(std::move(tanh)));
+
   auto asin = MakeUnaryArithmeticFunctionFloatingPoint<Asin>("asin", asin_doc);
   DCHECK_OK(registry->AddFunction(std::move(asin)));
 
   auto asin_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<AsinChecked>(
       "asin_checked", asin_checked_doc);
   DCHECK_OK(registry->AddFunction(std::move(asin_checked)));
 
+  auto asinh = MakeUnaryArithmeticFunctionFloatingPoint<Asinh>("asinh", asinh_doc);
+  DCHECK_OK(registry->AddFunction(std::move(asinh)));
+
   auto acos = MakeUnaryArithmeticFunctionFloatingPoint<Acos>("acos", acos_doc);
   DCHECK_OK(registry->AddFunction(std::move(acos)));
 
   auto acos_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<AcosChecked>(
       "acos_checked", acos_checked_doc);
   DCHECK_OK(registry->AddFunction(std::move(acos_checked)));
 
+  auto acosh = MakeUnaryArithmeticFunctionFloatingPoint<Acosh>("acosh", acosh_doc);
+  DCHECK_OK(registry->AddFunction(std::move(acosh)));
+
+  auto acosh_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<AcoshChecked>(
+      "acosh_checked", acosh_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(acosh_checked)));
+
   auto atan = MakeUnaryArithmeticFunctionFloatingPoint<Atan>("atan", atan_doc);
   DCHECK_OK(registry->AddFunction(std::move(atan)));
 
   auto atan2 = MakeArithmeticFunctionFloatingPoint<Atan2>("atan2", atan2_doc);
   DCHECK_OK(registry->AddFunction(std::move(atan2)));
 
+  auto atanh = MakeUnaryArithmeticFunctionFloatingPoint<Atanh>("atanh", atanh_doc);
+  DCHECK_OK(registry->AddFunction(std::move(atanh)));
+
+  auto atanh_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<AtanhChecked>(
+      "atanh_checked", atanh_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(atanh_checked)));
+
   // ----------------------------------------------------------------------
   // Logarithms
   auto ln = MakeUnaryArithmeticFunctionFloatingPoint<LogNatural>("ln", ln_doc);