apache · alamb · Apr 15, 2024 · Apr 14, 2024 · Apr 15, 2024 · Apr 15, 2024
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
@@ -37,14 +37,8 @@ use strum_macros::EnumIter;
 #[derive(Debug, Clone, PartialEq, Eq, Hash, EnumIter, Copy)]
 pub enum BuiltinScalarFunction {
     // math functions
-    /// ceil
-    Ceil,
     /// coalesce
     Coalesce,
-    /// exp
-    Exp,
-    /// factorial
-    Factorial,
     // string functions
     /// concat
     Concat,
@@ -106,10 +100,7 @@ impl BuiltinScalarFunction {
     pub fn volatility(&self) -> Volatility {
         match self {
             // Immutable scalar builtins
-            BuiltinScalarFunction::Ceil => Volatility::Immutable,
             BuiltinScalarFunction::Coalesce => Volatility::Immutable,
-            BuiltinScalarFunction::Exp => Volatility::Immutable,
-            BuiltinScalarFunction::Factorial => Volatility::Immutable,
             BuiltinScalarFunction::Concat => Volatility::Immutable,
             BuiltinScalarFunction::ConcatWithSeparator => Volatility::Immutable,
             BuiltinScalarFunction::EndsWith => Volatility::Immutable,
@@ -145,15 +136,6 @@ impl BuiltinScalarFunction {
                 utf8_to_str_type(&input_expr_types[0], "initcap")
             }
             BuiltinScalarFunction::EndsWith => Ok(Boolean),
-
-            BuiltinScalarFunction::Factorial => Ok(Int64),
-
-            BuiltinScalarFunction::Ceil | BuiltinScalarFunction::Exp => {
-                match input_expr_types[0] {
-                    Float32 => Ok(Float32),
-                    _ => Ok(Float64),
-                }
-            }
         }
     }
 
@@ -185,43 +167,19 @@ impl BuiltinScalarFunction {
                 ],
                 self.volatility(),
             ),
-            BuiltinScalarFunction::Factorial => {
-                Signature::uniform(1, vec![Int64], self.volatility())
-            }
-            BuiltinScalarFunction::Ceil | BuiltinScalarFunction::Exp => {
-                // math expressions expect 1 argument of type f64 or f32
-                // priority is given to f64 because e.g. `sqrt(1i32)` is in IR (real numbers) and thus we
-                // return the best approximation for it (in f64).
-                // We accept f32 because in this case it is clear that the best approximation
-                // will be as good as the number of digits in the number
-                Signature::uniform(1, vec![Float64, Float32], self.volatility())
-            }
         }
     }
 
     /// This function specifies monotonicity behaviors for built-in scalar functions.
     /// The list can be extended, only mathematical and datetime functions are
     /// considered for the initial implementation of this feature.
     pub fn monotonicity(&self) -> Option<FuncMonotonicity> {
-        if matches!(
-            &self,
-            BuiltinScalarFunction::Ceil
-                | BuiltinScalarFunction::Exp
-                | BuiltinScalarFunction::Factorial
-        ) {
-            Some(vec![Some(true)])
-        } else {
-            None
-        }
+        None
     }
 
     /// Returns all names that can be used to call this function
     pub fn aliases(&self) -> &'static [&'static str] {
         match self {
-            BuiltinScalarFunction::Ceil => &["ceil"],
-            BuiltinScalarFunction::Exp => &["exp"],
-            BuiltinScalarFunction::Factorial => &["factorial"],
-
             // conditional functions
             BuiltinScalarFunction::Coalesce => &["coalesce"],
 

diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
@@ -525,16 +525,6 @@ macro_rules! nary_scalar_expr {
 // generate methods for creating the supported unary/binary expressions
 
 // math functions
-scalar_expr!(Factorial, factorial, num, "factorial");
-scalar_expr!(
-    Ceil,
-    ceil,
-    num,
-    "nearest integer greater than or equal to argument"
-);
-
-scalar_expr!(Exp, exp, num, "exponential");
-
 scalar_expr!(InitCap, initcap, string, "converts the first letter of each word in `string` in uppercase and the remaining characters in lowercase");
 scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends with the `suffix`");
 nary_scalar_expr!(Coalesce, coalesce, "returns `coalesce(args...)`, which evaluates to the value of the first [Expr] which is not NULL");
@@ -877,22 +867,6 @@ mod test {
         );
     }
 
-    macro_rules! test_unary_scalar_expr {
-        ($ENUM:ident, $FUNC:ident) => {{
-            if let Expr::ScalarFunction(ScalarFunction {
-                func_def: ScalarFunctionDefinition::BuiltIn(fun),
-                args,
-            }) = $FUNC(col("tableA.a"))
-            {
-                let name = built_in_function::BuiltinScalarFunction::$ENUM;
-                assert_eq!(name, fun);
-                assert_eq!(1, args.len());
-            } else {
-                assert!(false, "unexpected");
-            }
-        }};
-    }
-
     macro_rules! test_scalar_expr {
     ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
         let expected = [$(stringify!($arg)),*];
@@ -913,10 +887,6 @@ mod test {
 
     #[test]
     fn scalar_function_definitions() {
-        test_unary_scalar_expr!(Factorial, factorial);
-        test_unary_scalar_expr!(Ceil, ceil);
-        test_unary_scalar_expr!(Exp, exp);
-
         test_scalar_expr!(InitCap, initcap, string);
         test_scalar_expr!(EndsWith, ends_with, string, characters);
     }

diff --git a/datafusion/functions/src/math/factorial.rs b/datafusion/functions/src/math/factorial.rs
@@ -0,0 +1,117 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{ArrayRef, Int64Array};
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::datatypes::DataType;
+use arrow::datatypes::DataType::Int64;
+
+use crate::utils::make_scalar_function;
+use datafusion_common::{exec_err, DataFusionError, Result};
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+
+/// Scalar UDF implementation of the SQL `factorial` function.
+///
+/// The only state is the [`Signature`] returned from `ScalarUDFImpl::signature`.
+#[derive(Debug)]
+pub struct FactorialFunc {
+    signature: Signature,
+}
+
+/// `Default` simply delegates to [`FactorialFunc::new`].
+impl Default for FactorialFunc {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl FactorialFunc {
+    /// Creates the `factorial` UDF implementation.
+    ///
+    /// The signature accepts exactly one `Int64` argument. Volatility is
+    /// `Immutable` because the result depends only on the input value; this
+    /// matches the `BuiltinScalarFunction::Factorial` variant this type replaces.
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::uniform(1, vec![Int64], Volatility::Immutable),
+        }
+    }
+}
+
+// NOTE(review): the removed `BuiltinScalarFunction::Factorial` returned
+// `Some(vec![Some(true)])` from `monotonicity()`; nothing equivalent is declared
+// here -- confirm whether that property should be carried over.
+impl ScalarUDFImpl for FactorialFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// The function name, `factorial`.
+    fn name(&self) -> &str {
+        "factorial"
+    }
+
+    /// The signature built in [`FactorialFunc::new`].
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// Always `Int64`; the argument types are not inspected.
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        Ok(DataType::Int64)
+    }
+
+    /// Evaluates `factorial` over `args`.
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        // `make_scalar_function` turns the array-level `factorial` kernel into
+        // something callable with `ColumnarValue`s; the empty vec is passed through
+        // unchanged as its second argument.
+        let kernel = make_scalar_function(factorial, vec![]);
+        kernel(args)
+    }
+}
+
+/// Computes `n!`, returning `None` when the result does not fit in an `i64`.
+///
+/// The fold stops at the first multiplication that overflows, so any `n > 20`
+/// is rejected after at most 21 steps instead of walking the whole range.
+/// Inputs below 1 yield the empty product, `1`.
+fn checked_factorial(n: i64) -> Option<i64> {
+    (1..=n).try_fold(1i64, |product, factor| product.checked_mul(factor))
+}
+
+/// Factorial SQL function
+///
+/// Expects `args[0]` to be an `Int64` array and returns an `Int64` array of the
+/// same length in which every non-null value `n` is replaced by `n!`; nulls stay
+/// null.
+///
+/// # Errors
+///
+/// * the first argument is not an `Int64` array
+/// * some `n!` overflows `i64` (any `n > 20`)
+fn factorial(args: &[ArrayRef]) -> Result<ArrayRef> {
+    match args[0].data_type() {
+        DataType::Int64 => {
+            let values = downcast_arg!(&args[0], "value", Int64Array);
+            let result = values
+                .iter()
+                .map(|value| match value {
+                    Some(n) => match checked_factorial(n) {
+                        Some(product) => Ok(Some(product)),
+                        None => exec_err!("Overflow happened on FACTORIAL({n})"),
+                    },
+                    None => Ok(None),
+                })
+                .collect::<Result<Int64Array>>()?;
+            Ok(Arc::new(result) as ArrayRef)
+        }
+        other => exec_err!("Unsupported data type {other:?} for function factorial."),
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use datafusion_common::cast::as_int64_array;
+
+    use super::*;
+
+    /// Small `Int64` inputs, including `0`, map to their factorials.
+    #[test]
+    fn test_factorial_i64() {
+        let input: ArrayRef = Arc::new(Int64Array::from(vec![0, 1, 2, 4]));
+
+        let output =
+            factorial(&[input]).expect("failed to initialize function factorial");
+        let actual =
+            as_int64_array(&output).expect("failed to initialize function factorial");
+
+        assert_eq!(actual, &Int64Array::from(vec![1, 1, 2, 24]));
+    }
+}
diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs
@@ -22,6 +22,7 @@ use std::sync::Arc;
 
 pub mod abs;
 pub mod cot;
+pub mod factorial;
 pub mod gcd;
 pub mod iszero;
 pub mod lcm;
@@ -44,10 +45,13 @@ make_math_unary_udf!(AtanFunc, ATAN, atan, atan, Some(vec![Some(true)]));
 make_math_unary_udf!(AtanhFunc, ATANH, atanh, atanh, Some(vec![Some(true)]));
 make_math_binary_udf!(Atan2, ATAN2, atan2, atan2, Some(vec![Some(true)]));
 make_math_unary_udf!(CbrtFunc, CBRT, cbrt, cbrt, None);
+make_math_unary_udf!(CeilFunc, CEIL, ceil, ceil, Some(vec![Some(true)]));
 make_math_unary_udf!(CosFunc, COS, cos, cos, None);
 make_math_unary_udf!(CoshFunc, COSH, cosh, cosh, None);
 make_udf_function!(cot::CotFunc, COT, cot);
 make_math_unary_udf!(DegreesFunc, DEGREES, degrees, to_degrees, None);
+make_math_unary_udf!(ExpFunc, EXP, exp, exp, Some(vec![Some(true)]));
+make_udf_function!(factorial::FactorialFunc, FACTORIAL, factorial);
 make_math_unary_udf!(FloorFunc, FLOOR, floor, floor, Some(vec![Some(true)]));
 make_udf_function!(log::LogFunc, LOG, log);
 make_udf_function!(gcd::GcdFunc, GCD, gcd);
@@ -119,6 +123,11 @@ pub mod expr_fn {
         super::cbrt().call(vec![num])
     }
 
+    #[doc = "nearest integer greater than or equal to argument"]
+    pub fn ceil(num: Expr) -> Expr {
+        // Fetch the `ceil` UDF from the parent module and call it with `num` as
+        // its only argument.
+        super::ceil().call(vec![num])
+    }
+
     #[doc = "cosine"]
     pub fn cos(num: Expr) -> Expr {
         super::cos().call(vec![num])
@@ -139,6 +148,16 @@ pub mod expr_fn {
         super::degrees().call(vec![num])
     }
 
+    #[doc = "exponential"]
+    pub fn exp(num: Expr) -> Expr {
+        // Fetch the `exp` UDF from the parent module and call it with `num` as
+        // its only argument.
+        super::exp().call(vec![num])
+    }
+
+    #[doc = "factorial"]
+    pub fn factorial(num: Expr) -> Expr {
+        // Fetch the `factorial` UDF from the parent module and call it with `num`
+        // as its only argument.
+        super::factorial().call(vec![num])
+    }
+
     #[doc = "nearest integer less than or equal to argument"]
     pub fn floor(num: Expr) -> Expr {
         super::floor().call(vec![num])
@@ -262,10 +281,13 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
         atan2(),
         atanh(),
         cbrt(),
+        ceil(),
         cos(),
         cosh(),
         cot(),
         degrees(),
+        exp(),
+        factorial(),
         floor(),
         gcd(),
         isnan(),

diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs
@@ -228,8 +228,7 @@ mod tests {
     use itertools::Itertools;
 
     use datafusion_common::{DFSchema, Result};
-    use datafusion_expr::execution_props::ExecutionProps;
-    use datafusion_expr::{BuiltinScalarFunction, Operator, ScalarUDF};
+    use datafusion_expr::{Operator, ScalarUDF};
 
     use crate::equivalence::tests::{
         convert_to_orderings, convert_to_sort_exprs, create_random_schema,
@@ -241,7 +240,6 @@ mod tests {
     };
     use crate::expressions::Column;
     use crate::expressions::{col, BinaryExpr};
-    use crate::functions::create_physical_expr;
     use crate::utils::tests::TestScalarUDF;
     use crate::{PhysicalExpr, PhysicalSortExpr};
 
@@ -301,11 +299,12 @@ mod tests {
             &[],
             &DFSchema::empty(),
         )?;
-        let exp_a = &create_physical_expr(
-            &BuiltinScalarFunction::Exp,
+        let exp_a = &crate::udf::create_physical_expr(
+            &test_fun,
             &[col("a", &test_schema)?],
             &test_schema,
-            &ExecutionProps::default(),
+            &[],
+            &DFSchema::empty(),
         )?;
         let a_plus_b = Arc::new(BinaryExpr::new(
             col_a.clone(),