From b0c417e3dec51f3cbb20f25c784e01895e71c113 Mon Sep 17 00:00:00 2001 From: Jianfeng Mao <4297243+jmao-denver@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:28:53 -0700 Subject: [PATCH] fix: Correctly handle signature-less functions for Py UDF calls (#6368) Fixes #6349 --- .../engine/util/PyCallableWrapper.java | 1 + .../engine/util/PyCallableWrapperJpyImpl.java | 21 ++++++++++++++++--- py/server/deephaven/_udf.py | 19 +++++++++++------ py/server/tests/test_udf_scalar_args.py | 6 ++++++ py/server/tests/test_vectorization.py | 10 +++++++++ 5 files changed, 48 insertions(+), 9 deletions(-) diff --git a/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapper.java b/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapper.java index bcbcfb5462a..11c560804d8 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapper.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapper.java @@ -56,6 +56,7 @@ public String getName() { class Signature { private final List parameters; private final Class returnType; + public final static Signature EMPTY = new Signature(List.of(), null); public Signature(List parameters, Class returnType) { this.parameters = parameters; diff --git a/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapperJpyImpl.java b/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapperJpyImpl.java index fe8dc367439..bfb061c5e7a 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapperJpyImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapperJpyImpl.java @@ -193,18 +193,29 @@ private void prepareSignature() { vectorized = false; } pyUdfDecorator = dh_udf_module.call("_udf_parser", pyCallable); - signatureString = pyUdfDecorator.getAttribute("signature").toString(); + // The Python UDF parser failed to get/parse the callable's signature. This is likely due to it being + // a function in an extension module or a signature-less/multi-signature builtin function such as 'max'. + if (pyUdfDecorator.isNone()) { + pyUdfDecorator = null; + signature = Signature.EMPTY; + } else { + signatureString = pyUdfDecorator.getAttribute("signature").toString(); + } } @Override public void parseSignature() { - if (signatureString != null) { + if (signature != null) { return; } prepareSignature(); + if (signature == Signature.EMPTY) { + return; + } + // the 'types' field of a vectorized function follows the pattern of '[ilhfdb?O]*->[ilhfdb?O]', // eg. [ll->d] defines two int64 (long) arguments and a double return type. if (signatureString == null || signatureString.isEmpty()) { @@ -287,6 +298,10 @@ public static boolean isLosslessWideningPrimitiveConversion(@NotNull Class or } public void verifyArguments(Class[] argTypes) { + if (signature == Signature.EMPTY) { + return; + } + String callableName = pyCallable.getAttribute("__name__").toString(); List parameters = signature.getParameters(); @@ -358,7 +373,7 @@ public void verifyArguments(Class[] argTypes) { // In vectorized mode, we want to call the vectorized function directly. public PyObject vectorizedCallable() { - if (numbaVectorized) { + if (numbaVectorized || pyUdfDecorator == null) { return pyCallable; } else { return pyUdfDecorator.call("__call__", this.argTypesStr, true); diff --git a/py/server/deephaven/_udf.py b/py/server/deephaven/_udf.py index 5163a51b577..1140c8a08e6 100644 --- a/py/server/deephaven/_udf.py +++ b/py/server/deephaven/_udf.py @@ -485,7 +485,7 @@ def _parse_np_ufunc_signature(fn: numpy.ufunc) -> _ParsedSignature: return p_sig -def _parse_signature(fn: Callable) -> _ParsedSignature: +def _parse_signature(fn: Callable) -> Optional[_ParsedSignature]: """ Parse the signature of a function """ if numba: @@ -496,10 +496,14 @@ def _parse_signature(fn: Callable) -> _ParsedSignature: return _parse_np_ufunc_signature(fn) else: p_sig = _ParsedSignature(fn=fn) - if sys.version_info >= (3, 10): - sig = inspect.signature(fn, eval_str=True) # novermin - else: - sig = inspect.signature(fn) + try: + if sys.version_info >= (3, 10): + sig = inspect.signature(fn, eval_str=True) # novermin + else: + sig = inspect.signature(fn) + except ValueError: + # some built-in functions don't have a signature, neither do some functions from C extensions + return None for n, p in sig.parameters.items(): # when from __future__ import annotations is used, the annotation is a string, we need to eval it to get @@ -513,7 +517,7 @@ def _parse_signature(fn: Callable) -> _ParsedSignature: p_sig.ret_annotation = _parse_return_annotation(t) return p_sig -def _udf_parser(fn: Callable): +def _udf_parser(fn: Callable) -> Optional[Callable]: """A decorator that acts as a transparent translator for Python UDFs used in Deephaven query formulas between Python and Java. This decorator is intended for internal use by the Deephaven query engine and should not be used by users. @@ -531,6 +535,9 @@ def _udf_parser(fn: Callable): return fn p_sig = _parse_signature(fn) + if p_sig is None: + return None + return_array = p_sig.ret_annotation.has_array ret_np_char = p_sig.ret_annotation.encoded_type[-1] ret_dtype = dtypes.from_np_dtype(np.dtype(ret_np_char if ret_np_char != "X" else "O")) diff --git a/py/server/tests/test_udf_scalar_args.py b/py/server/tests/test_udf_scalar_args.py index 408b58486aa..617fa85be86 100644 --- a/py/server/tests/test_udf_scalar_args.py +++ b/py/server/tests/test_udf_scalar_args.py @@ -613,6 +613,12 @@ def f(p1: float, p2: np.float64) -> bool: self.assertRegex(str(w[-1].message), "numpy scalar type.*is used") self.assertEqual(10, t.to_string().count("true")) + def test_no_signature(self): + builtin_max = max + t = empty_table(10).update("X = (int) builtin_max(1, 2, 3)") + self.assertEqual(t.columns[0].data_type, dtypes.int32) + self.assertEqual(10, t.to_string().count("3")) + if __name__ == "__main__": unittest.main() diff --git a/py/server/tests/test_vectorization.py b/py/server/tests/test_vectorization.py index ab227d02cc5..7039ebceb38 100644 --- a/py/server/tests/test_vectorization.py +++ b/py/server/tests/test_vectorization.py @@ -342,5 +342,15 @@ def f3(p1: np.ndarray[np.bool_]) -> np.ndarray[np.bool_]: self.assertEqual(_udf.vectorized_count, 1) _udf.vectorized_count = 0 + def test_no_signature_array(self): + builtin_max = max + + t = empty_table(10).update(["X = i % 3", "Y = i % 2 == 0? `deephaven`: `rocks`"]).group_by("X").update("Y = Y.toArray()") + t1 = t.update(["X1 = builtin_max(Y)"]) + self.assertEqual(t1.columns[2].data_type, dtypes.JObject) + self.assertEqual(_udf.vectorized_count, 0) + _udf.vectorized_count = 0 + + if __name__ == "__main__": unittest.main()