Skip to content

Commit

Permalink
fix: Correctly handle signature-less functions for Py UDF calls (#6368)
Browse files Browse the repository at this point in the history
Fixes #6349
  • Loading branch information
jmao-denver authored Nov 15, 2024
1 parent 9692a02 commit b0c417e
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public String getName() {
class Signature {
private final List<Parameter> parameters;
private final Class<?> returnType;
public final static Signature EMPTY = new Signature(List.of(), null);

public Signature(List<Parameter> parameters, Class<?> returnType) {
this.parameters = parameters;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,18 +193,29 @@ private void prepareSignature() {
vectorized = false;
}
pyUdfDecorator = dh_udf_module.call("_udf_parser", pyCallable);
signatureString = pyUdfDecorator.getAttribute("signature").toString();
// The Python UDF parser failed to get/parse the callable's signature. This is likely due to it being
// a function in an extension module or a signature-less/multi-signature builtin function such as 'max'.
if (pyUdfDecorator.isNone()) {
pyUdfDecorator = null;
signature = Signature.EMPTY;
} else {
signatureString = pyUdfDecorator.getAttribute("signature").toString();
}
}


@Override
public void parseSignature() {
if (signatureString != null) {
if (signature != null) {
return;
}

prepareSignature();

if (signature == Signature.EMPTY) {
return;
}

// the 'types' field of a vectorized function follows the pattern of '[ilhfdb?O]*->[ilhfdb?O]',
// eg. [ll->d] defines two int64 (long) arguments and a double return type.
if (signatureString == null || signatureString.isEmpty()) {
Expand Down Expand Up @@ -287,6 +298,10 @@ public static boolean isLosslessWideningPrimitiveConversion(@NotNull Class<?> or
}

public void verifyArguments(Class<?>[] argTypes) {
if (signature == Signature.EMPTY) {
return;
}

String callableName = pyCallable.getAttribute("__name__").toString();
List<Parameter> parameters = signature.getParameters();

Expand Down Expand Up @@ -358,7 +373,7 @@ public void verifyArguments(Class<?>[] argTypes) {

// In vectorized mode, we want to call the vectorized function directly.
public PyObject vectorizedCallable() {
if (numbaVectorized) {
if (numbaVectorized || pyUdfDecorator == null) {
return pyCallable;
} else {
return pyUdfDecorator.call("__call__", this.argTypesStr, true);
Expand Down
19 changes: 13 additions & 6 deletions py/server/deephaven/_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ def _parse_np_ufunc_signature(fn: numpy.ufunc) -> _ParsedSignature:
return p_sig


def _parse_signature(fn: Callable) -> _ParsedSignature:
def _parse_signature(fn: Callable) -> Optional[_ParsedSignature]:
""" Parse the signature of a function """

if numba:
Expand All @@ -496,10 +496,14 @@ def _parse_signature(fn: Callable) -> _ParsedSignature:
return _parse_np_ufunc_signature(fn)
else:
p_sig = _ParsedSignature(fn=fn)
if sys.version_info >= (3, 10):
sig = inspect.signature(fn, eval_str=True) # novermin
else:
sig = inspect.signature(fn)
try:
if sys.version_info >= (3, 10):
sig = inspect.signature(fn, eval_str=True) # novermin
else:
sig = inspect.signature(fn)
except ValueError:
# some built-in functions don't have a signature, neither do some functions from C extensions
return None

for n, p in sig.parameters.items():
# when from __future__ import annotations is used, the annotation is a string, we need to eval it to get
Expand All @@ -513,7 +517,7 @@ def _parse_signature(fn: Callable) -> _ParsedSignature:
p_sig.ret_annotation = _parse_return_annotation(t)
return p_sig

def _udf_parser(fn: Callable):
def _udf_parser(fn: Callable) -> Optional[Callable]:
"""A decorator that acts as a transparent translator for Python UDFs used in Deephaven query formulas between
Python and Java. This decorator is intended for internal use by the Deephaven query engine and should not be used by
users.
Expand All @@ -531,6 +535,9 @@ def _udf_parser(fn: Callable):
return fn

p_sig = _parse_signature(fn)
if p_sig is None:
return None

return_array = p_sig.ret_annotation.has_array
ret_np_char = p_sig.ret_annotation.encoded_type[-1]
ret_dtype = dtypes.from_np_dtype(np.dtype(ret_np_char if ret_np_char != "X" else "O"))
Expand Down
6 changes: 6 additions & 0 deletions py/server/tests/test_udf_scalar_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,12 @@ def f(p1: float, p2: np.float64) -> bool:
self.assertRegex(str(w[-1].message), "numpy scalar type.*is used")
self.assertEqual(10, t.to_string().count("true"))

def test_no_signature(self):
builtin_max = max
t = empty_table(10).update("X = (int) builtin_max(1, 2, 3)")
self.assertEqual(t.columns[0].data_type, dtypes.int32)
self.assertEqual(10, t.to_string().count("3"))


if __name__ == "__main__":
unittest.main()
10 changes: 10 additions & 0 deletions py/server/tests/test_vectorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,5 +342,15 @@ def f3(p1: np.ndarray[np.bool_]) -> np.ndarray[np.bool_]:
self.assertEqual(_udf.vectorized_count, 1)
_udf.vectorized_count = 0

def test_no_signature_array(self):
builtin_max = max

t = empty_table(10).update(["X = i % 3", "Y = i % 2 == 0? `deephaven`: `rocks`"]).group_by("X").update("Y = Y.toArray()")
t1 = t.update(["X1 = builtin_max(Y)"])
self.assertEqual(t1.columns[2].data_type, dtypes.JObject)
self.assertEqual(_udf.vectorized_count, 0)
_udf.vectorized_count = 0


if __name__ == "__main__":
unittest.main()

0 comments on commit b0c417e

Please sign in to comment.