From 441d26d8feeb90c1152d84113762bc5c94c239e0 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Mon, 21 Aug 2023 12:56:26 -0700 Subject: [PATCH 01/18] pyudf class/trait impl framework: --- crates/sparrow-expressions/src/evaluator.rs | 2 +- crates/sparrow-expressions/src/work_area.rs | 2 +- crates/sparrow-session/src/lib.rs | 2 + crates/sparrow-session/src/session.rs | 2 +- crates/sparrow-syntax/src/syntax/signature.rs | 1 + python/Cargo.lock | 4 + python/Cargo.toml | 2 + python/pysrc/kaskada/_ffi.pyi | 12 +- python/pysrc/kaskada/_timestream.py | 80 +++++++++++++ python/pysrc/kaskada/udf.py | 72 ++++++----- python/pytests/udf_test.py | 49 +++++--- python/src/expr.rs | 62 ++++++++++ python/src/lib.rs | 3 +- python/src/udf.rs | 113 ++++++++++++++---- 14 files changed, 323 insertions(+), 83 deletions(-) diff --git a/crates/sparrow-expressions/src/evaluator.rs b/crates/sparrow-expressions/src/evaluator.rs index 1dec9df79..2e15bade3 100644 --- a/crates/sparrow-expressions/src/evaluator.rs +++ b/crates/sparrow-expressions/src/evaluator.rs @@ -4,7 +4,7 @@ use crate::work_area::WorkArea; use crate::Error; /// Trait for evaluating an individual expression node. -pub(super) trait Evaluator: Send + Sync { +pub trait Evaluator: Send + Sync { /// Evaluate the function with the given runtime info. fn evaluate(&self, work_area: &WorkArea<'_>) -> error_stack::Result; } diff --git a/crates/sparrow-expressions/src/work_area.rs b/crates/sparrow-expressions/src/work_area.rs index 444b614cb..8ab9f923e 100644 --- a/crates/sparrow-expressions/src/work_area.rs +++ b/crates/sparrow-expressions/src/work_area.rs @@ -4,7 +4,7 @@ use sparrow_arrow::Batch; use crate::values::WorkAreaValue; /// Information about an in-progress batch used for evaluation. -pub(super) struct WorkArea<'a> { +pub struct WorkArea<'a> { pub input: &'a Batch, pub expressions: Vec, } diff --git a/crates/sparrow-session/src/lib.rs b/crates/sparrow-session/src/lib.rs index bbc72adef..22caadd2d 100644 --- a/crates/sparrow-session/src/lib.rs +++ b/crates/sparrow-session/src/lib.rs @@ -9,3 +9,5 @@ pub use execution::Execution; pub use expr::{Expr, Literal}; pub use session::{ExecutionOptions, Session}; pub use table::Table; + +pub type Udf = sparrow_instructions::Udf; diff --git a/crates/sparrow-session/src/session.rs b/crates/sparrow-session/src/session.rs index 367db8418..e5b616342 100644 --- a/crates/sparrow-session/src/session.rs +++ b/crates/sparrow-session/src/session.rs @@ -292,7 +292,7 @@ impl Session { /// This bypasses much of the plumbing of the [ExprOp] required due to our construction /// of the AST. 
#[allow(unused)] - fn add_udf_to_dfg( + pub fn add_udf_to_dfg( &mut self, udf: Arc, args: Vec, diff --git a/crates/sparrow-syntax/src/syntax/signature.rs b/crates/sparrow-syntax/src/syntax/signature.rs index 7083b5678..c20942388 100644 --- a/crates/sparrow-syntax/src/syntax/signature.rs +++ b/crates/sparrow-syntax/src/syntax/signature.rs @@ -189,6 +189,7 @@ impl Signature { self.name, self.parameters.names().len(), num_args, + me ) } } diff --git a/python/Cargo.lock b/python/Cargo.lock index 689b222f6..5bdff94f2 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -1971,9 +1971,13 @@ dependencies = [ "mimalloc", "pyo3", "pyo3-asyncio", + "sparrow-compiler", + "sparrow-instructions", "sparrow-session", + "sparrow-syntax", "tokio", "tracing", + "uuid 1.4.1", ] [[package]] diff --git a/python/Cargo.toml b/python/Cargo.toml index 02c175078..49c250868 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -23,8 +23,10 @@ mimalloc = { version = "0.1.37", default-features = false, features = ["local_dy pyo3 = {version = "0.19.1", features = ["abi3-py38", "extension-module", "generate-import-lib"]} pyo3-asyncio = { version = "0.19.0", features = ["tokio-runtime"] } sparrow-session = { path = "../crates/sparrow-session" } +sparrow-syntax = { path = "../crates/sparrow-syntax" } tokio = { version = "1.27.0", features = ["sync"] } tracing = "0.1.37" +uuid = { version = "1.3.0", features = ["v4"] } [lib] name = "kaskada" diff --git a/python/pysrc/kaskada/_ffi.pyi b/python/pysrc/kaskada/_ffi.pyi index f34f7f254..70ccf94ec 100644 --- a/python/pysrc/kaskada/_ffi.pyi +++ b/python/pysrc/kaskada/_ffi.pyi @@ -1,6 +1,7 @@ +from typing import Callable from typing import List from typing import Optional -from typing import Sequence +from typing import Sequencem import pyarrow as pa @@ -35,8 +36,6 @@ class Expr: def execute(self, options: Optional[ExecutionOptions] = None) -> Execution: ... def grouping(self) -> Optional[str]: ... -def call_udf(udf: Udf, result_type: pa.DataType, *args: pa.Array) -> pa.Array: ... - class Table(Expr): def __init__( self, @@ -52,3 +51,10 @@ class Table(Expr): @property def name(self) -> str: ... def add_pyarrow(self, data: pa.RecordBatch) -> None: ... + +class Udf(object): + def __init__( + self, + result_ty: str, + result_fn: Callable[..., pa.Array] + ) -> None: ... 
\ No newline at end of file diff --git a/python/pysrc/kaskada/_timestream.py b/python/pysrc/kaskada/_timestream.py index 44c7f915c..58b44ee57 100644 --- a/python/pysrc/kaskada/_timestream.py +++ b/python/pysrc/kaskada/_timestream.py @@ -4,6 +4,8 @@ import sys import warnings +import inspect + from datetime import datetime from datetime import timedelta from typing import Callable @@ -65,6 +67,74 @@ def _literal(value: Literal, session: _ffi.Session) -> Timestream: raise TypeError("Cannot create a literal Timestream from a datetime") return Timestream(_ffi.Expr.literal(session, value)) + @staticmethod + def _test_apply_udf( + signature: inspect.Signature, + *args: Union[Timestream, Literal], + session: Optional[_ffi.Session] = None, + ) -> Timestream: + """ + todo + """ + if session is None: + session = next( + arg._ffi_expr.session() for arg in args if isinstance(arg, Timestream) + ) + + def make_arg(arg: Union[Timestream, Literal]) -> _ffi.Expr: + if isinstance(arg, Timestream): + return arg._ffi_expr + else: + return Timestream._literal(arg, session)._ffi_expr + + ffi_args = [make_arg(arg) for arg in args] + + arg_types = [param.annotation for param in signature.parameters.values()] + result_type = signature.return_annotation + + try: + return Timestream( + _ffi.Expr.udf(session=session, arg_types=arg_types, result_type=result_type, args=ffi_args) + ) + except TypeError as e: + # noqa: DAR401 + raise _augment_error(args, TypeError(str(e))) from e + except ValueError as e: + raise _augment_error(args, ValueError(str(e))) from e + + @staticmethod + def _test_decorator_udf( + signature: inspect.Signature, + *args: Union[Timestream, Literal], + session: Optional[_ffi.Session] = None, + ) -> Timestream: + """ + todo + """ + if session is None: + session = next( + arg._ffi_expr.session() for arg in args if isinstance(arg, Timestream) + ) + + def make_arg(arg: Union[Timestream, Literal]) -> _ffi.Expr: + if isinstance(arg, Timestream): + return arg._ffi_expr + else: + return Timestream._literal(arg, session)._ffi_expr + + try: + return Timestream( + _ffi.Expr.decorator_udf(session=session, args=ffi_args) + ) + except TypeError as e: + # noqa: DAR401 + raise _augment_error(args, TypeError(str(e))) from e + except ValueError as e: + raise _augment_error(args, ValueError(str(e))) from e + + + + @staticmethod def _call( func: str, @@ -199,6 +269,16 @@ def pipe( else: return func(self, *args, **kwargs) +# TODO: How does udf work? We kind of want it like... apply some function on an input? +# But also want like `Timestream.udf(f, args*)` + def udf(callable: Callable, *args: Timestream) -> Timestream: + """ + todo + """ + signature = inspect.signature(callable) + + return Timestream._test_udf(signature, args) + def add(self, rhs: Union[Timestream, Literal]) -> Timestream: """ Create a Timestream adding this and `rhs`. diff --git a/python/pysrc/kaskada/udf.py b/python/pysrc/kaskada/udf.py index 05cf5e3b8..4cb71642d 100644 --- a/python/pysrc/kaskada/udf.py +++ b/python/pysrc/kaskada/udf.py @@ -1,57 +1,53 @@ """Functionality for calling Python UDFs from Kaskada.""" +from __future__ import annotations + import functools from typing import Callable import pandas as pd import pyarrow as pa +from ._timestream import Timestream +from ._timestream import Literal +import _ffi # TODO: Allow functions to return `pd.DataFrame` for struct arrays. 
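(Aside, not part of the patch: one plausible reading of the struct-array TODO above is mapping each `pd.DataFrame` column onto a field of a `pa.StructArray`. A minimal standalone sketch, with hypothetical column names:

import pandas as pd
import pyarrow as pa

# Hypothetical mapping: one DataFrame column per struct field.
df = pd.DataFrame({"sum": [11, 22], "diff": [9, 18]})
struct = pa.StructArray.from_arrays(
    [pa.array(df[c]) for c in df.columns], names=list(df.columns)
)
assert pa.types.is_struct(struct.type)
)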
 FuncType = Callable[..., pd.Series]
-
-class Udf(object):
-    """Class wrapping a UDF used in Kaskada."""
-
-    def __init__(self, name, func: FuncType, signature: str) -> None:
-        """Create a UDF for a function returning a Pandas series.
-
-        Parameters
-        ----------
-        name : str
-            Name of the function being wrapped.
-
-        func : FuncType
-            The callable to wrap.
-
-        signature : str
-            The Kaskada function signature for this UDF. Will be used to check
-            types of calls to this function and propagate type information for
-            the rest of the query.
-        """
+
+class Udf:
+    def __init__(self, signature: str, func: Callable[..., pa.Array]) -> None:
+        """Create a UDF with the given signature."""
         functools.update_wrapper(self, func)
-        self.name = name
-        self.func = func
-        self.signature = signature
-
-    def run_pyarrow(self, result_type: pa.DataType, *args: pa.Array) -> pa.Array:
-        """Run the function producing the given result type."""
-        # TODO: I believe this will return a series for simple arrays, and a
-        # dataframe for struct arrays. We should explore how this handles
-        # different types.
-        pd_args = [arg.to_pandas() for arg in args]
-        pd_result = self.func(*pd_args)
-
-        if isinstance(pd_result, pd.Series):
-            return pa.Array.from_pandas(pd_result, type=result_type)
-        else:
-            raise TypeError(f"Unsupported result type: {type(pd_result)}")
+        self._ffi = _ffi.Udf(signature, func)
+    def __call__(self, *args: Timestream | Literal) -> Timestream:
+        """Apply the UDF to the given arguments."""
+        Timestream._call(self._ffi, *args)
-def fenl_udf(name: str, signature: str):
+def udf(signature: str):
     """Decorate a function for use as a Kaskada UDF."""
 
     def decorator(func: FuncType):
-        return Udf(name, func, signature)
+        # 1. Convert the `FuncType` to the type expected by Udf.
+        #    This needs to take the PyArrow result type and PyArrow inputs,
+        #    convert them to Pandas, call the function, and convert the result
+        #    back to PyArrow.
+        #    Note: `functools.wraps` keeps the original function's metadata
+        #    (name, docstring) and avoids rebinding `func` to the wrapper
+        #    itself, which would recurse when the wrapper is invoked.
+        @functools.wraps(func)
+        def wrapped(result_type: pa.DataType, *args: pa.Array) -> pa.Array:
+            return _converted_func(func, result_type, *args)
+
+        # 2. Create the UDF object.
+        return Udf(signature, wrapped)
 
     return decorator
+
+def _converted_func(func: FuncType, result_type: pa.DataType, *args: pa.Array) -> pa.Array:
+    """Run the function producing the given result type."""
+    # TODO: I believe this will return a series for simple arrays, and a
+    # dataframe for struct arrays. We should explore how this handles
+    # different types.
+    pd_args = [arg.to_pandas() for arg in args]
+    pd_result = func(*pd_args)
+
+    if isinstance(pd_result, pd.Series):
+        return pa.Array.from_pandas(pd_result, type=result_type)
+    else:
+        raise TypeError(f"Unsupported result type: {type(pd_result)}")
\ No newline at end of file
diff --git a/python/pytests/udf_test.py b/python/pytests/udf_test.py
index c60b1f6ee..7ea34621d 100644
--- a/python/pytests/udf_test.py
+++ b/python/pytests/udf_test.py
@@ -1,26 +1,43 @@
 import pandas as pd
 import pyarrow as pa
-from kaskada._ffi import call_udf
-from kaskada.udf import Udf
-from kaskada.udf import fenl_udf
+import kaskada as kd
+import kaskdaa._ffi as _ffi
+import pytest
 
-
-@fenl_udf("add", "add(x: number, y: number) -> number")
+@kd.udf("add(x: number, y: number) -> number")
 def add(x: pd.Series, y: pd.Series) -> pd.Series:
+    """Use Pandas to add two numbers."""
    return x + y
 
 
-def test_numeric_udf_pure_python() -> None:
-    assert isinstance(add, Udf)
+def test_docstring() -> None:
+    assert add.__doc__ == "Use Pandas to add two numbers."
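(For context, not part of the patch: the pyarrow-to-pandas round trip that `_converted_func` performs can be sketched standalone, reusing the array values from the old `run_pyarrow` test being replaced above. Only pandas and pyarrow are assumed:

import pandas as pd
import pyarrow as pa

def add(x: pd.Series, y: pd.Series) -> pd.Series:
    return x + y

x = pa.array([1, 12, 17], type=pa.int8())
y = pa.array([1, 13, 18], type=pa.int8())
# to_pandas -> user function -> from_pandas is exactly the conversion
# _converted_func performs around the decorated function.
result = pa.Array.from_pandas(add(x.to_pandas(), y.to_pandas()), type=pa.int8())
assert result == pa.array([2, 25, 35], type=pa.int8())
)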
+ + +def test_udf_instance() -> None: + assert isinstance(add, _ffi.Udf) - x = pa.array([1, 12, 17, 23, 28], type=pa.int8()) - y = pa.array([1, 13, 18, 20, 4], type=pa.int8()) - result = add.run_pyarrow(pa.int8(), x, y) - assert result == pa.array([2, 25, 35, 43, 32], type=pa.int8()) +@pytest.fixture +def source_int64() -> kd.sources.CsvString: + content = "\n".join( + [ + "time,key,m,n", + "1996-12-19T16:39:57,A,5,10", + "1996-12-19T16:39:58,B,24,3", + "1996-12-19T16:39:59,A,17,6", + "1996-12-19T16:40:00,A,,9", + "1996-12-19T16:40:01,A,12,", + "1996-12-19T16:40:02,A,,", + ] + ) + return kd.sources.CsvString(content, time_column_name="time", key_column_name="key") +def test_add_udf_direct(golden, source_int64) -> None: + m = source_int64.col("m") + n = source_int64.col("n") + golden.jsonl(add(m, n)) -def test_numeric_udf_rust() -> None: - x = pa.array([1, 12, 17, 23, 28], type=pa.int8()) - y = pa.array([1, 13, 18, 20, 4], type=pa.int8()) - result = call_udf(add, pa.int8(), x, y) - assert result == pa.array([2, 25, 35, 43, 32], type=pa.int8()) +def test_add_udf_pipe(golden, source_int64) -> None: + m = source_int64.col("m") + n = source_int64.col("n") + golden.jsonl(m.pipe(add, n)) diff --git a/python/src/expr.rs b/python/src/expr.rs index 0acc47464..582f64b5d 100644 --- a/python/src/expr.rs +++ b/python/src/expr.rs @@ -44,6 +44,68 @@ impl Expr { Ok(Self { rust_expr, session }) } + // TODO: FRAZ + // Could you add a udf() method here that the _timestream calls? + // I want to be able to call a python function from the _timestream. + // I want to be able to do like polars: + // I like their `map` and `apply` method distinction -- `map` works on series, + // and `apply` works on individual values. + // + // + // TODO: Here is where you need to create your `Arc`. + // That means you need to + // 1. Infer arg types and result types from python signature + // 2. Infer FENL TYPES conversions (aka rust/data types kinda) + // 3. Create a SIGNATURE (Do arg names matter?) + // 4. Create Python Udf - signature + args? + // + // Signature inspection happens in _timestream. + // So I pass in arg_types and result_type, then construct signature here. + + #[staticmethod] + #[pyo3(signature = (session, arg_types, result_type, args))] + fn udf( + session: Session, + arg_types: Vec<&str>, + result_type: &str, + args: Vec, + ) -> PyResult { + if !args.iter().all(|e| e.session() == session) { + return Err(PyValueError::new_err( + "all arguments must be in the same session", + )); + } + + let mut rust_session = session.rust_session()?; + let args: Vec<_> = args.into_iter().map(|e| e.rust_expr).collect(); + println!("arg_types: {:?}", arg_types); + println!("result_type: {:?}", result_type); + + panic!("ok") + + // The python signature + } + + #[staticmethod] + #[pyo3(signature = (session, args))] + fn decorator_udf( + session: Session, + // TODO: signature? 
+ args: Vec, + ) -> PyResult { + if !args.iter().all(|e| e.session() == session) { + return Err(PyValueError::new_err( + "all arguments must be in the same session", + )); + } + + let mut rust_session = session.rust_session()?; + let args: Vec<_> = args.into_iter().map(|e| e.rust_expr).collect(); + println!("arg_types: {:?}", arg_types); + println!("result_type: {:?}", result_type); + panic!("ok") + } + #[staticmethod] #[pyo3(signature = (session, value))] fn literal(session: Session, value: Option) -> PyResult { diff --git a/python/src/lib.rs b/python/src/lib.rs index 957981d31..814656bf3 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -1,5 +1,6 @@ use pyo3::prelude::*; +mod call_udf; mod error; mod execution; mod expr; @@ -16,7 +17,7 @@ static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; #[pymodule] #[pyo3(name = "_ffi")] fn ffi(_py: Python<'_>, m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(udf::call_udf, m)?)?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/python/src/udf.rs b/python/src/udf.rs index 5557b4c80..b2854bf97 100644 --- a/python/src/udf.rs +++ b/python/src/udf.rs @@ -4,29 +4,98 @@ use arrow::pyarrow::{FromPyArrow, ToPyArrow}; use pyo3::prelude::*; use pyo3::types::PyTuple; -#[pyfunction] -#[pyo3(signature = (udf, result_type, *args))] -pub(super) fn call_udf<'py>( - py: Python<'py>, - udf: &'py PyAny, - result_type: &'py PyAny, - args: &'py PyTuple, -) -> PyResult<&'py PyAny> { - let result_type = DataType::from_pyarrow(result_type)?; - - // 1. Make sure we can convert each input to and from arrow arrays. - let mut udf_args = Vec::with_capacity(args.len() + 1); - udf_args.push(result_type.to_pyarrow(py)?); - for arg in args { - let array_data = ArrayData::from_pyarrow(arg)?; - let py_array: PyObject = array_data.to_pyarrow(py)?; - udf_args.push(py_array); +struct PyUdf { + signature: Signature, + callable: Py, + uuid: uuid::Uuid, +} + +struct PyUdfEvaluator { + args: Vec, + callable: Py, +} + +#[derive(Clone)] +#[pyclass] +pub(crate) struct Udf(Arc); + +impl sparrow_session::Udf for PyUdf { + fn signature(&self) -> &Signature { + &self.signatrue } - let args = PyTuple::new(py, udf_args); - let result = udf.call_method("run_pyarrow", args, None)?; - let array_data: ArrayData = ArrayData::from_pyarrow(result)?; - assert_eq!(array_data.data_type(), &result_type); + // FRAZ: Should we allow this to return an error? + fn make_evaluator(&self, static_info: StaticInfo<'_>) -> Box { + // Idea: Create a random value of each argument type and verify that the callbale + // is invokable on that. This would let us detect errors early. - Ok(result) + // FRAZ: Get the value refs for each static arg. + Box::new(PyUdfEvaluator { + args, + callable: self.callable.clone(), + }) + } + + fn uuid(&self) -> &uuid::Uuid { + &self.uuid + } +} + +impl Evaluator for PyUdfEvaluator { + /// Evaluate the function with the given runtime info. + fn evaluate(&self, work_area: &WorkArea<'_>) -> error_stack::Result { + let result = Python::with_gil(|py| -> PyResult<()> { evaluate_py(py, args) }); + // FRAZ: handle python errors. + Ok(result) + } } + +impl PyUdfEvaluator { + fn evaluate_py<'py>(&self, py: Python<'py>, args: Vec) -> ArrayRef { + // FRAZ: Fill in based on `call_udf` + todo!() + } +} + +#[pymethods] +impl Udf { + #[new] + fn new(signature: String, callable: Py) -> Self { + // FRAZ: Parse the signature and create an instance of PyUdf. + // Then, create an `Arc` from that and pass that to + // Udf. 
+        let udf = PyUdf {
+            signature: todo!(),
+            callable,
+            uuid: todo!(),
+        };
+        Udf(Arc::new(udf))
+    }
+}
+
+// #[pyfunction]
+// #[pyo3(signature = (udf, result_type, *args))]
+// pub(super) fn call_udf<'py>(
+//     py: Python<'py>,
+//     udf: &'py PyAny,
+//     result_type: &'py PyAny,
+//     args: &'py PyTuple,
+// ) -> PyResult<&'py PyAny> {
+//     let result_type = DataType::from_pyarrow(result_type)?;
+
+//     // 1. Make sure we can convert each input to and from arrow arrays.
+//     let mut udf_args = Vec::with_capacity(args.len() + 1);
+//     udf_args.push(result_type.to_pyarrow(py)?);
+//     for arg in args {
+//         let array_data = ArrayData::from_pyarrow(arg)?;
+//         let py_array: PyObject = array_data.to_pyarrow(py)?;
+//         udf_args.push(py_array);
+//     }
+//     let args = PyTuple::new(py, udf_args);
+//     let result = udf.call_method("run_pyarrow", args, None)?;
+
+//     let array_data: ArrayData = ArrayData::from_pyarrow(result)?;
+//     assert_eq!(array_data.data_type(), &result_type);
+
+//     Ok(result)
+// }

From 01b0c188996104dccb2f78e7e4f865dec2525082 Mon Sep 17 00:00:00 2001
From: Jordan Frazier
Date: Tue, 22 Aug 2023 22:15:49 -0700
Subject: [PATCH 02/18] few more plumbing changes

---
 crates/sparrow-expressions/src/evaluator.rs   |   2 +-
 crates/sparrow-expressions/src/work_area.rs   |   2 +-
 crates/sparrow-instructions/src/evaluators.rs |   2 +-
 crates/sparrow-runtime/src/execute.rs         |   1 +
 .../sparrow-runtime/src/execute/operation.rs  |   1 +
 .../execute/operation/expression_executor.rs  |   2 +-
 crates/sparrow-session/src/lib.rs             |   2 +-
 crates/sparrow-syntax/src/syntax/signature.rs |   1 -
 python/Cargo.lock                             |   3 +-
 python/Cargo.toml                             |   3 +
 python/pysrc/kaskada/__init__.py              |   6 +-
 python/pysrc/kaskada/_ffi.pyi                 |   2 +-
 python/pysrc/kaskada/_timestream.py           | 140 +++++++++---------
 python/pysrc/kaskada/udf.py                   |   3 +-
 python/pytests/udf_test.py                    |   7 +-
 python/src/expr.rs                            |  62 --------
 python/src/lib.rs                             |   1 -
 python/src/udf.rs                             | 113 ++++++++------
 18 files changed, 164 insertions(+), 189 deletions(-)

diff --git a/crates/sparrow-expressions/src/evaluator.rs b/crates/sparrow-expressions/src/evaluator.rs
index 2e15bade3..1dec9df79 100644
--- a/crates/sparrow-expressions/src/evaluator.rs
+++ b/crates/sparrow-expressions/src/evaluator.rs
@@ -4,7 +4,7 @@ use crate::work_area::WorkArea;
 use crate::Error;
 
 /// Trait for evaluating an individual expression node.
-pub trait Evaluator: Send + Sync {
+pub(super) trait Evaluator: Send + Sync {
     /// Evaluate the function with the given runtime info.
     fn evaluate(&self, work_area: &WorkArea<'_>) -> error_stack::Result;
 }
diff --git a/crates/sparrow-expressions/src/work_area.rs b/crates/sparrow-expressions/src/work_area.rs
index 8ab9f923e..444b614cb 100644
--- a/crates/sparrow-expressions/src/work_area.rs
+++ b/crates/sparrow-expressions/src/work_area.rs
@@ -4,7 +4,7 @@ use sparrow_arrow::Batch;
 use crate::values::WorkAreaValue;
 
 /// Information about an in-progress batch used for evaluation.
-pub struct WorkArea<'a> { +pub(super) struct WorkArea<'a> { pub input: &'a Batch, pub expressions: Vec, } diff --git a/crates/sparrow-instructions/src/evaluators.rs b/crates/sparrow-instructions/src/evaluators.rs index 721f4c0e1..719748aa7 100644 --- a/crates/sparrow-instructions/src/evaluators.rs +++ b/crates/sparrow-instructions/src/evaluators.rs @@ -44,7 +44,7 @@ use time::*; #[derive(Debug)] pub struct StaticInfo<'a> { inst_kind: &'a InstKind, - args: Vec, + pub args: Vec, result_type: &'a DataType, } diff --git a/crates/sparrow-runtime/src/execute.rs b/crates/sparrow-runtime/src/execute.rs index 3b284df57..915057ef1 100644 --- a/crates/sparrow-runtime/src/execute.rs +++ b/crates/sparrow-runtime/src/execute.rs @@ -21,6 +21,7 @@ use crate::execute::output::Destination; use crate::key_hash_inverse::{KeyHashInverse, ThreadSafeKeyHashInverse}; use crate::stores::ObjectStoreRegistry; use crate::RuntimeOptions; +pub use operation::WorkArea; mod checkpoints; mod compute_executor; diff --git a/crates/sparrow-runtime/src/execute/operation.rs b/crates/sparrow-runtime/src/execute/operation.rs index 600cb8abb..f7d09c0c4 100644 --- a/crates/sparrow-runtime/src/execute/operation.rs +++ b/crates/sparrow-runtime/src/execute/operation.rs @@ -66,6 +66,7 @@ use crate::execute::Error; use crate::key_hash_inverse::ThreadSafeKeyHashInverse; use crate::stores::ObjectStoreRegistry; use crate::Batch; +pub use expression_executor::WorkArea; /// Information used while creating operations. /// diff --git a/crates/sparrow-runtime/src/execute/operation/expression_executor.rs b/crates/sparrow-runtime/src/execute/operation/expression_executor.rs index 75d1f4c28..33b6c4944 100644 --- a/crates/sparrow-runtime/src/execute/operation/expression_executor.rs +++ b/crates/sparrow-runtime/src/execute/operation/expression_executor.rs @@ -263,7 +263,7 @@ impl ExpressionExecutor { } /// Wrapper around the columns produced during execution. -struct WorkArea { +pub struct WorkArea { time: ArrayRef, subsort: ArrayRef, key_hash: ArrayRef, diff --git a/crates/sparrow-session/src/lib.rs b/crates/sparrow-session/src/lib.rs index 22caadd2d..a06a88662 100644 --- a/crates/sparrow-session/src/lib.rs +++ b/crates/sparrow-session/src/lib.rs @@ -10,4 +10,4 @@ pub use expr::{Expr, Literal}; pub use session::{ExecutionOptions, Session}; pub use table::Table; -pub type Udf = sparrow_instructions::Udf; +// pub type Udf = dyn sparrow_instructions::Udf; diff --git a/crates/sparrow-syntax/src/syntax/signature.rs b/crates/sparrow-syntax/src/syntax/signature.rs index c20942388..7083b5678 100644 --- a/crates/sparrow-syntax/src/syntax/signature.rs +++ b/crates/sparrow-syntax/src/syntax/signature.rs @@ -189,7 +189,6 @@ impl Signature { self.name, self.parameters.names().len(), num_args, - me ) } } diff --git a/python/Cargo.lock b/python/Cargo.lock index 5bdff94f2..0dae8ea3c 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -1963,6 +1963,7 @@ dependencies = [ name = "kaskada" version = "0.6.0-a.1" dependencies = [ + "anyhow", "arrow", "derive_more", "error-stack", @@ -1971,8 +1972,8 @@ dependencies = [ "mimalloc", "pyo3", "pyo3-asyncio", - "sparrow-compiler", "sparrow-instructions", + "sparrow-runtime", "sparrow-session", "sparrow-syntax", "tokio", diff --git a/python/Cargo.toml b/python/Cargo.toml index 49c250868..e681c28d0 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -13,6 +13,7 @@ Python library for building and executing temporal queries. 
[dependencies] arrow = { version = "43.0.0", features = ["pyarrow"] } +anyhow = { version = "1.0.70", features = ["backtrace"] } derive_more = "0.99.17" error-stack = { version = "0.3.1", features = ["anyhow", "spantrace"] } futures = "0.3.27" @@ -23,6 +24,8 @@ mimalloc = { version = "0.1.37", default-features = false, features = ["local_dy pyo3 = {version = "0.19.1", features = ["abi3-py38", "extension-module", "generate-import-lib"]} pyo3-asyncio = { version = "0.19.0", features = ["tokio-runtime"] } sparrow-session = { path = "../crates/sparrow-session" } +sparrow-instructions = { path = "../crates/sparrow-instructions" } +sparrow-runtime = { path = "../crates/sparrow-runtime" } sparrow-syntax = { path = "../crates/sparrow-syntax" } tokio = { version = "1.27.0", features = ["sync"] } tracing = "0.1.37" diff --git a/python/pysrc/kaskada/__init__.py b/python/pysrc/kaskada/__init__.py index 4534b480e..5c2d73fe5 100644 --- a/python/pysrc/kaskada/__init__.py +++ b/python/pysrc/kaskada/__init__.py @@ -1,4 +1,4 @@ -"""Kaskada query builder and local executon engine.""" +"""Kaskada query builder and local execution engine.""" from __future__ import annotations from . import plot @@ -10,7 +10,8 @@ from ._timestream import Literal from ._timestream import Timestream from ._timestream import record - +from .udf import Udf +from .udf import udf __all__ = [ "ExecutionOptions", @@ -21,5 +22,6 @@ "Result", "sources", "Timestream", + "udf", "windows", ] diff --git a/python/pysrc/kaskada/_ffi.pyi b/python/pysrc/kaskada/_ffi.pyi index 70ccf94ec..d77c3ae58 100644 --- a/python/pysrc/kaskada/_ffi.pyi +++ b/python/pysrc/kaskada/_ffi.pyi @@ -1,7 +1,7 @@ from typing import Callable from typing import List from typing import Optional -from typing import Sequencem +from typing import Sequence import pyarrow as pa diff --git a/python/pysrc/kaskada/_timestream.py b/python/pysrc/kaskada/_timestream.py index 58b44ee57..a074c0bd3 100644 --- a/python/pysrc/kaskada/_timestream.py +++ b/python/pysrc/kaskada/_timestream.py @@ -67,70 +67,70 @@ def _literal(value: Literal, session: _ffi.Session) -> Timestream: raise TypeError("Cannot create a literal Timestream from a datetime") return Timestream(_ffi.Expr.literal(session, value)) - @staticmethod - def _test_apply_udf( - signature: inspect.Signature, - *args: Union[Timestream, Literal], - session: Optional[_ffi.Session] = None, - ) -> Timestream: - """ - todo - """ - if session is None: - session = next( - arg._ffi_expr.session() for arg in args if isinstance(arg, Timestream) - ) - - def make_arg(arg: Union[Timestream, Literal]) -> _ffi.Expr: - if isinstance(arg, Timestream): - return arg._ffi_expr - else: - return Timestream._literal(arg, session)._ffi_expr - - ffi_args = [make_arg(arg) for arg in args] - - arg_types = [param.annotation for param in signature.parameters.values()] - result_type = signature.return_annotation - - try: - return Timestream( - _ffi.Expr.udf(session=session, arg_types=arg_types, result_type=result_type, args=ffi_args) - ) - except TypeError as e: - # noqa: DAR401 - raise _augment_error(args, TypeError(str(e))) from e - except ValueError as e: - raise _augment_error(args, ValueError(str(e))) from e - - @staticmethod - def _test_decorator_udf( - signature: inspect.Signature, - *args: Union[Timestream, Literal], - session: Optional[_ffi.Session] = None, - ) -> Timestream: - """ - todo - """ - if session is None: - session = next( - arg._ffi_expr.session() for arg in args if isinstance(arg, Timestream) - ) - - def make_arg(arg: Union[Timestream, 
Literal]) -> _ffi.Expr: - if isinstance(arg, Timestream): - return arg._ffi_expr - else: - return Timestream._literal(arg, session)._ffi_expr - - try: - return Timestream( - _ffi.Expr.decorator_udf(session=session, args=ffi_args) - ) - except TypeError as e: - # noqa: DAR401 - raise _augment_error(args, TypeError(str(e))) from e - except ValueError as e: - raise _augment_error(args, ValueError(str(e))) from e + # @staticmethod + # def _test_apply_udf( + # signature: inspect.Signature, + # *args: Union[Timestream, Literal], + # session: Optional[_ffi.Session] = None, + # ) -> Timestream: + # """ + # todo + # """ + # if session is None: + # session = next( + # arg._ffi_expr.session() for arg in args if isinstance(arg, Timestream) + # ) + + # def make_arg(arg: Union[Timestream, Literal]) -> _ffi.Expr: + # if isinstance(arg, Timestream): + # return arg._ffi_expr + # else: + # return Timestream._literal(arg, session)._ffi_expr + + # ffi_args = [make_arg(arg) for arg in args] + + # arg_types = [param.annotation for param in signature.parameters.values()] + # result_type = signature.return_annotation + + # try: + # return Timestream( + # _ffi.Expr.udf(session=session, arg_types=arg_types, result_type=result_type, args=ffi_args) + # ) + # except TypeError as e: + # # noqa: DAR401 + # raise _augment_error(args, TypeError(str(e))) from e + # except ValueError as e: + # raise _augment_error(args, ValueError(str(e))) from e + + # @staticmethod + # def _test_decorator_udf( + # signature: inspect.Signature, + # *args: Union[Timestream, Literal], + # session: Optional[_ffi.Session] = None, + # ) -> Timestream: + # """ + # todo + # """ + # if session is None: + # session = next( + # arg._ffi_expr.session() for arg in args if isinstance(arg, Timestream) + # ) + + # def make_arg(arg: Union[Timestream, Literal]) -> _ffi.Expr: + # if isinstance(arg, Timestream): + # return arg._ffi_expr + # else: + # return Timestream._literal(arg, session)._ffi_expr + + # try: + # return Timestream( + # _ffi.Expr.decorator_udf(session=session, args=ffi_args) + # ) + # except TypeError as e: + # # noqa: DAR401 + # raise _augment_error(args, TypeError(str(e))) from e + # except ValueError as e: + # raise _augment_error(args, ValueError(str(e))) from e @@ -271,13 +271,13 @@ def pipe( # TODO: How does udf work? We kind of want it like... apply some function on an input? # But also want like `Timestream.udf(f, args*)` - def udf(callable: Callable, *args: Timestream) -> Timestream: - """ - todo - """ - signature = inspect.signature(callable) + # def udf(callable: Callable, *args: Timestream) -> Timestream: + # """ + # todo + # """ + # signature = inspect.signature(callable) - return Timestream._test_udf(signature, args) + # return Timestream._test_udf(signature, args) def add(self, rhs: Union[Timestream, Literal]) -> Timestream: """ diff --git a/python/pysrc/kaskada/udf.py b/python/pysrc/kaskada/udf.py index 4cb71642d..df699bb62 100644 --- a/python/pysrc/kaskada/udf.py +++ b/python/pysrc/kaskada/udf.py @@ -9,7 +9,7 @@ from ._timestream import Timestream from ._timestream import Literal -import _ffi +from . import _ffi # TODO: Allow functions to return `pd.DataFrame` for struct arrays. FuncType = Callable[..., pd.Series] @@ -22,6 +22,7 @@ def __init__(self, signature: str, func: Callable[..., pa.Array]) -> None: def __call__(self, *args: Timestream | Literal) -> Timestream: """Apply the UDF to the given arguments.""" + # TODO: Timestream>_udf_call? 
Timestream._call(self._ffi, *args) def udf(signature: str): diff --git a/python/pytests/udf_test.py b/python/pytests/udf_test.py index 7ea34621d..f58fe906b 100644 --- a/python/pytests/udf_test.py +++ b/python/pytests/udf_test.py @@ -1,10 +1,10 @@ import pandas as pd import pyarrow as pa import kaskada as kd -import kaskdaa._ffi as _ffi +import kaskada._ffi as _ffi import pytest -@kd.udf("add(x: number, y: number) -> number") +@kd.udf("add(x: N, y: N) -> N") def add(x: pd.Series, y: pd.Series) -> pd.Series: """Use Pandas to add two numbers.""" return x + y @@ -15,7 +15,8 @@ def test_docstring() -> None: def test_udf_instance() -> None: - assert isinstance(add, _ffi.Udf) + assert isinstance(add, kd.Udf) + assert isinstance(add._ffi, _ffi.Udf) @pytest.fixture def source_int64() -> kd.sources.CsvString: diff --git a/python/src/expr.rs b/python/src/expr.rs index 582f64b5d..0acc47464 100644 --- a/python/src/expr.rs +++ b/python/src/expr.rs @@ -44,68 +44,6 @@ impl Expr { Ok(Self { rust_expr, session }) } - // TODO: FRAZ - // Could you add a udf() method here that the _timestream calls? - // I want to be able to call a python function from the _timestream. - // I want to be able to do like polars: - // I like their `map` and `apply` method distinction -- `map` works on series, - // and `apply` works on individual values. - // - // - // TODO: Here is where you need to create your `Arc`. - // That means you need to - // 1. Infer arg types and result types from python signature - // 2. Infer FENL TYPES conversions (aka rust/data types kinda) - // 3. Create a SIGNATURE (Do arg names matter?) - // 4. Create Python Udf - signature + args? - // - // Signature inspection happens in _timestream. - // So I pass in arg_types and result_type, then construct signature here. - - #[staticmethod] - #[pyo3(signature = (session, arg_types, result_type, args))] - fn udf( - session: Session, - arg_types: Vec<&str>, - result_type: &str, - args: Vec, - ) -> PyResult { - if !args.iter().all(|e| e.session() == session) { - return Err(PyValueError::new_err( - "all arguments must be in the same session", - )); - } - - let mut rust_session = session.rust_session()?; - let args: Vec<_> = args.into_iter().map(|e| e.rust_expr).collect(); - println!("arg_types: {:?}", arg_types); - println!("result_type: {:?}", result_type); - - panic!("ok") - - // The python signature - } - - #[staticmethod] - #[pyo3(signature = (session, args))] - fn decorator_udf( - session: Session, - // TODO: signature? 
- args: Vec, - ) -> PyResult { - if !args.iter().all(|e| e.session() == session) { - return Err(PyValueError::new_err( - "all arguments must be in the same session", - )); - } - - let mut rust_session = session.rust_session()?; - let args: Vec<_> = args.into_iter().map(|e| e.rust_expr).collect(); - println!("arg_types: {:?}", arg_types); - println!("result_type: {:?}", result_type); - panic!("ok") - } - #[staticmethod] #[pyo3(signature = (session, value))] fn literal(session: Session, value: Option) -> PyResult { diff --git a/python/src/lib.rs b/python/src/lib.rs index 814656bf3..e18cda982 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -1,6 +1,5 @@ use pyo3::prelude::*; -mod call_udf; mod error; mod execution; mod expr; diff --git a/python/src/udf.rs b/python/src/udf.rs index b2854bf97..db9c525de 100644 --- a/python/src/udf.rs +++ b/python/src/udf.rs @@ -1,9 +1,17 @@ -use arrow::array::ArrayData; +use std::fmt::Debug; +use std::sync::Arc; +use itertools::Itertools; + +use arrow::array::{ArrayData, ArrayRef}; use arrow::datatypes::DataType; use arrow::pyarrow::{FromPyArrow, ToPyArrow}; use pyo3::prelude::*; use pyo3::types::PyTuple; +use sparrow_instructions::{Evaluator, RuntimeInfo, StaticInfo, ValueRef}; +use sparrow_syntax::{FeatureSetPart, Signature}; +use uuid::Uuid; +#[derive(Debug)] struct PyUdf { signature: Signature, callable: Py, @@ -17,18 +25,21 @@ struct PyUdfEvaluator { #[derive(Clone)] #[pyclass] -pub(crate) struct Udf(Arc); +pub(crate) struct Udf(Arc); -impl sparrow_session::Udf for PyUdf { +impl sparrow_instructions::Udf for PyUdf { fn signature(&self) -> &Signature { - &self.signatrue + &self.signature } // FRAZ: Should we allow this to return an error? - fn make_evaluator(&self, static_info: StaticInfo<'_>) -> Box { + fn make_evaluator(&self, info: StaticInfo<'_>) -> Box { + println!("Making udf evaluator"); // Idea: Create a random value of each argument type and verify that the callbale // is invokable on that. This would let us detect errors early. + let args = info.args.iter().map(|arg| arg.value_ref.clone()).collect(); + // FRAZ: Get the value refs for each static arg. Box::new(PyUdfEvaluator { args, @@ -42,9 +53,17 @@ impl sparrow_session::Udf for PyUdf { } impl Evaluator for PyUdfEvaluator { - /// Evaluate the function with the given runtime info. - fn evaluate(&self, work_area: &WorkArea<'_>) -> error_stack::Result { - let result = Python::with_gil(|py| -> PyResult<()> { evaluate_py(py, args) }); + fn evaluate(&mut self, info: &dyn RuntimeInfo) -> anyhow::Result { + let args = self.args + .iter() + .map(|value_ref| -> anyhow::Result<_> { + Ok(info.value(value_ref)?.array_ref()?) + }).try_collect()?; + + let result = Python::with_gil(|py| -> anyhow::Result { + // todo: uhh + Ok(self.evaluate_py(py, args)) + })?; // FRAZ: handle python errors. 
Ok(result) } @@ -52,8 +71,45 @@ impl Evaluator for PyUdfEvaluator { impl PyUdfEvaluator { fn evaluate_py<'py>(&self, py: Python<'py>, args: Vec) -> ArrayRef { - // FRAZ: Fill in based on `call_udf` - todo!() + println!("Evaluating python"); + + // self.callable.call1(py, (args,)).unwrap().extract(py).unwrap(); + // let args: Vec = args + // .iter() + // .map::, _>(|value_ref| { + // let value: ColumnarValue = info.value(value_ref)?; + // let array_ref: ArrayRef = value.array_ref()?; + // let array_data: ArrayData = aray_ref.to_data(); + // let py_obj: PyObject = array_data.to_pyarrow(py)?; + + // Ok(py_obj) + // }) + // .collect::>()?; + + // let locals = [("args", args)].into_py_dict(py); + // py.run(&self.code, None, Some(&locals))?; + // let result_py: PyObject = locals.get_item("ret").unwrap().into(); + + // let result_array_data: ArrayData = ArrayData::from_pyarrow(result_py.as_ref(py))?; + + // // We can't control the returned type, but we can refuse to accept the "wrong" type. + // ensure!( + // *result_array_data.data_type() == self.result_type, + // "expected Python UDF to return type {:?}, but received {:?}", + // result_array_data.data_type(), + // self.result_type + // ); + + // let result_array_ref: ArrayRef = array::make_array(result_array_data); + + // // This is a "map" operation - each row should reflect the time of the + // // corresponding input row, and have the same length. + // ensure!( + // result_array_ref.len() == info.num_rows(), + // "unexpected result length from Python UDF" + // ); + + panic!("arrayref") } } @@ -61,41 +117,14 @@ impl PyUdfEvaluator { impl Udf { #[new] fn new(signature: String, callable: Py) -> Self { - // FRAZ: Parse the signature and create an instance of PyUdf. - // Then, create an `Arc` from that and pass that to - // Udf. + let uuid = Uuid::new_v4(); + // TODO: sig name string cannot be &'static str + let signature = Signature::try_from_str(FeatureSetPart::Function("udf"), &signature).expect("signature to parse"); let udf = PyUdf { - signature: todo!(), + signature, callable, - uuid: todo!(), + uuid, }; Udf(Arc::new(udf)) } } - -// #[pyfunction] -// #[pyo3(signature = (udf, result_type, *args))] -// pub(super) fn call_udf<'py>( -// py: Python<'py>, -// udf: &'py PyAny, -// result_type: &'py PyAny, -// args: &'py PyTuple, -// ) -> PyResult<&'py PyAny> { -// let result_type = DataType::from_pyarrow(result_type)?; - -// // 1. Make sure we can convert each input to and from arrow arrays. 
-// let mut udf_args = Vec::with_capacity(args.len() + 1); -// udf_args.push(result_type.to_pyarrow(py)?); -// for arg in args { -// let array_data = ArrayData::from_pyarrow(arg)?; -// let py_array: PyObject = array_data.to_pyarrow(py)?; -// udf_args.push(py_array); -// } -// let args = PyTuple::new(py, udf_args); -// let result = udf.call_method("run_pyarrow", args, None)?; - -// let array_data: ArrayData = ArrayData::from_pyarrow(result)?; -// assert_eq!(array_data.data_type(), &result_type); - -// Ok(result) -// } From e0782a2675e16d461a43dcbeae095aacd9b65cf9 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Tue, 22 Aug 2023 23:18:31 -0700 Subject: [PATCH 03/18] Add call_udf method to expr --- crates/sparrow-session/src/session.rs | 5 +- python/pysrc/kaskada/_ffi.pyi | 7 ++ python/pysrc/kaskada/_timestream.py | 114 ++++++++++++-------------- python/pysrc/kaskada/udf.py | 2 +- python/src/expr.rs | 29 +++++++ python/src/udf.rs | 4 +- 6 files changed, 93 insertions(+), 68 deletions(-) diff --git a/crates/sparrow-session/src/session.rs b/crates/sparrow-session/src/session.rs index e5b616342..89efb8dee 100644 --- a/crates/sparrow-session/src/session.rs +++ b/crates/sparrow-session/src/session.rs @@ -291,12 +291,11 @@ impl Session { /// /// This bypasses much of the plumbing of the [ExprOp] required due to our construction /// of the AST. - #[allow(unused)] pub fn add_udf_to_dfg( &mut self, udf: Arc, args: Vec, - ) -> error_stack::Result { + ) -> error_stack::Result { let signature = udf.signature(); let arg_names = signature.arg_names().to_owned(); signature.assert_valid_argument_count(args.len()); @@ -334,7 +333,7 @@ impl Session { .collect(); Err(Error::Errors(errors))? } else { - Ok(result) + Ok(Expr(result)) } } diff --git a/python/pysrc/kaskada/_ffi.pyi b/python/pysrc/kaskada/_ffi.pyi index d77c3ae58..aabc16f6a 100644 --- a/python/pysrc/kaskada/_ffi.pyi +++ b/python/pysrc/kaskada/_ffi.pyi @@ -25,6 +25,12 @@ class Expr: args: Sequence[Expr], ) -> Expr: ... @staticmethod + def call_udf( + session: Session, + udf: Udf, # TODO: ffi udf? + args: Sequence[Expr], + ) -> Expr: ... + @staticmethod def literal( session: Session, value: int | float | str | None, @@ -35,6 +41,7 @@ class Expr: def session(self) -> Session: ... def execute(self, options: Optional[ExecutionOptions] = None) -> Execution: ... def grouping(self) -> Optional[str]: ... 
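(Aside, not part of the patch: to make the new `call_udf` entry point concrete, the intended end-to-end flow can be sketched as below. It assumes the session bootstrap `kd.init_session()` and reuses the `CsvString` source from the tests; note that at this commit `Udf.__call__` does not yet `return` the Timestream — patch 04 adds that.

import kaskada as kd
import pandas as pd

kd.init_session()

@kd.udf("add(x: N, y: N) -> N")
def add(x: pd.Series, y: pd.Series) -> pd.Series:
    return x + y

content = "\n".join(
    [
        "time,key,m,n",
        "1996-12-19T16:39:57,A,5,10",
        "1996-12-19T16:39:58,B,24,3",
    ]
)
source = kd.sources.CsvString(content, time_column_name="time", key_column_name="key")
# Applying the wrapped UDF routes through Timestream._call_udf, which calls
# _ffi.Expr.call_udf(session=..., udf=add._ffi, args=...).
result = add(source.col("m"), source.col("n"))
)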
+
 
 class Table(Expr):
     def __init__(
diff --git a/python/pysrc/kaskada/_timestream.py b/python/pysrc/kaskada/_timestream.py
index a074c0bd3..fb00abfba 100644
--- a/python/pysrc/kaskada/_timestream.py
+++ b/python/pysrc/kaskada/_timestream.py
@@ -67,72 +67,61 @@ def _literal(value: Literal, session: _ffi.Session) -> Timestream:
         raise TypeError("Cannot create a literal Timestream from a datetime")
         return Timestream(_ffi.Expr.literal(session, value))
 
-    # @staticmethod
-    # def _test_apply_udf(
-    #     signature: inspect.Signature,
-    #     *args: Union[Timestream, Literal],
-    #     session: Optional[_ffi.Session] = None,
-    # ) -> Timestream:
-    #     """
-    #     todo
-    #     """
-    #     if session is None:
-    #         session = next(
-    #             arg._ffi_expr.session() for arg in args if isinstance(arg, Timestream)
-    #         )
-
-    #     def make_arg(arg: Union[Timestream, Literal]) -> _ffi.Expr:
-    #         if isinstance(arg, Timestream):
-    #             return arg._ffi_expr
-    #         else:
-    #             return Timestream._literal(arg, session)._ffi_expr
-
-    #     ffi_args = [make_arg(arg) for arg in args]
-
-    #     arg_types = [param.annotation for param in signature.parameters.values()]
-    #     result_type = signature.return_annotation
-
-    #     try:
-    #         return Timestream(
-    #             _ffi.Expr.udf(session=session, arg_types=arg_types, result_type=result_type, args=ffi_args)
-    #         )
-    #     except TypeError as e:
-    #         # noqa: DAR401
-    #         raise _augment_error(args, TypeError(str(e))) from e
-    #     except ValueError as e:
-    #         raise _augment_error(args, ValueError(str(e))) from e
-
-    # @staticmethod
-    # def _test_decorator_udf(
-    #     signature: inspect.Signature,
-    #     *args: Union[Timestream, Literal],
-    #     session: Optional[_ffi.Session] = None,
-    # ) -> Timestream:
-    #     """
-    #     todo
-    #     """
-    #     if session is None:
-    #         session = next(
-    #             arg._ffi_expr.session() for arg in args if isinstance(arg, Timestream)
-    #         )
-
-    #     def make_arg(arg: Union[Timestream, Literal]) -> _ffi.Expr:
-    #         if isinstance(arg, Timestream):
-    #             return arg._ffi_expr
-    #         else:
-    #             return Timestream._literal(arg, session)._ffi_expr
-
-    #     try:
-    #         return Timestream(
-    #             _ffi.Expr.decorator_udf(session=session, args=ffi_args)
-    #         )
-    #     except TypeError as e:
-    #         # noqa: DAR401
-    #         raise _augment_error(args, TypeError(str(e))) from e
-    #     except ValueError as e:
-    #         raise _augment_error(args, ValueError(str(e))) from e
+    @staticmethod
+    def _call_udf(
+        udf: _ffi.Udf,
+        *args: Union[Timestream, Literal],
+        session: Optional[_ffi.Session] = None,
+    ) -> Timestream:
+        """
+        Construct a new Timestream by calling the given UDF.
+
+        Parameters
+        ----------
+        udf : _ffi.Udf
+            The UDF to apply.
+        *args : Timestream | int | str | float | bool | None
+            List of arguments to the expression.
+        session : FFI Session
+            FFI Session to create the expression in.
+            If unspecified, will infer from the arguments.
+            Will fail if all arguments are literals and the session is not provided.
+
+        Returns
+        -------
+        Timestream
+            Timestream representing the result of the UDF applied to the arguments.
+
+        Raises
+        ------
+        # noqa: DAR401 _augment_error
+        TypeError
+            If the argument types are invalid for the given function.
+        ValueError
+            If the argument values are invalid for the given function.
+ """ + if session is None: + session = next( + arg._ffi_expr.session() for arg in args if isinstance(arg, Timestream) + ) - # try: - # return Timestream( - # _ffi.Expr.decorator_udf(session=session, args=ffi_args) - # ) - # except TypeError as e: - # # noqa: DAR401 - # raise _augment_error(args, TypeError(str(e))) from e - # except ValueError as e: - # raise _augment_error(args, ValueError(str(e))) from e + def make_arg(arg: Union[Timestream, Literal]) -> _ffi.Expr: + if isinstance(arg, Timestream): + return arg._ffi_expr + else: + return Timestream._literal(arg, session)._ffi_expr + ffi_args = [make_arg(arg) for arg in args] + try: + return Timestream( + _ffi.Expr.call_udf(session=session, udf=udf, args=ffi_args) + ) + except TypeError as e: + # noqa: DAR401 + raise _augment_error(args, TypeError(str(e))) from e + except ValueError as e: + raise _augment_error(args, ValueError(str(e))) from e @staticmethod @@ -182,7 +171,6 @@ def make_arg(arg: Union[Timestream, Literal]) -> _ffi.Expr: ffi_args = [make_arg(arg) for arg in args] try: return Timestream( - # TODO: FRAZ - so I need a `call` that can take the udf _ffi.Expr.call(session=session, operation=func, args=ffi_args) ) except TypeError as e: diff --git a/python/pysrc/kaskada/udf.py b/python/pysrc/kaskada/udf.py index df699bb62..6121d0f8b 100644 --- a/python/pysrc/kaskada/udf.py +++ b/python/pysrc/kaskada/udf.py @@ -23,7 +23,7 @@ def __init__(self, signature: str, func: Callable[..., pa.Array]) -> None: def __call__(self, *args: Timestream | Literal) -> Timestream: """Apply the UDF to the given arguments.""" # TODO: Timestream>_udf_call? - Timestream._call(self._ffi, *args) + Timestream._call_udf(self._ffi, *args) def udf(signature: str): """Decorate a function for use as a Kaskada UDF.""" diff --git a/python/src/expr.rs b/python/src/expr.rs index 0acc47464..4a19ab3c9 100644 --- a/python/src/expr.rs +++ b/python/src/expr.rs @@ -1,10 +1,12 @@ use crate::error::Result; use crate::execution::Execution; use crate::session::Session; +use crate::udf::Udf; use arrow::datatypes::DataType; use arrow::pyarrow::{FromPyArrow, ToPyArrow}; use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::prelude::*; +use std::sync::Arc; use sparrow_session::{Expr as RustExpr, Literal, Session as RustSession}; /// Kaskada expression node. @@ -44,6 +46,33 @@ impl Expr { Ok(Self { rust_expr, session }) } + /// Create a new udf. + /// + /// This creates a new udf based on the `udf` and `args` provided. + #[staticmethod] + #[pyo3(signature = (session, udf, args))] + fn call_udf(session: Session, udf: Udf, args: Vec) -> PyResult { + let sparrow_udf = udf.0; + if !args.iter().all(|e| e.session() == session) { + return Err(PyValueError::new_err( + "all arguments must be in the same session", + )); + } + + let mut rust_session = session.rust_session()?; + let args: Vec<_> = args.into_iter().map(|e| e.rust_expr).collect(); + let rust_expr = match rust_session.add_udf_to_dfg(sparrow_udf.clone(), args) { + Ok(node) => node, + Err(e) => { + // DO NOT SUBMIT: Better error handling. 
+ return Err(PyValueError::new_err(e.to_string())); + } + }; + std::mem::drop(rust_session); + + Ok(Self { rust_expr, session }) + } + #[staticmethod] #[pyo3(signature = (session, value))] fn literal(session: Session, value: Option) -> PyResult { diff --git a/python/src/udf.rs b/python/src/udf.rs index db9c525de..bfffdf640 100644 --- a/python/src/udf.rs +++ b/python/src/udf.rs @@ -25,7 +25,8 @@ struct PyUdfEvaluator { #[derive(Clone)] #[pyclass] -pub(crate) struct Udf(Arc); +// TODO: Does it need to extend Expr for some reason? +pub(crate) struct Udf(pub Arc); impl sparrow_instructions::Udf for PyUdf { fn signature(&self) -> &Signature { @@ -116,6 +117,7 @@ impl PyUdfEvaluator { #[pymethods] impl Udf { #[new] + #[pyo3(signature = (signature, callable))] fn new(signature: String, callable: Py) -> Self { let uuid = Uuid::new_v4(); // TODO: sig name string cannot be &'static str From f75050f976180c8507b459de87c2a236978de41a Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Wed, 23 Aug 2023 00:19:53 -0700 Subject: [PATCH 04/18] Fix is_new and value mixup --- crates/sparrow-compiler/src/ast_to_dfg.rs | 5 +++-- .../sparrow-compiler/src/plan/expression_to_plan.rs | 7 ++++++- crates/sparrow-instructions/src/evaluators.rs | 5 ++++- .../src/execute/operation/expression_executor.rs | 6 ++++++ crates/sparrow-session/src/session.rs | 1 + python/pysrc/kaskada/udf.py | 4 ++-- python/src/expr.rs | 2 +- python/src/udf.rs | 11 +++++------ 8 files changed, 28 insertions(+), 13 deletions(-) diff --git a/crates/sparrow-compiler/src/ast_to_dfg.rs b/crates/sparrow-compiler/src/ast_to_dfg.rs index 2b05e12f8..84be28e3a 100644 --- a/crates/sparrow-compiler/src/ast_to_dfg.rs +++ b/crates/sparrow-compiler/src/ast_to_dfg.rs @@ -104,6 +104,7 @@ pub fn add_udf_to_dfg( data_context: &mut DataContext, diagnostics: &mut DiagnosticCollector<'_>, ) -> anyhow::Result { + println!("sparrow_compiler::add_udf_to_dfg"); let argument_types = arguments.transform(|i| i.with_value(i.value_type().clone())); let signature = udf.signature(); @@ -141,8 +142,8 @@ pub fn add_udf_to_dfg( }) .try_collect()?; - let is_new = dfg.add_udf(udf.clone(), args.iter().map(|i| i.value()).collect())?; - let value = is_any_new(dfg, &args)?; + let value = dfg.add_udf(udf.clone(), args.iter().map(|i| i.value()).collect())?; + let is_new = is_any_new(dfg, &args)?; let time_domain_check = TimeDomainCheck::Compatible; let time_domain = diff --git a/crates/sparrow-compiler/src/plan/expression_to_plan.rs b/crates/sparrow-compiler/src/plan/expression_to_plan.rs index d767dcb90..ee9b8b2d7 100644 --- a/crates/sparrow-compiler/src/plan/expression_to_plan.rs +++ b/crates/sparrow-compiler/src/plan/expression_to_plan.rs @@ -35,11 +35,16 @@ pub(super) fn dfg_to_plan( InstKind::FieldRef => "field_ref".to_owned(), InstKind::Record => "record".to_owned(), InstKind::Cast(_) => "cast".to_owned(), - InstKind::Udf(udf) => udf.signature().name().to_owned(), + InstKind::Udf(udf) => { + println!("dfg_to_plan udf: {:?}", udf.signature().name()); + udf.signature().name().to_owned() + // TODO: Do we need to serialize the callable into the plan? 
+ } }; let result_type = infer_result_type(inst, &arguments, plan_builder.expressions(operation_index)?)?; + println!("result type: {:?}", result_type); let operator = expression_plan::Operator::Instruction(name); (operator, result_type) diff --git a/crates/sparrow-instructions/src/evaluators.rs b/crates/sparrow-instructions/src/evaluators.rs index 719748aa7..4bf096720 100644 --- a/crates/sparrow-instructions/src/evaluators.rs +++ b/crates/sparrow-instructions/src/evaluators.rs @@ -160,7 +160,10 @@ pub fn create_evaluator(info: StaticInfo<'_>) -> anyhow::Result Ok(RecordEvaluator::try_new(info)?), - InstKind::Udf(udf) => Ok(udf.make_evaluator(info)), + InstKind::Udf(udf) => { + println!("Making udf evaluator"); + Ok(udf.make_evaluator(info)) + } } } diff --git a/crates/sparrow-runtime/src/execute/operation/expression_executor.rs b/crates/sparrow-runtime/src/execute/operation/expression_executor.rs index 33b6c4944..2bf0645a6 100644 --- a/crates/sparrow-runtime/src/execute/operation/expression_executor.rs +++ b/crates/sparrow-runtime/src/execute/operation/expression_executor.rs @@ -95,6 +95,12 @@ impl ExpressionExecutor { Operator::Instruction(inst) => { // TODO: This could probably be cleaned up, possibly by eliminating // `inst kind` entirely. + println!("Inst: {:?}", inst); + // TODO: OH SHIT LOL + // It's called `add`, which is a built-in function, so it's just doing add + // HAHAHA + // This needs to make the udf + // but hm...how? let inst_kind = if inst == "field_ref" { InstKind::FieldRef } else if inst == "record" { diff --git a/crates/sparrow-session/src/session.rs b/crates/sparrow-session/src/session.rs index 89efb8dee..099fd78cd 100644 --- a/crates/sparrow-session/src/session.rs +++ b/crates/sparrow-session/src/session.rs @@ -296,6 +296,7 @@ impl Session { udf: Arc, args: Vec, ) -> error_stack::Result { + println!("session::add_udf_to_dfg"); let signature = udf.signature(); let arg_names = signature.arg_names().to_owned(); signature.assert_valid_argument_count(args.len()); diff --git a/python/pysrc/kaskada/udf.py b/python/pysrc/kaskada/udf.py index 6121d0f8b..421fec992 100644 --- a/python/pysrc/kaskada/udf.py +++ b/python/pysrc/kaskada/udf.py @@ -22,8 +22,8 @@ def __init__(self, signature: str, func: Callable[..., pa.Array]) -> None: def __call__(self, *args: Timestream | Literal) -> Timestream: """Apply the UDF to the given arguments.""" - # TODO: Timestream>_udf_call? - Timestream._call_udf(self._ffi, *args) + print("Calling udf") + return Timestream._call_udf(self._ffi, *args) def udf(signature: str): """Decorate a function for use as a Kaskada UDF.""" diff --git a/python/src/expr.rs b/python/src/expr.rs index 4a19ab3c9..a623580f0 100644 --- a/python/src/expr.rs +++ b/python/src/expr.rs @@ -33,7 +33,6 @@ impl Expr { let mut rust_session = session.rust_session()?; let args: Vec<_> = args.into_iter().map(|e| e.rust_expr).collect(); - // TODO: - Support adding a UDF here. 
let rust_expr = match rust_session.add_expr(&operation, args) { Ok(node) => node, Err(e) => { @@ -52,6 +51,7 @@ impl Expr { #[staticmethod] #[pyo3(signature = (session, udf, args))] fn call_udf(session: Session, udf: Udf, args: Vec) -> PyResult { + println!("SparrowExpr::call_udf"); let sparrow_udf = udf.0; if !args.iter().all(|e| e.session() == session) { return Err(PyValueError::new_err( diff --git a/python/src/udf.rs b/python/src/udf.rs index bfffdf640..6c4a97500 100644 --- a/python/src/udf.rs +++ b/python/src/udf.rs @@ -35,13 +35,12 @@ impl sparrow_instructions::Udf for PyUdf { // FRAZ: Should we allow this to return an error? fn make_evaluator(&self, info: StaticInfo<'_>) -> Box { - println!("Making udf evaluator"); - // Idea: Create a random value of each argument type and verify that the callbale + // TODO: Idea: Create a random value of each argument type and verify that the callbale // is invokable on that. This would let us detect errors early. + println!("Making python udf evaluator"); let args = info.args.iter().map(|arg| arg.value_ref.clone()).collect(); - // FRAZ: Get the value refs for each static arg. Box::new(PyUdfEvaluator { args, callable: self.callable.clone(), @@ -55,6 +54,7 @@ impl sparrow_instructions::Udf for PyUdf { impl Evaluator for PyUdfEvaluator { fn evaluate(&mut self, info: &dyn RuntimeInfo) -> anyhow::Result { + println!("Evaluating Python UDF"); let args = self.args .iter() .map(|value_ref| -> anyhow::Result<_> { @@ -62,7 +62,7 @@ impl Evaluator for PyUdfEvaluator { }).try_collect()?; let result = Python::with_gil(|py| -> anyhow::Result { - // todo: uhh + // todo: Ok(self.evaluate_py(py, args)) })?; // FRAZ: handle python errors. @@ -72,8 +72,6 @@ impl Evaluator for PyUdfEvaluator { impl PyUdfEvaluator { fn evaluate_py<'py>(&self, py: Python<'py>, args: Vec) -> ArrayRef { - println!("Evaluating python"); - // self.callable.call1(py, (args,)).unwrap().extract(py).unwrap(); // let args: Vec = args // .iter() @@ -119,6 +117,7 @@ impl Udf { #[new] #[pyo3(signature = (signature, callable))] fn new(signature: String, callable: Py) -> Self { + println!("Creating udf with signature: {:?}", signature); let uuid = Uuid::new_v4(); // TODO: sig name string cannot be &'static str let signature = Signature::try_from_str(FeatureSetPart::Function("udf"), &signature).expect("signature to parse"); From 9ba704584179a450bf9413526858586ce788f242 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Wed, 23 Aug 2023 00:21:30 -0700 Subject: [PATCH 05/18] fix comment --- .../src/execute/operation/expression_executor.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/sparrow-runtime/src/execute/operation/expression_executor.rs b/crates/sparrow-runtime/src/execute/operation/expression_executor.rs index 2bf0645a6..184bacb54 100644 --- a/crates/sparrow-runtime/src/execute/operation/expression_executor.rs +++ b/crates/sparrow-runtime/src/execute/operation/expression_executor.rs @@ -96,11 +96,9 @@ impl ExpressionExecutor { // TODO: This could probably be cleaned up, possibly by eliminating // `inst kind` entirely. println!("Inst: {:?}", inst); - // TODO: OH SHIT LOL + // TODO: FRAZ // It's called `add`, which is a built-in function, so it's just doing add - // HAHAHA - // This needs to make the udf - // but hm...how? + // This needs to make the udf instead. But how? Do we serialize the Arc? 
let inst_kind = if inst == "field_ref" { InstKind::FieldRef } else if inst == "record" { From 2664783cf7b7b604ac332beaaf005e1bdb1c005f Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Wed, 23 Aug 2023 14:13:39 -0700 Subject: [PATCH 06/18] don't hold gil for entire collect pyarrow method --- Cargo.lock | 1 + .../src/plan/expression_to_plan.rs | 4 +-- crates/sparrow-runtime/src/execute.rs | 34 ++++++++++++++++--- .../src/execute/compute_executor.rs | 5 +++ .../sparrow-runtime/src/execute/operation.rs | 17 +++++++--- .../execute/operation/expression_executor.rs | 19 +++++++---- crates/sparrow-session/Cargo.toml | 1 + crates/sparrow-session/src/session.rs | 12 +++++-- python/Cargo.lock | 1 + python/Pipfile | 11 ++++++ python/pysrc/kaskada/_result.py | 1 + .../golden/udf_test/test_add_udf_direct.jsonl | 6 ++++ .../golden/udf_test/test_add_udf_pipe.jsonl | 6 ++++ python/src/execution.rs | 16 +++++---- python/src/expr.rs | 3 +- python/src/udf.rs | 27 ++++++++------- 16 files changed, 125 insertions(+), 39 deletions(-) create mode 100644 python/Pipfile create mode 100644 python/pytests/golden/udf_test/test_add_udf_direct.jsonl create mode 100644 python/pytests/golden/udf_test/test_add_udf_pipe.jsonl diff --git a/Cargo.lock b/Cargo.lock index a3ef7d2fa..357b82c94 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4774,6 +4774,7 @@ dependencies = [ "derive_more", "error-stack", "futures", + "hashbrown 0.14.0", "itertools 0.11.0", "smallvec", "sparrow-api", diff --git a/crates/sparrow-compiler/src/plan/expression_to_plan.rs b/crates/sparrow-compiler/src/plan/expression_to_plan.rs index ee9b8b2d7..99495e4fe 100644 --- a/crates/sparrow-compiler/src/plan/expression_to_plan.rs +++ b/crates/sparrow-compiler/src/plan/expression_to_plan.rs @@ -37,14 +37,12 @@ pub(super) fn dfg_to_plan( InstKind::Cast(_) => "cast".to_owned(), InstKind::Udf(udf) => { println!("dfg_to_plan udf: {:?}", udf.signature().name()); - udf.signature().name().to_owned() - // TODO: Do we need to serialize the callable into the plan? 
+ udf.uuid().to_string() } }; let result_type = infer_result_type(inst, &arguments, plan_builder.expressions(operation_index)?)?; - println!("result type: {:?}", result_type); let operator = expression_plan::Operator::Instruction(name); (operator, result_type) diff --git a/crates/sparrow-runtime/src/execute.rs b/crates/sparrow-runtime/src/execute.rs index 915057ef1..45630ad7d 100644 --- a/crates/sparrow-runtime/src/execute.rs +++ b/crates/sparrow-runtime/src/execute.rs @@ -4,6 +4,7 @@ use chrono::NaiveDateTime; use enum_map::EnumMap; use error_stack::{IntoReport, IntoReportCompat, ResultExt}; use futures::Stream; +use hashbrown::HashMap; use prost_wkt_types::Timestamp; use sparrow_api::kaskada::v1alpha::execute_request::Limits; use sparrow_api::kaskada::v1alpha::{ @@ -12,7 +13,9 @@ use sparrow_api::kaskada::v1alpha::{ }; use sparrow_arrow::scalar_value::ScalarValue; use sparrow_compiler::{hash_compute_plan_proto, DataContext}; +use sparrow_instructions::Udf; use sparrow_qfr::kaskada::sparrow::v1alpha::FlightRecordHeader; +use uuid::Uuid; use crate::execute::compute_store_guard::ComputeStoreGuard; use crate::execute::error::Error; @@ -67,9 +70,15 @@ pub async fn execute( ..ExecutionOptions::default() }; - // let output_at_time = request.final_result_time; - - execute_new(plan, destination, data_context, options, None).await + execute_new( + plan, + destination, + data_context, + options, + None, + &HashMap::new(), + ) + .await } #[derive(Default, Debug)] @@ -215,12 +224,18 @@ async fn load_key_hash_inverse( /// ---------- /// - key_hash_inverse: If set, specifies the key hash inverses to use. If None, the /// key hashes will be created. +/// - udfs: contains the mapping of uuid to udf implementation. This is currently used +/// so we can serialize the uuid to the ComputePlan, then look up what implementation to use +/// when creating the evaluator. This works because we are on a single machine, and don't need +/// to pass the plan around. However, we'll eventually need to look into serializing/pickling +/// the callable. pub async fn execute_new( plan: ComputePlan, destination: Destination, mut data_context: DataContext, options: ExecutionOptions, key_hash_inverse: Option>, + udfs: &HashMap>, ) -> error_stack::Result>, Error> { let object_stores = Arc::new(ObjectStoreRegistry::default()); @@ -287,6 +302,7 @@ pub async fn execute_new( progress_updates_rx, destination, options.stop_signal_rx, + udfs, ) .await .change_context(Error::internal_msg("spawn compute executor"))?; @@ -327,5 +343,15 @@ pub async fn materialize( // TODO: the `execute_with_progress` method contains a lot of additional logic that is theoretically not needed, // as the materialization does not exit, and should not need to handle cleanup tasks that regular // queries do. We should likely refactor this to use a separate `materialize_with_progress` method. 
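The `udfs` map documented above is a plain side-table: the plan carries only the uuid string, and the executor resolves it back to the implementation. Schematically, under the same single-machine assumption (`UdfRegistry` is a hypothetical name):

    import uuid

    class UdfRegistry:
        def __init__(self):
            self._udfs = {}  # uuid.UUID -> callable

        def register(self, func) -> str:
            key = uuid.uuid4()
            self._udfs[key] = func
            return str(key)  # only this string is written into the plan

        def resolve(self, name: str):
            # Mirrors the executor: anything that parses as a uuid is a UDF;
            # anything else falls through to the builtin instructions.
            try:
                return self._udfs.get(uuid.UUID(name))
            except ValueError:
                return None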
- execute_new(plan, destination, data_context, options, None).await + + // TODO: Materialize with udfs + execute_new( + plan, + destination, + data_context, + options, + None, + &HashMap::new(), + ) + .await } diff --git a/crates/sparrow-runtime/src/execute/compute_executor.rs b/crates/sparrow-runtime/src/execute/compute_executor.rs index 479ee2ef6..fb63354e9 100644 --- a/crates/sparrow-runtime/src/execute/compute_executor.rs +++ b/crates/sparrow-runtime/src/execute/compute_executor.rs @@ -5,16 +5,19 @@ use enum_map::EnumMap; use error_stack::{IntoReport, IntoReportCompat, ResultExt}; use futures::stream::{FuturesUnordered, PollNext}; use futures::{FutureExt, Stream, TryFutureExt}; +use hashbrown::HashMap; use prost_wkt_types::Timestamp; use sparrow_api::kaskada::v1alpha::ComputeSnapshot; use sparrow_api::kaskada::v1alpha::{ExecuteResponse, LateBoundValue, PlanHash}; use sparrow_arrow::scalar_value::ScalarValue; +use sparrow_instructions::Udf; use sparrow_qfr::io::writer::FlightRecordWriter; use sparrow_qfr::kaskada::sparrow::v1alpha::FlightRecordHeader; use sparrow_qfr::FlightRecorderFactory; use tokio_stream::wrappers::UnboundedReceiverStream; use tokio_stream::StreamExt; use tracing::{error, info, info_span, Instrument}; +use uuid::Uuid; use crate::execute::compute_store_guard::ComputeStoreGuard; use crate::execute::operation::{OperationContext, OperationExecutor}; @@ -56,6 +59,7 @@ impl ComputeExecutor { progress_updates_rx: tokio::sync::mpsc::Receiver, destination: Destination, stop_signal_rx: Option>, + udfs: &HashMap>, ) -> error_stack::Result { let mut spawner = ComputeTaskSpawner::new(); @@ -147,6 +151,7 @@ impl ComputeExecutor { max_event_time_tx.clone(), late_bindings, stop_signal_rx.clone(), + &udfs, ) .await?, ); diff --git a/crates/sparrow-runtime/src/execute/operation.rs b/crates/sparrow-runtime/src/execute/operation.rs index f7d09c0c4..679f5b3af 100644 --- a/crates/sparrow-runtime/src/execute/operation.rs +++ b/crates/sparrow-runtime/src/execute/operation.rs @@ -42,14 +42,16 @@ use chrono::NaiveDateTime; use enum_map::EnumMap; use error_stack::{IntoReport, IntoReportCompat, Report, Result, ResultExt}; use futures::Future; +use hashbrown::HashMap; use prost_wkt_types::Timestamp; use sparrow_api::kaskada::v1alpha::operation_plan::tick_operation::TickBehavior; use sparrow_api::kaskada::v1alpha::{operation_plan, ComputePlan, LateBoundValue, OperationPlan}; use sparrow_arrow::scalar_value::ScalarValue; use sparrow_compiler::DataContext; -use sparrow_instructions::ComputeStore; +use sparrow_instructions::{ComputeStore, Udf}; use tokio::task::JoinHandle; use tracing::Instrument; +use uuid::Uuid; use self::final_tick::FinalTickOperation; use self::input_batch::InputBatch; @@ -176,6 +178,7 @@ impl OperationExecutor { max_event_time_tx: tokio::sync::mpsc::UnboundedSender, late_bindings: &EnumMap>, stop_signal_rx: Option>, + udfs: &HashMap>, ) -> Result>, Error> { let Self { operation, @@ -188,10 +191,14 @@ impl OperationExecutor { let operation_label = operator.label(); - let mut expression_executor = - ExpressionExecutor::try_new(operation_label, operation.expressions, late_bindings) - .into_report() - .change_context(Error::internal_msg("unable to create executor"))?; + let mut expression_executor = ExpressionExecutor::try_new( + operation_label, + operation.expressions, + late_bindings, + udfs, + ) + .into_report() + .change_context(Error::internal_msg("unable to create executor"))?; debug_assert_eq!(operator.input_len(), input_channels.len()); diff --git 
a/crates/sparrow-runtime/src/execute/operation/expression_executor.rs b/crates/sparrow-runtime/src/execute/operation/expression_executor.rs index 184bacb54..b8030d181 100644 --- a/crates/sparrow-runtime/src/execute/operation/expression_executor.rs +++ b/crates/sparrow-runtime/src/execute/operation/expression_executor.rs @@ -6,15 +6,17 @@ use arrow::array::ArrayRef; use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; use arrow::record_batch::RecordBatch; use enum_map::EnumMap; +use hashbrown::HashMap; use itertools::Itertools; use sparrow_api::kaskada::v1alpha::expression_plan::Operator; use sparrow_api::kaskada::v1alpha::{ExpressionPlan, LateBoundValue, OperationInputRef}; use sparrow_arrow::scalar_value::ScalarValue; -use sparrow_instructions::ValueRef; use sparrow_instructions::{ create_evaluator, ColumnarValue, ComputeStore, Evaluator, GroupingIndices, InstKind, InstOp, RuntimeInfo, StaticArg, StaticInfo, StoreKey, }; +use sparrow_instructions::{Udf, ValueRef}; +use uuid::Uuid; use crate::execute::operation::InputBatch; use crate::Batch; @@ -49,6 +51,7 @@ impl ExpressionExecutor { operation_label: &'static str, expressions: Vec, late_bindings: &EnumMap>, + udfs: &HashMap>, ) -> anyhow::Result { let mut input_columns = Vec::new(); @@ -96,9 +99,6 @@ impl ExpressionExecutor { // TODO: This could probably be cleaned up, possibly by eliminating // `inst kind` entirely. println!("Inst: {:?}", inst); - // TODO: FRAZ - // It's called `add`, which is a built-in function, so it's just doing add - // This needs to make the udf instead. But how? Do we serialize the Arc? let inst_kind = if inst == "field_ref" { InstKind::FieldRef } else if inst == "record" { @@ -106,8 +106,15 @@ impl ExpressionExecutor { } else if inst == "cast" { InstKind::Cast(data_type.clone()) } else { - let inst_op = InstOp::from_str(&inst)?; - InstKind::Simple(inst_op) + // This assumes we'll never have an InstOp function name that + // matches a uuid, which should be safe. + if let Ok(uuid) = Uuid::from_str(&inst) { + let udf = udfs.get(&uuid).ok_or(anyhow::anyhow!("expected udf"))?; + InstKind::Udf(udf.clone()) + } else { + let inst_op = InstOp::from_str(&inst)?; + InstKind::Simple(inst_op) + } }; let static_info = StaticInfo::new(&inst_kind, args, &data_type); diff --git a/crates/sparrow-session/Cargo.toml b/crates/sparrow-session/Cargo.toml index fde919198..117bcab16 100644 --- a/crates/sparrow-session/Cargo.toml +++ b/crates/sparrow-session/Cargo.toml @@ -11,6 +11,7 @@ The Sparrow session builder. [dependencies] arrow-array.workspace = true +hashbrown.workspace = true arrow-schema.workspace = true arrow-select.workspace = true derive_more.workspace = true diff --git a/crates/sparrow-session/src/session.rs b/crates/sparrow-session/src/session.rs index 099fd78cd..46cac7d28 100644 --- a/crates/sparrow-session/src/session.rs +++ b/crates/sparrow-session/src/session.rs @@ -1,5 +1,5 @@ +use hashbrown::HashMap; use std::borrow::Cow; -use std::collections::HashMap; use std::sync::Arc; use arrow_schema::SchemaRef; @@ -25,6 +25,13 @@ pub struct Session { data_context: DataContext, dfg: Dfg, key_hash_inverse: HashMap>, + /// Keeps track of the uuid mapping. 
+ /// + /// This is a bit of a hack that allows to serialize the uuid instead of the + /// `Arc>, } #[derive(Default)] @@ -296,7 +303,6 @@ impl Session { udf: Arc, args: Vec, ) -> error_stack::Result { - println!("session::add_udf_to_dfg"); let signature = udf.signature(); let arg_names = signature.arg_names().to_owned(); signature.assert_valid_argument_count(args.len()); @@ -334,6 +340,7 @@ impl Session { .collect(); Err(Error::Errors(errors))? } else { + self.udfs.insert(udf.uuid().clone(), udf.clone()); Ok(Expr(result)) } } @@ -416,6 +423,7 @@ impl Session { data_context, options, Some(key_hash_inverse), + &self.udfs, )) .change_context(Error::Execute)? .map_err(|e| e.change_context(Error::Execute)) diff --git a/python/Cargo.lock b/python/Cargo.lock index 0dae8ea3c..754ff6bff 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -3961,6 +3961,7 @@ dependencies = [ "derive_more", "error-stack", "futures", + "hashbrown 0.14.0", "itertools 0.11.0", "smallvec", "sparrow-api", diff --git a/python/Pipfile b/python/Pipfile new file mode 100644 index 000000000..71e4f7cbf --- /dev/null +++ b/python/Pipfile @@ -0,0 +1,11 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] + +[dev-packages] + +[requires] +python_version = "3.9" diff --git a/python/pysrc/kaskada/_result.py b/python/pysrc/kaskada/_result.py index eea4e04ac..4377a5dcf 100644 --- a/python/pysrc/kaskada/_result.py +++ b/python/pysrc/kaskada/_result.py @@ -46,6 +46,7 @@ def to_pyarrow(self) -> pa.Table: This method will block on the complete results of the query and collect all results into memory. If this is not desired, use `iter_pyarrow` instead. """ + print("Converting to_pyarrow") batches = self._ffi_execution.collect_pyarrow() if len(batches) == 0: return pa.Table.from_batches([], schema=pa.schema([])) diff --git a/python/pytests/golden/udf_test/test_add_udf_direct.jsonl b/python/pytests/golden/udf_test/test_add_udf_direct.jsonl new file mode 100644 index 000000000..5f101ddf1 --- /dev/null +++ b/python/pytests/golden/udf_test/test_add_udf_direct.jsonl @@ -0,0 +1,6 @@ +{"_time":"1996-12-19T16:39:57.000000000","_key":"A","result":15.0} +{"_time":"1996-12-19T16:39:58.000000000","_key":"B","result":27.0} +{"_time":"1996-12-19T16:39:59.000000000","_key":"A","result":23.0} +{"_time":"1996-12-19T16:40:00.000000000","_key":"A","result":null} +{"_time":"1996-12-19T16:40:01.000000000","_key":"A","result":null} +{"_time":"1996-12-19T16:40:02.000000000","_key":"A","result":23.0} diff --git a/python/pytests/golden/udf_test/test_add_udf_pipe.jsonl b/python/pytests/golden/udf_test/test_add_udf_pipe.jsonl new file mode 100644 index 000000000..358c577c5 --- /dev/null +++ b/python/pytests/golden/udf_test/test_add_udf_pipe.jsonl @@ -0,0 +1,6 @@ +{"_time":"1996-12-19T16:39:57.000000000","_key":"A","result":15.0} +{"_time":"1996-12-19T16:39:58.000000000","_key":"B","result":27.0} +{"_time":"1996-12-19T16:39:59.000000000","_key":"A","result":23.0} +{"_time":"1996-12-19T16:40:00.000000000","_key":"A","result":null} +{"_time":"1996-12-19T16:40:01.000000000","_key":"A","result":null} +{"_time":"1996-12-19T16:40:02.000000000","_key":"A","result":null} diff --git a/python/src/execution.rs b/python/src/execution.rs index 51b663289..c49db64be 100644 --- a/python/src/execution.rs +++ b/python/src/execution.rs @@ -53,14 +53,18 @@ impl Execution {} #[pymethods] impl Execution { - fn collect_pyarrow(&mut self, py: Python<'_>) -> Result> { + fn collect_pyarrow(&mut self) -> Result> { let execution = 
self.take_execution()?; let batches = execution.collect_all_blocking()?; - let results = batches - .into_iter() - .map(|batch| batch.to_pyarrow(py)) - .collect::>>()?; - Ok(results) + println!("Execution::collect_pyarrow: batches.len() = {}", batches.len()); + let result = Python::with_gil(|py| -> PyResult> { + let results = batches + .into_iter() + .map(|batch| batch.to_pyarrow(py)) + .collect::>>()?; + Ok(results) + }); + result.map_err(|_| error_stack::report!(ErrorContext::Python).into()) } fn next_pyarrow(&mut self, py: Python<'_>) -> Result> { diff --git a/python/src/expr.rs b/python/src/expr.rs index a623580f0..d0f5111ca 100644 --- a/python/src/expr.rs +++ b/python/src/expr.rs @@ -6,8 +6,8 @@ use arrow::datatypes::DataType; use arrow::pyarrow::{FromPyArrow, ToPyArrow}; use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::prelude::*; -use std::sync::Arc; use sparrow_session::{Expr as RustExpr, Literal, Session as RustSession}; +use std::sync::Arc; /// Kaskada expression node. #[derive(Clone)] @@ -61,6 +61,7 @@ impl Expr { let mut rust_session = session.rust_session()?; let args: Vec<_> = args.into_iter().map(|e| e.rust_expr).collect(); + let rust_expr = match rust_session.add_udf_to_dfg(sparrow_udf.clone(), args) { Ok(node) => node, Err(e) => { diff --git a/python/src/udf.rs b/python/src/udf.rs index 6c4a97500..65059a91b 100644 --- a/python/src/udf.rs +++ b/python/src/udf.rs @@ -1,8 +1,8 @@ +use itertools::Itertools; use std::fmt::Debug; use std::sync::Arc; -use itertools::Itertools; -use arrow::array::{ArrayData, ArrayRef}; +use arrow::array::{new_empty_array, ArrayData, ArrayRef}; use arrow::datatypes::DataType; use arrow::pyarrow::{FromPyArrow, ToPyArrow}; use pyo3::prelude::*; @@ -25,7 +25,7 @@ struct PyUdfEvaluator { #[derive(Clone)] #[pyclass] -// TODO: Does it need to extend Expr for some reason? +// TODO: Does it need to extend Expr for some reason? pub(crate) struct Udf(pub Arc); impl sparrow_instructions::Udf for PyUdf { @@ -55,14 +55,15 @@ impl sparrow_instructions::Udf for PyUdf { impl Evaluator for PyUdfEvaluator { fn evaluate(&mut self, info: &dyn RuntimeInfo) -> anyhow::Result { println!("Evaluating Python UDF"); - let args = self.args + let args = self + .args .iter() - .map(|value_ref| -> anyhow::Result<_> { - Ok(info.value(value_ref)?.array_ref()?) - }).try_collect()?; - + .map(|value_ref| -> anyhow::Result<_> { Ok(info.value(value_ref)?.array_ref()?) }) + .try_collect()?; + let result = Python::with_gil(|py| -> anyhow::Result { - // todo: + // todo: + println!("inside gil"); Ok(self.evaluate_py(py, args)) })?; // FRAZ: handle python errors. 
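Per batch, the evaluator resolves each ValueRef to an Arrow array and hands the arrays to the callable under the GIL; the callable must return a same-length array (the row-count `ensure!` added later in the series). The contract, restated as a Python sketch (`evaluate` here is illustrative, not the pyo3 code):

    import pyarrow as pa

    def evaluate(callable_, args: list[pa.Array], num_rows: int) -> pa.Array:
        result = callable_(*args)
        assert isinstance(result, pa.Array), "Python UDF must return a pyarrow Array"
        assert len(result) == num_rows, "result has wrong number of rows"
        return result

    evaluate(lambda x: x, [pa.array([1, 2, 3])], 3)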
@@ -73,7 +74,7 @@ impl Evaluator for PyUdfEvaluator { impl PyUdfEvaluator { fn evaluate_py<'py>(&self, py: Python<'py>, args: Vec) -> ArrayRef { // self.callable.call1(py, (args,)).unwrap().extract(py).unwrap(); - // let args: Vec = args + // let args: Vec = args // .iter() // .map::, _>(|value_ref| { // let value: ColumnarValue = info.value(value_ref)?; @@ -108,7 +109,8 @@ impl PyUdfEvaluator { // "unexpected result length from Python UDF" // ); - panic!("arrayref") + println!("array ref panic?"); + new_empty_array(&DataType::Int32) } } @@ -120,7 +122,8 @@ impl Udf { println!("Creating udf with signature: {:?}", signature); let uuid = Uuid::new_v4(); // TODO: sig name string cannot be &'static str - let signature = Signature::try_from_str(FeatureSetPart::Function("udf"), &signature).expect("signature to parse"); + let signature = Signature::try_from_str(FeatureSetPart::Function("udf"), &signature) + .expect("signature to parse"); let udf = PyUdf { signature, callable, From b38a71c42a9eea841fe45a0d6fe8ccac9b255d93 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Wed, 23 Aug 2023 14:28:32 -0700 Subject: [PATCH 07/18] Fix gil handling in collect --- python/src/execution.rs | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/python/src/execution.rs b/python/src/execution.rs index c49db64be..19d3e0d46 100644 --- a/python/src/execution.rs +++ b/python/src/execution.rs @@ -53,18 +53,21 @@ impl Execution {} #[pymethods] impl Execution { - fn collect_pyarrow(&mut self) -> Result> { - let execution = self.take_execution()?; - let batches = execution.collect_all_blocking()?; - println!("Execution::collect_pyarrow: batches.len() = {}", batches.len()); - let result = Python::with_gil(|py| -> PyResult> { - let results = batches - .into_iter() - .map(|batch| batch.to_pyarrow(py)) - .collect::>>()?; - Ok(results) - }); - result.map_err(|_| error_stack::report!(ErrorContext::Python).into()) + fn collect_pyarrow(&mut self, py: Python<'_>) -> Result> { + let execution = self.take_execution().unwrap(); + + // Explicitly allow threads to take the gil during execution, so + // the udf evaluator can acquire it to execute python. 
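The deadlock this change avoids can be modeled with an ordinary lock standing in for the GIL: a thread that holds the lock while blocking on workers that also need it never finishes. A rough analogy only (threading.Lock playing the GIL's role):

    import threading

    gil = threading.Lock()

    def udf_evaluator():
        with gil:  # the evaluator needs the "GIL" to run the callable
            pass

    t = threading.Thread(target=udf_evaluator)
    with gil:  # collect_pyarrow previously held it for the whole query...
        t.start()
        # ...so joining here would deadlock. Releasing first (allow_threads)
        # lets the evaluator make progress.
    t.join()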
+        let batches = py.allow_threads(move || {
+            execution.collect_all_blocking()
+        })?;
+
+        let results = batches
+            .into_iter()
+            .map(|batch| batch.to_pyarrow(py))
+            .collect::<PyResult<Vec<PyObject>>>()?;
+        Ok(results)
+        // result.map_err(|_| error_stack::report!(ErrorContext::Python).into())
     }
 
     fn next_pyarrow(&mut self, py: Python<'_>) -> Result<Option<PyObject>> {

From fccc9fb380bed28334ce6106f62a83ddeff9f5db Mon Sep 17 00:00:00 2001
From: Jordan Frazier
Date: Thu, 24 Aug 2023 12:05:24 -0700
Subject: [PATCH 08/18] Attempting to invoke callable -- arguments aren't
 correct yet

---
 crates/sparrow-instructions/src/evaluators.rs |   2 +-
 python/pysrc/kaskada/_ffi.pyi                 |   2 +-
 python/pysrc/kaskada/_timestream.py           |  10 --
 python/pysrc/kaskada/udf.py                   |  13 +-
 .../golden/udf_test/test_add_udf_direct.jsonl |   6 -
 .../golden/udf_test/test_add_udf_pipe.jsonl   |   6 -
 python/src/execution.rs                       |   7 +-
 python/src/udf.rs                             | 122 +++++++++++-------
 8 files changed, 87 insertions(+), 81 deletions(-)
 delete mode 100644 python/pytests/golden/udf_test/test_add_udf_direct.jsonl
 delete mode 100644 python/pytests/golden/udf_test/test_add_udf_pipe.jsonl

diff --git a/crates/sparrow-instructions/src/evaluators.rs b/crates/sparrow-instructions/src/evaluators.rs
index 4bf096720..d0cc6950f 100644
--- a/crates/sparrow-instructions/src/evaluators.rs
+++ b/crates/sparrow-instructions/src/evaluators.rs
@@ -45,7 +45,7 @@ use time::*;
 pub struct StaticInfo<'a> {
     inst_kind: &'a InstKind,
     pub args: Vec,
-    result_type: &'a DataType,
+    pub result_type: &'a DataType,
 }
 
 impl<'a> StaticInfo<'a> {
diff --git a/python/pysrc/kaskada/_ffi.pyi b/python/pysrc/kaskada/_ffi.pyi
index aabc16f6a..80423ba4c 100644
--- a/python/pysrc/kaskada/_ffi.pyi
+++ b/python/pysrc/kaskada/_ffi.pyi
@@ -27,7 +27,7 @@ class Expr:
     @staticmethod
     def call_udf(
         session: Session,
-        udf: Udf, # TODO: ffi udf?
+        udf: Udf,
         args: Sequence[Expr],
     ) -> Expr: ...
     @staticmethod
diff --git a/python/pysrc/kaskada/_timestream.py b/python/pysrc/kaskada/_timestream.py
index fb00abfba..1649b54c0 100644
--- a/python/pysrc/kaskada/_timestream.py
+++ b/python/pysrc/kaskada/_timestream.py
@@ -257,16 +257,6 @@ def pipe(
         else:
             return func(self, *args, **kwargs)
 
-# TODO: How does udf work? We kind of want it like... apply some function on an input?
-# But also want like `Timestream.udf(f, args*)`
-    # def udf(callable: Callable, *args: Timestream) -> Timestream:
-    # """
-    # todo
-    # """
-    # signature = inspect.signature(callable)
-
-    # return Timestream._test_udf(signature, args)
-
     def add(self, rhs: Union[Timestream, Literal]) -> Timestream:
         """
         Create a Timestream adding this and `rhs`.
diff --git a/python/pysrc/kaskada/udf.py b/python/pysrc/kaskada/udf.py
index 421fec992..32ba28ece 100644
--- a/python/pysrc/kaskada/udf.py
+++ b/python/pysrc/kaskada/udf.py
@@ -33,7 +33,8 @@ def decorator(func: FuncType):
         # This needs to take the PyArrow result type and PyArrow inputs,
         # convert them to Pandas, call the function, and convert the result
         # back to PyArrow.
-        func = lambda *args: _converted_func(func, *args)
+        func = lambda result_type, *args: _converted_func(func, result_type, *args)
+        # func = lambda *args: _converted_func(func, pa.int64(), *args)
 
         # 2. Create the UDF object.
         return Udf(signature, func)
@@ -45,8 +46,14 @@ def _converted_func(func: FuncType, result_type: pa.DataType, *args: pa.Array) -
     """Run the function producing the given result type."""
     # TODO: I believe this will return a series for simple arrays, and a
     # dataframe for struct arrays. 
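The conversion this decorator performs can be exercised in isolation; a sketch mirroring `_converted_func` as it settles later in the series (assumes a pandas-based UDF and a numeric result type):

    import pandas as pd
    import pyarrow as pa

    def converted(func, result_type: pa.DataType, *args: pa.Array) -> pa.Array:
        pd_args = [arg.to_pandas() for arg in args]
        pd_result = func(*pd_args)
        return pa.Array.from_pandas(pd_result, type=result_type)

    out = converted(lambda x, y: x + y, pa.int64(),
                    pa.array([1, 2]), pa.array([10, 20]))
    assert out.equals(pa.array([11, 22], type=pa.int64()))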
- pd_args = [arg.to_pandas() for arg in args] - pd_result = func(*pd_args) + print("CONVERTING FUNC") + print(f"Result type: ", result_type) + print(f"Args: ", args) + print() + + # pd_args = [arg.to_pandas() for arg in args] + # pd_result = func(*pd_args) + pd_result = func(*args) if isinstance(pd_result, pd.Series): return pa.Array.from_pandas(pd_result, type=result_type) diff --git a/python/pytests/golden/udf_test/test_add_udf_direct.jsonl b/python/pytests/golden/udf_test/test_add_udf_direct.jsonl deleted file mode 100644 index 5f101ddf1..000000000 --- a/python/pytests/golden/udf_test/test_add_udf_direct.jsonl +++ /dev/null @@ -1,6 +0,0 @@ -{"_time":"1996-12-19T16:39:57.000000000","_key":"A","result":15.0} -{"_time":"1996-12-19T16:39:58.000000000","_key":"B","result":27.0} -{"_time":"1996-12-19T16:39:59.000000000","_key":"A","result":23.0} -{"_time":"1996-12-19T16:40:00.000000000","_key":"A","result":null} -{"_time":"1996-12-19T16:40:01.000000000","_key":"A","result":null} -{"_time":"1996-12-19T16:40:02.000000000","_key":"A","result":23.0} diff --git a/python/pytests/golden/udf_test/test_add_udf_pipe.jsonl b/python/pytests/golden/udf_test/test_add_udf_pipe.jsonl deleted file mode 100644 index 358c577c5..000000000 --- a/python/pytests/golden/udf_test/test_add_udf_pipe.jsonl +++ /dev/null @@ -1,6 +0,0 @@ -{"_time":"1996-12-19T16:39:57.000000000","_key":"A","result":15.0} -{"_time":"1996-12-19T16:39:58.000000000","_key":"B","result":27.0} -{"_time":"1996-12-19T16:39:59.000000000","_key":"A","result":23.0} -{"_time":"1996-12-19T16:40:00.000000000","_key":"A","result":null} -{"_time":"1996-12-19T16:40:01.000000000","_key":"A","result":null} -{"_time":"1996-12-19T16:40:02.000000000","_key":"A","result":null} diff --git a/python/src/execution.rs b/python/src/execution.rs index 19d3e0d46..569194ec9 100644 --- a/python/src/execution.rs +++ b/python/src/execution.rs @@ -56,18 +56,15 @@ impl Execution { fn collect_pyarrow(&mut self, py: Python<'_>) -> Result> { let execution = self.take_execution().unwrap(); - // Explicitly allow threads to take the gil during execution, so + // Explicitly allow threads to take the gil during execution, so // the udf evaluator can acquire it to execute python. - let batches = py.allow_threads(move || { - execution.collect_all_blocking() - })?; + let batches = py.allow_threads(move || execution.collect_all_blocking())?; let results = batches .into_iter() .map(|batch| batch.to_pyarrow(py)) .collect::>>()?; Ok(results) - // result.map_err(|_| error_stack::report!(ErrorContext::Python).into()) } fn next_pyarrow(&mut self, py: Python<'_>) -> Result> { diff --git a/python/src/udf.rs b/python/src/udf.rs index 65059a91b..39ddebddb 100644 --- a/python/src/udf.rs +++ b/python/src/udf.rs @@ -6,8 +6,8 @@ use arrow::array::{new_empty_array, ArrayData, ArrayRef}; use arrow::datatypes::DataType; use arrow::pyarrow::{FromPyArrow, ToPyArrow}; use pyo3::prelude::*; -use pyo3::types::PyTuple; -use sparrow_instructions::{Evaluator, RuntimeInfo, StaticInfo, ValueRef}; +use pyo3::types::{PyDict, PyTuple}; +use sparrow_instructions::{ColumnarValue, Evaluator, RuntimeInfo, StaticInfo, ValueRef}; use sparrow_syntax::{FeatureSetPart, Signature}; use uuid::Uuid; @@ -15,12 +15,13 @@ use uuid::Uuid; struct PyUdf { signature: Signature, callable: Py, - uuid: uuid::Uuid, + uuid: Uuid, } struct PyUdfEvaluator { args: Vec, callable: Py, + result_type: DataType, } #[derive(Clone)] @@ -39,11 +40,13 @@ impl sparrow_instructions::Udf for PyUdf { // is invokable on that. 
This would let us detect errors early. println!("Making python udf evaluator"); + let result_type = info.result_type.clone(); let args = info.args.iter().map(|arg| arg.value_ref.clone()).collect(); Box::new(PyUdfEvaluator { args, callable: self.callable.clone(), + result_type, }) } @@ -55,62 +58,83 @@ impl sparrow_instructions::Udf for PyUdf { impl Evaluator for PyUdfEvaluator { fn evaluate(&mut self, info: &dyn RuntimeInfo) -> anyhow::Result { println!("Evaluating Python UDF"); - let args = self + let inputs = self .args .iter() .map(|value_ref| -> anyhow::Result<_> { Ok(info.value(value_ref)?.array_ref()?) }) .try_collect()?; - let result = Python::with_gil(|py| -> anyhow::Result { - // todo: - println!("inside gil"); - Ok(self.evaluate_py(py, args)) - })?; - // FRAZ: handle python errors. + let result = + Python::with_gil(|py| -> anyhow::Result { self.evaluate_py(py, inputs) })?; + + anyhow::ensure!( + result.len() == info.num_rows(), + "result has wrong number of rows" + ); Ok(result) } } impl PyUdfEvaluator { - fn evaluate_py<'py>(&self, py: Python<'py>, args: Vec) -> ArrayRef { - // self.callable.call1(py, (args,)).unwrap().extract(py).unwrap(); - // let args: Vec = args - // .iter() - // .map::, _>(|value_ref| { - // let value: ColumnarValue = info.value(value_ref)?; - // let array_ref: ArrayRef = value.array_ref()?; - // let array_data: ArrayData = aray_ref.to_data(); - // let py_obj: PyObject = array_data.to_pyarrow(py)?; - - // Ok(py_obj) - // }) - // .collect::>()?; - - // let locals = [("args", args)].into_py_dict(py); - // py.run(&self.code, None, Some(&locals))?; - // let result_py: PyObject = locals.get_item("ret").unwrap().into(); - - // let result_array_data: ArrayData = ArrayData::from_pyarrow(result_py.as_ref(py))?; - - // // We can't control the returned type, but we can refuse to accept the "wrong" type. - // ensure!( - // *result_array_data.data_type() == self.result_type, - // "expected Python UDF to return type {:?}, but received {:?}", - // result_array_data.data_type(), - // self.result_type - // ); - - // let result_array_ref: ArrayRef = array::make_array(result_array_data); - - // // This is a "map" operation - each row should reflect the time of the - // // corresponding input row, and have the same length. - // ensure!( - // result_array_ref.len() == info.num_rows(), - // "unexpected result length from Python UDF" - // ); - - println!("array ref panic?"); - new_empty_array(&DataType::Int32) + fn evaluate_py<'py>(&self, py: Python<'py>, inputs: Vec) -> anyhow::Result { + // 1. Convert all arg inputs into pyarrow arrays then to python objects + let inputs: Vec = inputs + .iter() + .map::, _>(|arg_array| { + let array_data: ArrayData = arg_array.to_data(); + let py_obj: PyObject = array_data.to_pyarrow(py)?; + Ok(py_obj) + }) + .collect::>()?; + + // let asdf: Vec = vec![DataType::to_pyarrow(&self.result_type, py)?]; + // combine asdf and args + // let args: Vec = asdf.into_iter().chain(inputs.into_iter()).collect(); + + // let args = PyTuple::new(py, args); + + // 2. 
Call the python function + // Call the Python function + let result_ty = DataType::to_pyarrow(&self.result_type, py)?; + // let kwargs = PyDict::new(py); + // Set the 'result_type' key to a specific value + // kwargs.set_item("result_type", result_ty)?; + + let result_ty_vec: Vec = vec![result_ty]; + let inputs: Vec<_> = result_ty_vec.into_iter().chain(inputs).collect(); + let inputs = PyTuple::new(py, inputs); + + println!("CALLING CALLABLE "); + let py_result = self.callable.call1(py, inputs)?; + // let py_result = self.callable.call(py, inputs, Some(kwargs))?; + + ////////// + // let mut new_args: Vec = vec![DataType::to_pyarrow(&self.result_type, py)?]; + + // // Collect the other arguments + // for input in inputs.iter() { + // let array_data: ArrayData = input.to_data(); + // let py_obj: PyObject = array_data.to_pyarrow(py)?; + // new_args.push(py_obj); + // } + + // println!("New args: {:?}", new_args); + // let args = PyTuple::new(py, new_args); + // let py_result = self.callable.call(py, args, Some(kwargs))?; + + // 3. Convert the result from pyarrow array to arrow array + let result_array_data: ArrayData = ArrayData::from_pyarrow(py_result.as_ref(py))?; + + // We can't control the returned type, but we can refuse to accept the "wrong" type. + anyhow::ensure!( + *result_array_data.data_type() == self.result_type, + "expected Python UDF to return type {:?}, but received {:?}", + result_array_data.data_type(), + self.result_type + ); + + let result: ArrayRef = arrow::array::make_array(result_array_data); + Ok(result) } } From 7472216201075f5e52890824a12b06d03ad4ccde Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Thu, 24 Aug 2023 12:34:11 -0700 Subject: [PATCH 09/18] reusing func variable name was causing recursion in python lambda --- python/pysrc/kaskada/udf.py | 15 +++++++-------- python/src/udf.rs | 18 +++++++----------- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/python/pysrc/kaskada/udf.py b/python/pysrc/kaskada/udf.py index 32ba28ece..a598c6313 100644 --- a/python/pysrc/kaskada/udf.py +++ b/python/pysrc/kaskada/udf.py @@ -17,7 +17,7 @@ class Udf: def __init__(self, signature: str, func: Callable[..., pa.Array]) -> None: """Create a UDF with the given signature.""" - functools.update_wrapper(self, func) + # functools.update_wrapper(self, func) self._ffi = _ffi.Udf(signature, func) def __call__(self, *args: Timestream | Literal) -> Timestream: @@ -28,20 +28,19 @@ def __call__(self, *args: Timestream | Literal) -> Timestream: def udf(signature: str): """Decorate a function for use as a Kaskada UDF.""" - def decorator(func: FuncType): + def decorator(func_type: FuncType): # 1. Convert the `FuncType` to the type expected by Udf. # This needs to take the PyArrow result type and PyArrow inputs, # convert them to Pandas, call the function, and convert the result # back to PyArrow. - func = lambda result_type, *args: _converted_func(func, result_type, *args) - # func = lambda *args: _converted_func(func, pa.int64(), *args) + func = lambda result_type, *args: _converted_func(func_type, result_type, *args) # 2. Create the UDF object. return Udf(signature, func) return decorator -def _converted_func(func: FuncType, result_type: pa.DataType, *args: pa.Array) -> pa.Array: +def _converted_func(func_type: FuncType, result_type: pa.DataType, *args: pa.Array) -> pa.Array: """Run the function producing the given result type.""" # TODO: I believe this will return a series for simple arrays, and a # dataframe for struct arrays. 
We should explore how this handles @@ -51,9 +50,9 @@ def _converted_func(func: FuncType, result_type: pa.DataType, *args: pa.Array) - print(f"Args: ", args) print() - # pd_args = [arg.to_pandas() for arg in args] - # pd_result = func(*pd_args) - pd_result = func(*args) + pd_args = [arg.to_pandas() for arg in args] + pd_result = func_type(*pd_args) + # pd_result = func_type(*args) if isinstance(pd_result, pd.Series): return pa.Array.from_pandas(pd_result, type=result_type) diff --git a/python/src/udf.rs b/python/src/udf.rs index 39ddebddb..9de1d98fe 100644 --- a/python/src/udf.rs +++ b/python/src/udf.rs @@ -87,25 +87,21 @@ impl PyUdfEvaluator { }) .collect::>()?; - // let asdf: Vec = vec![DataType::to_pyarrow(&self.result_type, py)?]; - // combine asdf and args - // let args: Vec = asdf.into_iter().chain(inputs.into_iter()).collect(); - - // let args = PyTuple::new(py, args); - // 2. Call the python function - // Call the Python function let result_ty = DataType::to_pyarrow(&self.result_type, py)?; - // let kwargs = PyDict::new(py); - // Set the 'result_type' key to a specific value - // kwargs.set_item("result_type", result_ty)?; - let result_ty_vec: Vec = vec![result_ty]; let inputs: Vec<_> = result_ty_vec.into_iter().chain(inputs).collect(); let inputs = PyTuple::new(py, inputs); println!("CALLING CALLABLE "); let py_result = self.callable.call1(py, inputs)?; + + // let kwargs = PyDict::new(py); + // // Set the 'result_type' key to a specific value + // let result_ty = DataType::to_pyarrow(&self.result_type, py)?; + // kwargs.set_item("result_type", result_ty)?; + + // let inputs = PyTuple::new(py, inputs); // let py_result = self.callable.call(py, inputs, Some(kwargs))?; ////////// From 1c0f202e4644f4c309fd4d6c0f65e3b533ac506c Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Thu, 24 Aug 2023 17:30:57 -0700 Subject: [PATCH 10/18] clean up comments? --- crates/sparrow-runtime/src/execute.rs | 15 ++-- .../sparrow-runtime/src/execute/operation.rs | 1 - .../execute/operation/expression_executor.rs | 2 +- crates/sparrow-session/src/lib.rs | 2 - crates/sparrow-session/src/session.rs | 8 +-- python/Pipfile | 11 --- python/pysrc/kaskada/_result.py | 1 - python/pysrc/kaskada/_timestream.py | 69 ++----------------- python/pysrc/kaskada/udf.py | 11 +-- python/src/udf.rs | 42 +++-------- 10 files changed, 28 insertions(+), 134 deletions(-) delete mode 100644 python/Pipfile diff --git a/crates/sparrow-runtime/src/execute.rs b/crates/sparrow-runtime/src/execute.rs index 45630ad7d..6521447e3 100644 --- a/crates/sparrow-runtime/src/execute.rs +++ b/crates/sparrow-runtime/src/execute.rs @@ -24,7 +24,6 @@ use crate::execute::output::Destination; use crate::key_hash_inverse::{KeyHashInverse, ThreadSafeKeyHashInverse}; use crate::stores::ObjectStoreRegistry; use crate::RuntimeOptions; -pub use operation::WorkArea; mod checkpoints; mod compute_executor; @@ -344,14 +343,8 @@ pub async fn materialize( // as the materialization does not exit, and should not need to handle cleanup tasks that regular // queries do. We should likely refactor this to use a separate `materialize_with_progress` method. 
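The bug fixed in PATCH 09 above is a classic Python late-binding pitfall, reproducible in two lines: rebinding the wrapped function's own name makes the new lambda call itself.

    def broken(func):
        # `func` in the lambda body resolves at call time, by which point the
        # name is bound to the lambda itself -> RecursionError.
        func = lambda *args: func(*args)
        return func

    def fixed(func_type):
        # A distinct name (as the commit's `func_type` rename does) keeps the
        # closure pointing at the original function.
        return lambda *args: func_type(*args)

    assert fixed(abs)(-3) == 3
    # broken(abs)(-3)  # would raise RecursionError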
- // TODO: Materialize with udfs - execute_new( - plan, - destination, - data_context, - options, - None, - &HashMap::new(), - ) - .await + // TODO: Unimplemented feature - UDFs + let udfs = HashMap::new(); + + execute_new(plan, destination, data_context, options, None, &udfs).await } diff --git a/crates/sparrow-runtime/src/execute/operation.rs b/crates/sparrow-runtime/src/execute/operation.rs index 679f5b3af..d57eff0a6 100644 --- a/crates/sparrow-runtime/src/execute/operation.rs +++ b/crates/sparrow-runtime/src/execute/operation.rs @@ -68,7 +68,6 @@ use crate::execute::Error; use crate::key_hash_inverse::ThreadSafeKeyHashInverse; use crate::stores::ObjectStoreRegistry; use crate::Batch; -pub use expression_executor::WorkArea; /// Information used while creating operations. /// diff --git a/crates/sparrow-runtime/src/execute/operation/expression_executor.rs b/crates/sparrow-runtime/src/execute/operation/expression_executor.rs index b8030d181..77e72f3bf 100644 --- a/crates/sparrow-runtime/src/execute/operation/expression_executor.rs +++ b/crates/sparrow-runtime/src/execute/operation/expression_executor.rs @@ -274,7 +274,7 @@ impl ExpressionExecutor { } /// Wrapper around the columns produced during execution. -pub struct WorkArea { +struct WorkArea { time: ArrayRef, subsort: ArrayRef, key_hash: ArrayRef, diff --git a/crates/sparrow-session/src/lib.rs b/crates/sparrow-session/src/lib.rs index a06a88662..bbc72adef 100644 --- a/crates/sparrow-session/src/lib.rs +++ b/crates/sparrow-session/src/lib.rs @@ -9,5 +9,3 @@ pub use execution::Execution; pub use expr::{Expr, Literal}; pub use session::{ExecutionOptions, Session}; pub use table::Table; - -// pub type Udf = dyn sparrow_instructions::Udf; diff --git a/crates/sparrow-session/src/session.rs b/crates/sparrow-session/src/session.rs index 46cac7d28..22c95d1db 100644 --- a/crates/sparrow-session/src/session.rs +++ b/crates/sparrow-session/src/session.rs @@ -27,10 +27,10 @@ pub struct Session { key_hash_inverse: HashMap>, /// Keeps track of the uuid mapping. /// - /// This is a bit of a hack that allows to serialize the uuid instead of the - /// `Arc>, } diff --git a/python/Pipfile b/python/Pipfile deleted file mode 100644 index 71e4f7cbf..000000000 --- a/python/Pipfile +++ /dev/null @@ -1,11 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[packages] - -[dev-packages] - -[requires] -python_version = "3.9" diff --git a/python/pysrc/kaskada/_result.py b/python/pysrc/kaskada/_result.py index 4377a5dcf..eea4e04ac 100644 --- a/python/pysrc/kaskada/_result.py +++ b/python/pysrc/kaskada/_result.py @@ -46,7 +46,6 @@ def to_pyarrow(self) -> pa.Table: This method will block on the complete results of the query and collect all results into memory. If this is not desired, use `iter_pyarrow` instead. 
""" - print("Converting to_pyarrow") batches = self._ffi_execution.collect_pyarrow() if len(batches) == 0: return pa.Table.from_batches([], schema=pa.schema([])) diff --git a/python/pysrc/kaskada/_timestream.py b/python/pysrc/kaskada/_timestream.py index 1649b54c0..6dd437738 100644 --- a/python/pysrc/kaskada/_timestream.py +++ b/python/pysrc/kaskada/_timestream.py @@ -4,7 +4,6 @@ import sys import warnings -import inspect from datetime import datetime from datetime import timedelta @@ -67,66 +66,9 @@ def _literal(value: Literal, session: _ffi.Session) -> Timestream: raise TypeError("Cannot create a literal Timestream from a datetime") return Timestream(_ffi.Expr.literal(session, value)) - @staticmethod - def _call_udf( - udf: _ffi.Udf, - *args: Union[Timestream, Literal], - session: Optional[_ffi.Session] = None, - ) -> Timestream: - """ - Construct a new Timestream by calling the given function. - - Parameters - ---------- - func : str - Name of the function to apply. - *args : Timestream | int | str | float | bool | None - List of arguments to the expression. - session : FFI Session - FFI Session to create the expression in. - If unspecified, will infer from the arguments. - Will fail if all arguments are literals and the session is not provided. - - Returns - ------- - Timestream - Timestream representing the result of the function applied to the arguments. - - Raises - ------ - # noqa: DAR401 _augment_error - TypeError - If the argument types are invalid for the given function. - ValueError - If the argument values are invalid for the given function. - """ - if session is None: - session = next( - arg._ffi_expr.session() for arg in args if isinstance(arg, Timestream) - ) - - def make_arg(arg: Union[Timestream, Literal]) -> _ffi.Expr: - if isinstance(arg, Timestream): - return arg._ffi_expr - else: - return Timestream._literal(arg, session)._ffi_expr - - ffi_args = [make_arg(arg) for arg in args] - - try: - return Timestream( - _ffi.Expr.call_udf(session=session, udf=udf, args=ffi_args) - ) - except TypeError as e: - # noqa: DAR401 - raise _augment_error(args, TypeError(str(e))) from e - except ValueError as e: - raise _augment_error(args, ValueError(str(e))) from e - - @staticmethod def _call( - func: str, + func: Union[str, _ffi.Udf], *args: Union[Timestream, Literal], session: Optional[_ffi.Session] = None, ) -> Timestream: @@ -170,9 +112,12 @@ def make_arg(arg: Union[Timestream, Literal]) -> _ffi.Expr: ffi_args = [make_arg(arg) for arg in args] try: - return Timestream( - _ffi.Expr.call(session=session, operation=func, args=ffi_args) - ) + if isinstance(func, str): + return Timestream(_ffi.Expr.call(session=session, operation=func, args=ffi_args)) + elif isinstance(func, _ffi.Udf): + return Timestream(_ffi.Expr.call_udf(session=session, udf=func, args=ffi_args)) + else: + raise TypeError(f"invalid type for func. 
Expected str or udf, saw: {type(func)}") except TypeError as e: # noqa: DAR401 raise _augment_error(args, TypeError(str(e))) from e diff --git a/python/pysrc/kaskada/udf.py b/python/pysrc/kaskada/udf.py index a598c6313..f2327bcd0 100644 --- a/python/pysrc/kaskada/udf.py +++ b/python/pysrc/kaskada/udf.py @@ -17,13 +17,12 @@ class Udf: def __init__(self, signature: str, func: Callable[..., pa.Array]) -> None: """Create a UDF with the given signature.""" - # functools.update_wrapper(self, func) + functools.update_wrapper(self, func) self._ffi = _ffi.Udf(signature, func) def __call__(self, *args: Timestream | Literal) -> Timestream: """Apply the UDF to the given arguments.""" - print("Calling udf") - return Timestream._call_udf(self._ffi, *args) + return Timestream._call(self._ffi, *args) def udf(signature: str): """Decorate a function for use as a Kaskada UDF.""" @@ -45,14 +44,8 @@ def _converted_func(func_type: FuncType, result_type: pa.DataType, *args: pa.Arr # TODO: I believe this will return a series for simple arrays, and a # dataframe for struct arrays. We should explore how this handles # different types. - print("CONVERTING FUNC") - print(f"Result type: ", result_type) - print(f"Args: ", args) - print() - pd_args = [arg.to_pandas() for arg in args] pd_result = func_type(*pd_args) - # pd_result = func_type(*args) if isinstance(pd_result, pd.Series): return pa.Array.from_pandas(pd_result, type=result_type) diff --git a/python/src/udf.rs b/python/src/udf.rs index 9de1d98fe..9574d7744 100644 --- a/python/src/udf.rs +++ b/python/src/udf.rs @@ -87,46 +87,26 @@ impl PyUdfEvaluator { }) .collect::>()?; - // 2. Call the python function - let result_ty = DataType::to_pyarrow(&self.result_type, py)?; - let result_ty_vec: Vec = vec![result_ty]; - let inputs: Vec<_> = result_ty_vec.into_iter().chain(inputs).collect(); - let inputs = PyTuple::new(py, inputs); + // The callable expects [result_type, *args] + let mut py_inputs = vec![DataType::to_pyarrow(&self.result_type, py)?]; + py_inputs.extend(inputs); - println!("CALLING CALLABLE "); - let py_result = self.callable.call1(py, inputs)?; - - // let kwargs = PyDict::new(py); - // // Set the 'result_type' key to a specific value - // let result_ty = DataType::to_pyarrow(&self.result_type, py)?; - // kwargs.set_item("result_type", result_ty)?; - - // let inputs = PyTuple::new(py, inputs); - // let py_result = self.callable.call(py, inputs, Some(kwargs))?; - - ////////// - // let mut new_args: Vec = vec![DataType::to_pyarrow(&self.result_type, py)?]; + // let inputs: Vec<_> = result_ty_vec.into_iter().chain(inputs).collect(); + let inputs = PyTuple::new(py, py_inputs); - // // Collect the other arguments - // for input in inputs.iter() { - // let array_data: ArrayData = input.to_data(); - // let py_obj: PyObject = array_data.to_pyarrow(py)?; - // new_args.push(py_obj); - // } - - // println!("New args: {:?}", new_args); - // let args = PyTuple::new(py, new_args); - // let py_result = self.callable.call(py, args, Some(kwargs))?; + // 3. Execute the udf + let py_result = self.callable.call1(py, inputs)?; // 3. Convert the result from pyarrow array to arrow array let result_array_data: ArrayData = ArrayData::from_pyarrow(py_result.as_ref(py))?; - // We can't control the returned type, but we can refuse to accept the "wrong" type. + // We attempt to coerce the return type into our expected result type, but this + // check exists to ensure the roundtrip type is as expected. 
anyhow::ensure!( *result_array_data.data_type() == self.result_type, "expected Python UDF to return type {:?}, but received {:?}", - result_array_data.data_type(), self.result_type + result_array_data.data_type(), ); let result: ArrayRef = arrow::array::make_array(result_array_data); @@ -139,9 +119,7 @@ impl Udf { #[new] #[pyo3(signature = (signature, callable))] fn new(signature: String, callable: Py) -> Self { - println!("Creating udf with signature: {:?}", signature); let uuid = Uuid::new_v4(); - // TODO: sig name string cannot be &'static str let signature = Signature::try_from_str(FeatureSetPart::Function("udf"), &signature) .expect("signature to parse"); let udf = PyUdf { From 1e356d2230a4728bdc78817041c56ce4a0fd0d6c Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Thu, 24 Aug 2023 20:07:24 -0700 Subject: [PATCH 11/18] Add another udf for unit test --- python/pysrc/kaskada/__init__.py | 4 ++- python/pysrc/kaskada/_ffi.pyi | 7 +---- python/pysrc/kaskada/_timestream.py | 13 ++++++--- python/pysrc/kaskada/udf.py | 14 ++++++--- .../golden/udf_test/test_add_udf_direct.jsonl | 6 ++++ .../golden/udf_test/test_add_udf_pipe.jsonl | 6 ++++ python/pytests/udf_test.py | 29 +++++++++++++++---- python/src/udf.rs | 2 +- 8 files changed, 60 insertions(+), 21 deletions(-) create mode 100644 python/pytests/golden/udf_test/test_add_udf_direct.jsonl create mode 100644 python/pytests/golden/udf_test/test_add_udf_pipe.jsonl diff --git a/python/pysrc/kaskada/__init__.py b/python/pysrc/kaskada/__init__.py index 5c2d73fe5..620bb8233 100644 --- a/python/pysrc/kaskada/__init__.py +++ b/python/pysrc/kaskada/__init__.py @@ -10,8 +10,9 @@ from ._timestream import Literal from ._timestream import Timestream from ._timestream import record -from .udf import Udf from .udf import udf +from .udf import Udf + __all__ = [ "ExecutionOptions", @@ -22,6 +23,7 @@ "Result", "sources", "Timestream", + "Udf", "udf", "windows", ] diff --git a/python/pysrc/kaskada/_ffi.pyi b/python/pysrc/kaskada/_ffi.pyi index 80423ba4c..07791114b 100644 --- a/python/pysrc/kaskada/_ffi.pyi +++ b/python/pysrc/kaskada/_ffi.pyi @@ -41,7 +41,6 @@ class Expr: def session(self) -> Session: ... def execute(self, options: Optional[ExecutionOptions] = None) -> Execution: ... def grouping(self) -> Optional[str]: ... - class Table(Expr): def __init__( @@ -60,8 +59,4 @@ class Table(Expr): def add_pyarrow(self, data: pa.RecordBatch) -> None: ... class Udf(object): - def __init__( - self, - result_ty: str, - result_fn: Callable[..., pa.Array] - ) -> None: ... \ No newline at end of file + def __init__(self, result_ty: str, result_fn: Callable[..., pa.Array]) -> None: ... 
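The `ensure!` shown earlier in this commit enforces a simple contract: the array Python hands back must already carry the declared result type. In pyarrow terms (a standalone sketch, not the engine path; `check_result` is a hypothetical name):

    import pyarrow as pa

    def check_result(result: pa.Array, expected: pa.DataType) -> pa.Array:
        if result.type != expected:
            raise TypeError(
                f"expected Python UDF to return type {expected}, "
                f"but received {result.type}"
            )
        return result

    check_result(pa.array([1.0, 2.0]), pa.float64())  # passes
    # check_result(pa.array([1, 2]), pa.float64())    # raises TypeError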
diff --git a/python/pysrc/kaskada/_timestream.py b/python/pysrc/kaskada/_timestream.py index 6dd437738..fc9c92bce 100644 --- a/python/pysrc/kaskada/_timestream.py +++ b/python/pysrc/kaskada/_timestream.py @@ -4,7 +4,6 @@ import sys import warnings - from datetime import datetime from datetime import timedelta from typing import Callable @@ -113,11 +112,17 @@ def make_arg(arg: Union[Timestream, Literal]) -> _ffi.Expr: ffi_args = [make_arg(arg) for arg in args] try: if isinstance(func, str): - return Timestream(_ffi.Expr.call(session=session, operation=func, args=ffi_args)) + return Timestream( + _ffi.Expr.call(session=session, operation=func, args=ffi_args) + ) elif isinstance(func, _ffi.Udf): - return Timestream(_ffi.Expr.call_udf(session=session, udf=func, args=ffi_args)) + return Timestream( + _ffi.Expr.call_udf(session=session, udf=func, args=ffi_args) + ) else: - raise TypeError(f"invalid type for func. Expected str or udf, saw: {type(func)}") + raise TypeError( + f"invalid type for func. Expected str or udf, saw: {type(func)}" + ) except TypeError as e: # noqa: DAR401 raise _augment_error(args, TypeError(str(e))) from e diff --git a/python/pysrc/kaskada/udf.py b/python/pysrc/kaskada/udf.py index f2327bcd0..febccb9a7 100644 --- a/python/pysrc/kaskada/udf.py +++ b/python/pysrc/kaskada/udf.py @@ -7,13 +7,15 @@ import pandas as pd import pyarrow as pa -from ._timestream import Timestream -from ._timestream import Literal from . import _ffi +from ._timestream import Literal +from ._timestream import Timestream + # TODO: Allow functions to return `pd.DataFrame` for struct arrays. FuncType = Callable[..., pd.Series] + class Udf: def __init__(self, signature: str, func: Callable[..., pa.Array]) -> None: """Create a UDF with the given signature.""" @@ -24,6 +26,7 @@ def __call__(self, *args: Timestream | Literal) -> Timestream: """Apply the UDF to the given arguments.""" return Timestream._call(self._ffi, *args) + def udf(signature: str): """Decorate a function for use as a Kaskada UDF.""" @@ -39,7 +42,10 @@ def decorator(func_type: FuncType): return decorator -def _converted_func(func_type: FuncType, result_type: pa.DataType, *args: pa.Array) -> pa.Array: + +def _converted_func( + func_type: FuncType, result_type: pa.DataType, *args: pa.Array +) -> pa.Array: """Run the function producing the given result type.""" # TODO: I believe this will return a series for simple arrays, and a # dataframe for struct arrays. 
We should explore how this handles @@ -50,4 +56,4 @@ def _converted_func(func_type: FuncType, result_type: pa.DataType, *args: pa.Arr if isinstance(pd_result, pd.Series): return pa.Array.from_pandas(pd_result, type=result_type) else: - raise TypeError(f"Unsupported result type: {type(pd_result)}") \ No newline at end of file + raise TypeError(f"Unsupported result type: {type(pd_result)}") diff --git a/python/pytests/golden/udf_test/test_add_udf_direct.jsonl b/python/pytests/golden/udf_test/test_add_udf_direct.jsonl new file mode 100644 index 000000000..e1314e28a --- /dev/null +++ b/python/pytests/golden/udf_test/test_add_udf_direct.jsonl @@ -0,0 +1,6 @@ +{"_time":"1996-12-19T16:39:57.000000000","_key":"A","m":5.0,"n":10.0,"add":15.0,"add_x2":30.0} +{"_time":"1996-12-19T16:39:58.000000000","_key":"B","m":24.0,"n":3.0,"add":27.0,"add_x2":54.0} +{"_time":"1996-12-19T16:39:59.000000000","_key":"A","m":17.0,"n":6.0,"add":23.0,"add_x2":46.0} +{"_time":"1996-12-19T16:40:00.000000000","_key":"A","m":null,"n":9.0,"add":null,"add_x2":null} +{"_time":"1996-12-19T16:40:01.000000000","_key":"A","m":12.0,"n":null,"add":null,"add_x2":null} +{"_time":"1996-12-19T16:40:02.000000000","_key":"A","m":null,"n":null,"add":null,"add_x2":null} diff --git a/python/pytests/golden/udf_test/test_add_udf_pipe.jsonl b/python/pytests/golden/udf_test/test_add_udf_pipe.jsonl new file mode 100644 index 000000000..e1314e28a --- /dev/null +++ b/python/pytests/golden/udf_test/test_add_udf_pipe.jsonl @@ -0,0 +1,6 @@ +{"_time":"1996-12-19T16:39:57.000000000","_key":"A","m":5.0,"n":10.0,"add":15.0,"add_x2":30.0} +{"_time":"1996-12-19T16:39:58.000000000","_key":"B","m":24.0,"n":3.0,"add":27.0,"add_x2":54.0} +{"_time":"1996-12-19T16:39:59.000000000","_key":"A","m":17.0,"n":6.0,"add":23.0,"add_x2":46.0} +{"_time":"1996-12-19T16:40:00.000000000","_key":"A","m":null,"n":9.0,"add":null,"add_x2":null} +{"_time":"1996-12-19T16:40:01.000000000","_key":"A","m":12.0,"n":null,"add":null,"add_x2":null} +{"_time":"1996-12-19T16:40:02.000000000","_key":"A","m":null,"n":null,"add":null,"add_x2":null} diff --git a/python/pytests/udf_test.py b/python/pytests/udf_test.py index f58fe906b..366b51bd5 100644 --- a/python/pytests/udf_test.py +++ b/python/pytests/udf_test.py @@ -1,23 +1,31 @@ -import pandas as pd -import pyarrow as pa import kaskada as kd import kaskada._ffi as _ffi +import pandas as pd import pytest + @kd.udf("add(x: N, y: N) -> N") def add(x: pd.Series, y: pd.Series) -> pd.Series: """Use Pandas to add two numbers.""" return x + y +@kd.udf("add_x2(x: N, y: N) -> N") +def add_x2(x: pd.Series, y: pd.Series) -> pd.Series: + """Use Pandas to add and multiple""" + return (x + y) * 2 + + +@pytest.mark.skip(reason="currently not working") def test_docstring() -> None: - assert add.__doc__ == "Use Pandas to add two numbers." + assert add.__doc__ == "Use Pandas to add two numbers." 
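The skipped docstring test hinges on `functools.update_wrapper` copying metadata onto the `Udf` instance; the mechanism itself works in isolation:

    import functools

    class Wrapped:
        def __init__(self, func):
            functools.update_wrapper(self, func)

    def f(x):
        """Docs survive wrapping."""
        return x

    assert Wrapped(f).__doc__ == "Docs survive wrapping."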
def test_udf_instance() -> None: assert isinstance(add, kd.Udf) assert isinstance(add._ffi, _ffi.Udf) + @pytest.fixture def source_int64() -> kd.sources.CsvString: content = "\n".join( @@ -33,12 +41,23 @@ def source_int64() -> kd.sources.CsvString: ) return kd.sources.CsvString(content, time_column_name="time", key_column_name="key") + def test_add_udf_direct(golden, source_int64) -> None: m = source_int64.col("m") n = source_int64.col("n") - golden.jsonl(add(m, n)) + golden.jsonl(kd.record({"m": m, "n": n, "add": add(m, n), "add_x2": add_x2(m, n)})) + def test_add_udf_pipe(golden, source_int64) -> None: m = source_int64.col("m") n = source_int64.col("n") - golden.jsonl(m.pipe(add, n)) + golden.jsonl( + kd.record( + { + "m": m, + "n": n, + "add": m.pipe(add, n), + "add_x2": m.pipe(add_x2, n), + } + ) + ) diff --git a/python/src/udf.rs b/python/src/udf.rs index 9574d7744..f58e5a773 100644 --- a/python/src/udf.rs +++ b/python/src/udf.rs @@ -105,7 +105,7 @@ impl PyUdfEvaluator { anyhow::ensure!( *result_array_data.data_type() == self.result_type, "expected Python UDF to return type {:?}, but received {:?}", - self.result_type + self.result_type, result_array_data.data_type(), ); From e4149af9e0bff6bef7ad314974d7e672214754fb Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Thu, 24 Aug 2023 20:12:10 -0700 Subject: [PATCH 12/18] remove prints --- crates/sparrow-compiler/src/ast_to_dfg.rs | 1 - crates/sparrow-compiler/src/plan/expression_to_plan.rs | 5 +---- crates/sparrow-instructions/src/evaluators.rs | 5 +---- python/src/expr.rs | 3 +-- python/src/udf.rs | 8 +++----- 5 files changed, 6 insertions(+), 16 deletions(-) diff --git a/crates/sparrow-compiler/src/ast_to_dfg.rs b/crates/sparrow-compiler/src/ast_to_dfg.rs index 84be28e3a..c43ef25ab 100644 --- a/crates/sparrow-compiler/src/ast_to_dfg.rs +++ b/crates/sparrow-compiler/src/ast_to_dfg.rs @@ -104,7 +104,6 @@ pub fn add_udf_to_dfg( data_context: &mut DataContext, diagnostics: &mut DiagnosticCollector<'_>, ) -> anyhow::Result { - println!("sparrow_compiler::add_udf_to_dfg"); let argument_types = arguments.transform(|i| i.with_value(i.value_type().clone())); let signature = udf.signature(); diff --git a/crates/sparrow-compiler/src/plan/expression_to_plan.rs b/crates/sparrow-compiler/src/plan/expression_to_plan.rs index 99495e4fe..c3beb3a24 100644 --- a/crates/sparrow-compiler/src/plan/expression_to_plan.rs +++ b/crates/sparrow-compiler/src/plan/expression_to_plan.rs @@ -35,10 +35,7 @@ pub(super) fn dfg_to_plan( InstKind::FieldRef => "field_ref".to_owned(), InstKind::Record => "record".to_owned(), InstKind::Cast(_) => "cast".to_owned(), - InstKind::Udf(udf) => { - println!("dfg_to_plan udf: {:?}", udf.signature().name()); - udf.uuid().to_string() - } + InstKind::Udf(udf) => udf.uuid().to_string(), }; let result_type = diff --git a/crates/sparrow-instructions/src/evaluators.rs b/crates/sparrow-instructions/src/evaluators.rs index d0cc6950f..f5c84248a 100644 --- a/crates/sparrow-instructions/src/evaluators.rs +++ b/crates/sparrow-instructions/src/evaluators.rs @@ -160,10 +160,7 @@ pub fn create_evaluator(info: StaticInfo<'_>) -> anyhow::Result Ok(RecordEvaluator::try_new(info)?), - InstKind::Udf(udf) => { - println!("Making udf evaluator"); - Ok(udf.make_evaluator(info)) - } + InstKind::Udf(udf) => Ok(udf.make_evaluator(info)), } } diff --git a/python/src/expr.rs b/python/src/expr.rs index d0f5111ca..17deccf64 100644 --- a/python/src/expr.rs +++ b/python/src/expr.rs @@ -47,11 +47,10 @@ impl Expr { /// Create a new udf. 
 ///
-    /// This creates a new udf based on the `udf` and `args` provided.
+    /// This creates a new expression based on the `udf` and `args` provided.
     #[staticmethod]
     #[pyo3(signature = (session, udf, args))]
     fn call_udf(session: Session, udf: Udf, args: Vec<Expr>) -> PyResult<Self> {
-        println!("SparrowExpr::call_udf");
         let sparrow_udf = udf.0;
         if !args.iter().all(|e| e.session() == session) {
             return Err(PyValueError::new_err(
diff --git a/python/src/udf.rs b/python/src/udf.rs
index f58e5a773..d42cf1424 100644
--- a/python/src/udf.rs
+++ b/python/src/udf.rs
@@ -26,7 +26,6 @@ struct PyUdfEvaluator {
 
 #[derive(Clone)]
 #[pyclass]
-// TODO: Does it need to extend Expr for some reason?
 pub(crate) struct Udf(pub Arc<PyUdf>);
 
 impl sparrow_instructions::Udf for PyUdf {
@@ -34,12 +33,12 @@ impl sparrow_instructions::Udf for PyUdf {
         &self.signature
     }
 
-    // FRAZ: Should we allow this to return an error?
     fn make_evaluator(&self, info: StaticInfo<'_>) -> Box<dyn Evaluator> {
-        // TODO: Idea: Create a random value of each argument type and verify that the callbale
+        // Idea: Create a random value of each argument type and verify that the callable
         // is invokable on that. This would let us detect errors early.
+        //
+        // Though, it still wouldn't give us compile-time errors.
 
-        println!("Making python udf evaluator");
         let result_type = info.result_type.clone();
         let args = info.args.iter().map(|arg| arg.value_ref.clone()).collect();
 
@@ -57,7 +56,6 @@ impl sparrow_instructions::Udf for PyUdf {
 
 impl Evaluator for PyUdfEvaluator {
     fn evaluate(&mut self, info: &dyn RuntimeInfo) -> anyhow::Result<ColumnarValue> {
-        println!("Evaluating Python UDF");
         let inputs = self
             .args
             .iter()

From be6ded4feddfeaa6e25bc5670c49547f90232389 Mon Sep 17 00:00:00 2001
From: Jordan Frazier
Date: Thu, 24 Aug 2023 20:26:44 -0700
Subject: [PATCH 13/18] linting

---
 python/pysrc/kaskada/__init__.py | 2 +-
 python/pysrc/kaskada/udf.py      | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/python/pysrc/kaskada/__init__.py b/python/pysrc/kaskada/__init__.py
index 620bb8233..b4d5db186 100644
--- a/python/pysrc/kaskada/__init__.py
+++ b/python/pysrc/kaskada/__init__.py
@@ -10,8 +10,8 @@
 from ._timestream import Literal
 from ._timestream import Timestream
 from ._timestream import record
-from .udf import udf
 from .udf import Udf
+from .udf import udf
 
 
 __all__ = [
diff --git a/python/pysrc/kaskada/udf.py b/python/pysrc/kaskada/udf.py
index febccb9a7..36c0a6d14 100644
--- a/python/pysrc/kaskada/udf.py
+++ b/python/pysrc/kaskada/udf.py
@@ -17,6 +17,8 @@
 
 
 class Udf:
+    """User defined function."""
+
     def __init__(self, signature: str, func: Callable[..., pa.Array]) -> None:
         """Create a UDF with the given signature."""
         functools.update_wrapper(self, func)
@@ -35,7 +37,8 @@ def decorator(func_type: FuncType):
         # This needs to take the PyArrow result type and PyArrow inputs,
         # convert them to Pandas, call the function, and convert the result
         # back to PyArrow.
-        func = lambda result_type, *args: _converted_func(func_type, result_type, *args)
+        def func(result_type, *args):
+            _converted_func(func_type, result_type, *args)
 
         # 2. Create the UDF object.
return Udf(signature, func) From e291e1f451401bc92255ae1b0473b211689812f6 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Fri, 25 Aug 2023 10:21:54 -0700 Subject: [PATCH 14/18] Linting / print remove --- .../src/execute/operation/expression_executor.rs | 1 - python/pysrc/kaskada/udf.py | 1 + python/pytests/udf_test.py | 2 +- python/src/expr.rs | 1 - python/src/udf.rs | 6 +++--- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/crates/sparrow-runtime/src/execute/operation/expression_executor.rs b/crates/sparrow-runtime/src/execute/operation/expression_executor.rs index 77e72f3bf..776821808 100644 --- a/crates/sparrow-runtime/src/execute/operation/expression_executor.rs +++ b/crates/sparrow-runtime/src/execute/operation/expression_executor.rs @@ -98,7 +98,6 @@ impl ExpressionExecutor { Operator::Instruction(inst) => { // TODO: This could probably be cleaned up, possibly by eliminating // `inst kind` entirely. - println!("Inst: {:?}", inst); let inst_kind = if inst == "field_ref" { InstKind::FieldRef } else if inst == "record" { diff --git a/python/pysrc/kaskada/udf.py b/python/pysrc/kaskada/udf.py index 36c0a6d14..76a5f19a2 100644 --- a/python/pysrc/kaskada/udf.py +++ b/python/pysrc/kaskada/udf.py @@ -56,6 +56,7 @@ def _converted_func( pd_args = [arg.to_pandas() for arg in args] pd_result = func_type(*pd_args) + print("Result type: ", result_type) if isinstance(pd_result, pd.Series): return pa.Array.from_pandas(pd_result, type=result_type) else: diff --git a/python/pytests/udf_test.py b/python/pytests/udf_test.py index 366b51bd5..f05b4e4b1 100644 --- a/python/pytests/udf_test.py +++ b/python/pytests/udf_test.py @@ -12,7 +12,7 @@ def add(x: pd.Series, y: pd.Series) -> pd.Series: @kd.udf("add_x2(x: N, y: N) -> N") def add_x2(x: pd.Series, y: pd.Series) -> pd.Series: - """Use Pandas to add and multiple""" + """Use Pandas to add then multiply by 2""" return (x + y) * 2 diff --git a/python/src/expr.rs b/python/src/expr.rs index 17deccf64..5c427305d 100644 --- a/python/src/expr.rs +++ b/python/src/expr.rs @@ -7,7 +7,6 @@ use arrow::pyarrow::{FromPyArrow, ToPyArrow}; use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::prelude::*; use sparrow_session::{Expr as RustExpr, Literal, Session as RustSession}; -use std::sync::Arc; /// Kaskada expression node. 
#[derive(Clone)] diff --git a/python/src/udf.rs b/python/src/udf.rs index d42cf1424..9c30d0c0d 100644 --- a/python/src/udf.rs +++ b/python/src/udf.rs @@ -2,12 +2,12 @@ use itertools::Itertools; use std::fmt::Debug; use std::sync::Arc; -use arrow::array::{new_empty_array, ArrayData, ArrayRef}; +use arrow::array::{ArrayData, ArrayRef}; use arrow::datatypes::DataType; use arrow::pyarrow::{FromPyArrow, ToPyArrow}; use pyo3::prelude::*; -use pyo3::types::{PyDict, PyTuple}; -use sparrow_instructions::{ColumnarValue, Evaluator, RuntimeInfo, StaticInfo, ValueRef}; +use pyo3::types::PyTuple; +use sparrow_instructions::{Evaluator, RuntimeInfo, StaticInfo, ValueRef}; use sparrow_syntax::{FeatureSetPart, Signature}; use uuid::Uuid; From 05bdfaf62d416e1b0f6a353d58f7a9766979b9e6 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Fri, 25 Aug 2023 10:51:58 -0700 Subject: [PATCH 15/18] cleanup comments --- python/src/udf.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/src/udf.rs b/python/src/udf.rs index 9c30d0c0d..e0be6c650 100644 --- a/python/src/udf.rs +++ b/python/src/udf.rs @@ -75,7 +75,7 @@ impl Evaluator for PyUdfEvaluator { impl PyUdfEvaluator { fn evaluate_py<'py>(&self, py: Python<'py>, inputs: Vec) -> anyhow::Result { - // 1. Convert all arg inputs into pyarrow arrays then to python objects + // Convert all arg inputs into pyarrow arrays then to python objects let inputs: Vec = inputs .iter() .map::, _>(|arg_array| { @@ -89,13 +89,11 @@ impl PyUdfEvaluator { let mut py_inputs = vec![DataType::to_pyarrow(&self.result_type, py)?]; py_inputs.extend(inputs); - // let inputs: Vec<_> = result_ty_vec.into_iter().chain(inputs).collect(); let inputs = PyTuple::new(py, py_inputs); - // 3. Execute the udf + // Execute the python udf let py_result = self.callable.call1(py, inputs)?; - // 3. Convert the result from pyarrow array to arrow array let result_array_data: ArrayData = ArrayData::from_pyarrow(py_result.as_ref(py))?; // We attempt to coerce the return type into our expected result type, but this From 23a9c39f8243891b9973e76ca2911a5ca243eef6 Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Fri, 25 Aug 2023 11:12:48 -0700 Subject: [PATCH 16/18] Actually return from the udf --- crates/sparrow-runtime/src/execute/compute_executor.rs | 3 ++- crates/sparrow-runtime/src/execute/operation.rs | 1 + crates/sparrow-session/src/session.rs | 2 +- python/pysrc/kaskada/udf.py | 2 +- python/src/udf.rs | 4 ++-- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/crates/sparrow-runtime/src/execute/compute_executor.rs b/crates/sparrow-runtime/src/execute/compute_executor.rs index fb63354e9..7faf111a9 100644 --- a/crates/sparrow-runtime/src/execute/compute_executor.rs +++ b/crates/sparrow-runtime/src/execute/compute_executor.rs @@ -51,6 +51,7 @@ pub struct ComputeResult { impl ComputeExecutor { /// Spawns the compute tasks using the new operation based executor. 
+ #[allow(clippy::too_many_arguments)] pub async fn try_spawn( mut context: OperationContext, plan_hash: PlanHash, @@ -151,7 +152,7 @@ impl ComputeExecutor { max_event_time_tx.clone(), late_bindings, stop_signal_rx.clone(), - &udfs, + udfs, ) .await?, ); diff --git a/crates/sparrow-runtime/src/execute/operation.rs b/crates/sparrow-runtime/src/execute/operation.rs index d57eff0a6..b642d0714 100644 --- a/crates/sparrow-runtime/src/execute/operation.rs +++ b/crates/sparrow-runtime/src/execute/operation.rs @@ -169,6 +169,7 @@ impl OperationExecutor { /// This function will not return until the operation is complete -- either /// because all inputs have been processed or because it has been /// interrupted due to an error or reaching a preview-rows limit, etc. + #[allow(clippy::too_many_arguments)] pub async fn execute( self, operation_index: usize, diff --git a/crates/sparrow-session/src/session.rs b/crates/sparrow-session/src/session.rs index 22c95d1db..e1f1cb908 100644 --- a/crates/sparrow-session/src/session.rs +++ b/crates/sparrow-session/src/session.rs @@ -340,7 +340,7 @@ impl Session { .collect(); Err(Error::Errors(errors))? } else { - self.udfs.insert(udf.uuid().clone(), udf.clone()); + self.udfs.insert(*udf.uuid(), udf.clone()); Ok(Expr(result)) } } diff --git a/python/pysrc/kaskada/udf.py b/python/pysrc/kaskada/udf.py index 76a5f19a2..8d7058257 100644 --- a/python/pysrc/kaskada/udf.py +++ b/python/pysrc/kaskada/udf.py @@ -38,7 +38,7 @@ def decorator(func_type: FuncType): # convert them to Pandas, call the function, and convert the result # back to PyArrow. def func(result_type, *args): - _converted_func(func_type, result_type, *args) + return _converted_func(func_type, result_type, *args) # 2. Create the UDF object. return Udf(signature, func) diff --git a/python/src/udf.rs b/python/src/udf.rs index e0be6c650..0b8634ab3 100644 --- a/python/src/udf.rs +++ b/python/src/udf.rs @@ -59,7 +59,7 @@ impl Evaluator for PyUdfEvaluator { let inputs = self .args .iter() - .map(|value_ref| -> anyhow::Result<_> { Ok(info.value(value_ref)?.array_ref()?) 
}) + .map(|value_ref| -> anyhow::Result<_> { info.value(value_ref)?.array_ref() }) .try_collect()?; let result = @@ -74,7 +74,7 @@ impl Evaluator for PyUdfEvaluator { } impl PyUdfEvaluator { - fn evaluate_py<'py>(&self, py: Python<'py>, inputs: Vec) -> anyhow::Result { + fn evaluate_py(&self, py: Python<'_>, inputs: Vec) -> anyhow::Result { // Convert all arg inputs into pyarrow arrays then to python objects let inputs: Vec = inputs .iter() From 0683b00630aaa7fa20daf61be1abf9163117d51e Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Fri, 25 Aug 2023 12:37:10 -0700 Subject: [PATCH 17/18] mypy and cargo test --- crates/sparrow-runtime/src/execute/operation/scan.rs | 2 ++ crates/sparrow-runtime/src/execute/operation/testing.rs | 3 +++ python/pysrc/kaskada/_ffi.pyi | 1 - 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/sparrow-runtime/src/execute/operation/scan.rs b/crates/sparrow-runtime/src/execute/operation/scan.rs index 1c87e848a..a1f2ae204 100644 --- a/crates/sparrow-runtime/src/execute/operation/scan.rs +++ b/crates/sparrow-runtime/src/execute/operation/scan.rs @@ -361,6 +361,7 @@ mod tests { use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; use arrow::record_batch::RecordBatch; use futures::StreamExt; + use hashbrown::HashMap; use itertools::Itertools; use sparrow_api::kaskada::v1alpha::compute_table::FileSet; use sparrow_api::kaskada::v1alpha::operation_input_ref::{self, Column}; @@ -519,6 +520,7 @@ mod tests { max_event_tx, &Default::default(), None, + &HashMap::new(), ) .await .unwrap() diff --git a/crates/sparrow-runtime/src/execute/operation/testing.rs b/crates/sparrow-runtime/src/execute/operation/testing.rs index be209ecd5..0a89feb7d 100644 --- a/crates/sparrow-runtime/src/execute/operation/testing.rs +++ b/crates/sparrow-runtime/src/execute/operation/testing.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use anyhow::Context; use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use arrow::record_batch::RecordBatch; +use hashbrown::HashMap; use itertools::Itertools; use sparrow_api::kaskada::v1alpha::{ComputePlan, OperationPlan}; use sparrow_compiler::DataContext; @@ -198,6 +199,7 @@ pub(super) async fn run_operation( max_event_tx, &Default::default(), None, + &HashMap::new(), ) .await .unwrap() @@ -255,6 +257,7 @@ pub(super) async fn run_operation_json( max_event_tx, &Default::default(), None, + &HashMap::new(), ) .await .unwrap() diff --git a/python/pysrc/kaskada/_ffi.pyi b/python/pysrc/kaskada/_ffi.pyi index 07791114b..e05f87856 100644 --- a/python/pysrc/kaskada/_ffi.pyi +++ b/python/pysrc/kaskada/_ffi.pyi @@ -6,7 +6,6 @@ from typing import Sequence import pyarrow as pa from ._execution import ExecutionOptions -from .udf import Udf class Session: def __init__(self) -> None: ... 
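
Note on the Python-side conversion that the final patch below cleans up:
`_converted_func` is the bridge between the engine's Arrow arrays and the
user's Pandas function. PyArrow inputs are converted to Pandas, the wrapped
function runs, and the resulting Series is converted back to a PyArrow array
of the declared result type. The following is a minimal standalone sketch of
that round trip, not the shipped code: `converted` stands in for
`_converted_func`, `add` is the same toy function used in `udf_test.py`, and
only `pandas` and `pyarrow` are assumed.

    import pandas as pd
    import pyarrow as pa


    def add(x: pd.Series, y: pd.Series) -> pd.Series:
        """Use Pandas to add two numbers."""
        return x + y


    def converted(result_type: pa.DataType, *args: pa.Array) -> pa.Array:
        # PyArrow -> Pandas: int64 arrays with nulls arrive as float64 with NaN.
        pd_args = [arg.to_pandas() for arg in args]
        pd_result = add(*pd_args)
        if isinstance(pd_result, pd.Series):
            # Pandas -> PyArrow: NaN converts back to null in the declared type.
            return pa.Array.from_pandas(pd_result, type=result_type)
        else:
            raise TypeError(f"Unsupported result type: {type(pd_result)}")


    m = pa.array([5, 24, 17, None], type=pa.int64())
    n = pa.array([10, 3, 6, 9], type=pa.int64())
    print(converted(pa.int64(), m, n))  # values: 15, 27, 23, null

Because `pa.Array.from_pandas` treats NaN as null, missing inputs propagate to
missing outputs, which matches the rows in the golden files above where `m` or
`n` is null.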
From 5ebc7e7a28cfecc46eee6f32c9d465ba14c243dd Mon Sep 17 00:00:00 2001 From: Jordan Frazier Date: Mon, 28 Aug 2023 11:04:04 -0700 Subject: [PATCH 18/18] Move udfs into operation context --- crates/sparrow-runtime/src/execute.rs | 8 ++++---- .../src/execute/compute_executor.rs | 5 ----- crates/sparrow-runtime/src/execute/operation.rs | 6 +++--- .../src/execute/operation/scan.rs | 2 +- .../src/execute/operation/testing.rs | 4 ++-- crates/sparrow-session/src/session.rs | 2 +- python/.vscode/settings.json | 3 +++ python/pysrc/kaskada/__init__.py | 2 -- python/pysrc/kaskada/udf.py | 17 ++++++++++------- python/pytests/udf_test.py | 12 ++++++------ 10 files changed, 30 insertions(+), 31 deletions(-) create mode 100644 python/.vscode/settings.json diff --git a/crates/sparrow-runtime/src/execute.rs b/crates/sparrow-runtime/src/execute.rs index 6521447e3..37b8f33ef 100644 --- a/crates/sparrow-runtime/src/execute.rs +++ b/crates/sparrow-runtime/src/execute.rs @@ -75,7 +75,7 @@ pub async fn execute( data_context, options, None, - &HashMap::new(), + HashMap::new(), ) .await } @@ -234,7 +234,7 @@ pub async fn execute_new( mut data_context: DataContext, options: ExecutionOptions, key_hash_inverse: Option>, - udfs: &HashMap>, + udfs: HashMap>, ) -> error_stack::Result>, Error> { let object_stores = Arc::new(ObjectStoreRegistry::default()); @@ -280,6 +280,7 @@ pub async fn execute_new( output_at_time, bounded_lateness_ns: options.bounded_lateness_ns, materialize: options.materialize, + udfs, }; // Start executing the query. We pass the response channel to the @@ -301,7 +302,6 @@ pub async fn execute_new( progress_updates_rx, destination, options.stop_signal_rx, - udfs, ) .await .change_context(Error::internal_msg("spawn compute executor"))?; @@ -346,5 +346,5 @@ pub async fn materialize( // TODO: Unimplemented feature - UDFs let udfs = HashMap::new(); - execute_new(plan, destination, data_context, options, None, &udfs).await + execute_new(plan, destination, data_context, options, None, udfs).await } diff --git a/crates/sparrow-runtime/src/execute/compute_executor.rs b/crates/sparrow-runtime/src/execute/compute_executor.rs index 7faf111a9..155869f5b 100644 --- a/crates/sparrow-runtime/src/execute/compute_executor.rs +++ b/crates/sparrow-runtime/src/execute/compute_executor.rs @@ -5,19 +5,16 @@ use enum_map::EnumMap; use error_stack::{IntoReport, IntoReportCompat, ResultExt}; use futures::stream::{FuturesUnordered, PollNext}; use futures::{FutureExt, Stream, TryFutureExt}; -use hashbrown::HashMap; use prost_wkt_types::Timestamp; use sparrow_api::kaskada::v1alpha::ComputeSnapshot; use sparrow_api::kaskada::v1alpha::{ExecuteResponse, LateBoundValue, PlanHash}; use sparrow_arrow::scalar_value::ScalarValue; -use sparrow_instructions::Udf; use sparrow_qfr::io::writer::FlightRecordWriter; use sparrow_qfr::kaskada::sparrow::v1alpha::FlightRecordHeader; use sparrow_qfr::FlightRecorderFactory; use tokio_stream::wrappers::UnboundedReceiverStream; use tokio_stream::StreamExt; use tracing::{error, info, info_span, Instrument}; -use uuid::Uuid; use crate::execute::compute_store_guard::ComputeStoreGuard; use crate::execute::operation::{OperationContext, OperationExecutor}; @@ -60,7 +57,6 @@ impl ComputeExecutor { progress_updates_rx: tokio::sync::mpsc::Receiver, destination: Destination, stop_signal_rx: Option>, - udfs: &HashMap>, ) -> error_stack::Result { let mut spawner = ComputeTaskSpawner::new(); @@ -152,7 +148,6 @@ impl ComputeExecutor { max_event_time_tx.clone(), late_bindings, stop_signal_rx.clone(), - udfs, ) 
.await?, ); diff --git a/crates/sparrow-runtime/src/execute/operation.rs b/crates/sparrow-runtime/src/execute/operation.rs index b642d0714..a2a558fa1 100644 --- a/crates/sparrow-runtime/src/execute/operation.rs +++ b/crates/sparrow-runtime/src/execute/operation.rs @@ -105,6 +105,8 @@ pub(crate) struct OperationContext { /// /// Derived from the ExecutionOptions, pub materialize: bool, + /// Mapping of uuid to user-defined functions. + pub udfs: HashMap>, } impl OperationContext { @@ -169,7 +171,6 @@ impl OperationExecutor { /// This function will not return until the operation is complete -- either /// because all inputs have been processed or because it has been /// interrupted due to an error or reaching a preview-rows limit, etc. - #[allow(clippy::too_many_arguments)] pub async fn execute( self, operation_index: usize, @@ -178,7 +179,6 @@ impl OperationExecutor { max_event_time_tx: tokio::sync::mpsc::UnboundedSender, late_bindings: &EnumMap>, stop_signal_rx: Option>, - udfs: &HashMap>, ) -> Result>, Error> { let Self { operation, @@ -195,7 +195,7 @@ impl OperationExecutor { operation_label, operation.expressions, late_bindings, - udfs, + &context.udfs, ) .into_report() .change_context(Error::internal_msg("unable to create executor"))?; diff --git a/crates/sparrow-runtime/src/execute/operation/scan.rs b/crates/sparrow-runtime/src/execute/operation/scan.rs index a1f2ae204..1bb1c3eb4 100644 --- a/crates/sparrow-runtime/src/execute/operation/scan.rs +++ b/crates/sparrow-runtime/src/execute/operation/scan.rs @@ -510,6 +510,7 @@ mod tests { output_at_time: None, bounded_lateness_ns: None, materialize: false, + udfs: HashMap::new(), }; executor @@ -520,7 +521,6 @@ mod tests { max_event_tx, &Default::default(), None, - &HashMap::new(), ) .await .unwrap() diff --git a/crates/sparrow-runtime/src/execute/operation/testing.rs b/crates/sparrow-runtime/src/execute/operation/testing.rs index 0a89feb7d..1ef6b5bb1 100644 --- a/crates/sparrow-runtime/src/execute/operation/testing.rs +++ b/crates/sparrow-runtime/src/execute/operation/testing.rs @@ -190,6 +190,7 @@ pub(super) async fn run_operation( output_at_time: None, bounded_lateness_ns: None, materialize: false, + udfs: HashMap::new(), }; executor .execute( @@ -199,7 +200,6 @@ pub(super) async fn run_operation( max_event_tx, &Default::default(), None, - &HashMap::new(), ) .await .unwrap() @@ -248,6 +248,7 @@ pub(super) async fn run_operation_json( output_at_time: None, bounded_lateness_ns: None, materialize: false, + udfs: HashMap::new(), }; executor .execute( @@ -257,7 +258,6 @@ pub(super) async fn run_operation_json( max_event_tx, &Default::default(), None, - &HashMap::new(), ) .await .unwrap() diff --git a/crates/sparrow-session/src/session.rs b/crates/sparrow-session/src/session.rs index e1f1cb908..d7edbd738 100644 --- a/crates/sparrow-session/src/session.rs +++ b/crates/sparrow-session/src/session.rs @@ -423,7 +423,7 @@ impl Session { data_context, options, Some(key_hash_inverse), - &self.udfs, + self.udfs.clone(), )) .change_context(Error::Execute)? 
.map_err(|e| e.change_context(Error::Execute)) diff --git a/python/.vscode/settings.json b/python/.vscode/settings.json new file mode 100644 index 000000000..23fd35f0e --- /dev/null +++ b/python/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "editor.formatOnSave": true +} \ No newline at end of file diff --git a/python/pysrc/kaskada/__init__.py b/python/pysrc/kaskada/__init__.py index b4d5db186..bc5121cc2 100644 --- a/python/pysrc/kaskada/__init__.py +++ b/python/pysrc/kaskada/__init__.py @@ -10,7 +10,6 @@ from ._timestream import Literal from ._timestream import Timestream from ._timestream import record -from .udf import Udf from .udf import udf @@ -23,7 +22,6 @@ "Result", "sources", "Timestream", - "Udf", "udf", "windows", ] diff --git a/python/pysrc/kaskada/udf.py b/python/pysrc/kaskada/udf.py index 8d7058257..15f06d8f0 100644 --- a/python/pysrc/kaskada/udf.py +++ b/python/pysrc/kaskada/udf.py @@ -21,7 +21,6 @@ class Udf: def __init__(self, signature: str, func: Callable[..., pa.Array]) -> None: """Create a UDF with the given signature.""" - functools.update_wrapper(self, func) self._ffi = _ffi.Udf(signature, func) def __call__(self, *args: Timestream | Literal) -> Timestream: @@ -32,31 +31,35 @@ def __call__(self, *args: Timestream | Literal) -> Timestream: def udf(signature: str): """Decorate a function for use as a Kaskada UDF.""" - def decorator(func_type: FuncType): + def decorator(user_func: FuncType): # 1. Convert the `FuncType` to the type expected by Udf. # This needs to take the PyArrow result type and PyArrow inputs, # convert them to Pandas, call the function, and convert the result # back to PyArrow. def func(result_type, *args): - return _converted_func(func_type, result_type, *args) + return _converted_func(user_func, result_type, *args) # 2. Create the UDF object. - return Udf(signature, func) + udf = Udf(signature, func) + + # 3. Update the udf with the user function's documentation + functools.update_wrapper(udf, user_func) + + return udf return decorator def _converted_func( - func_type: FuncType, result_type: pa.DataType, *args: pa.Array + user_func: FuncType, result_type: pa.DataType, *args: pa.Array ) -> pa.Array: """Run the function producing the given result type.""" # TODO: I believe this will return a series for simple arrays, and a # dataframe for struct arrays. We should explore how this handles # different types. pd_args = [arg.to_pandas() for arg in args] - pd_result = func_type(*pd_args) + pd_result = user_func(*pd_args) - print("Result type: ", result_type) if isinstance(pd_result, pd.Series): return pa.Array.from_pandas(pd_result, type=result_type) else: diff --git a/python/pytests/udf_test.py b/python/pytests/udf_test.py index f05b4e4b1..e9bbd788b 100644 --- a/python/pytests/udf_test.py +++ b/python/pytests/udf_test.py @@ -2,6 +2,7 @@ import kaskada._ffi as _ffi import pandas as pd import pytest +from kaskada.udf import Udf @kd.udf("add(x: N, y: N) -> N") @@ -16,16 +17,15 @@ def add_x2(x: pd.Series, y: pd.Series) -> pd.Series: return (x + y) * 2 -@pytest.mark.skip(reason="currently not working") -def test_docstring() -> None: - assert add.__doc__ == "Use Pandas to add two numbers." - - def test_udf_instance() -> None: - assert isinstance(add, kd.Udf) + assert isinstance(add, Udf) assert isinstance(add._ffi, _ffi.Udf) +def test_docstring() -> None: + assert add.__doc__ == "Use Pandas to add two numbers." + + @pytest.fixture def source_int64() -> kd.sources.CsvString: content = "\n".join(