From 951b0304a232fc80eaea0a3f60d338c0480563c4 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 4 Oct 2024 14:18:16 +0000 Subject: [PATCH 1/4] refactor(python): Expose IEJoin IR node to python --- crates/polars-python/src/lazyframe/visit.rs | 2 +- .../src/lazyframe/visitor/expr_nodes.rs | 15 ++++ .../src/lazyframe/visitor/nodes.rs | 80 ++++++++++++++----- py-polars/src/lib.rs | 1 + 4 files changed, 77 insertions(+), 21 deletions(-) diff --git a/crates/polars-python/src/lazyframe/visit.rs b/crates/polars-python/src/lazyframe/visit.rs index 726b5e7debd4..32c8d3d23b7d 100644 --- a/crates/polars-python/src/lazyframe/visit.rs +++ b/crates/polars-python/src/lazyframe/visit.rs @@ -57,7 +57,7 @@ impl NodeTraverser { // Increment major on breaking changes to the IR (e.g. renaming // fields, reordering tuples), minor on backwards compatible // changes (e.g. exposing a new expression node). - const VERSION: Version = (2, 1); + const VERSION: Version = (2, 2); pub fn new(root: Node, lp_arena: Arena, expr_arena: Arena) -> Self { Self { diff --git a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs index a05ea6891e15..e8832e9b5488 100644 --- a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs @@ -1,4 +1,6 @@ use polars::datatypes::TimeUnit; +#[cfg(feature = "iejoin")] +use polars::prelude::InequalityOperator; use polars::series::ops::NullBehavior; use polars_core::prelude::{NonExistent, QuantileInterpolOptions}; use polars_core::series::IsSorted; @@ -114,6 +116,19 @@ impl IntoPy for Wrap { } } +#[cfg(feature = "iejoin")] +impl IntoPy for Wrap { + fn into_py(self, py: Python<'_>) -> PyObject { + match self.0 { + InequalityOperator::Lt => PyOperator::Lt, + InequalityOperator::LtEq => PyOperator::LtEq, + InequalityOperator::Gt => PyOperator::Gt, + InequalityOperator::GtEq => PyOperator::GtEq, + } + .into_py(py) + } +} + #[pyclass(name = "StringFunction")] #[derive(Copy, Clone)] pub enum PyStringFunction { diff --git a/crates/polars-python/src/lazyframe/visitor/nodes.rs b/crates/polars-python/src/lazyframe/visitor/nodes.rs index d8dbb71281bc..47137dbc3193 100644 --- a/crates/polars-python/src/lazyframe/visitor/nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/nodes.rs @@ -9,7 +9,7 @@ use pyo3::prelude::*; use super::expr_nodes::PyGroupbyOptions; use crate::lazyframe::visit::PyExprIR; -use crate::PyDataFrame; +use crate::{PyDataFrame, Wrap}; #[pyclass] /// Scan a table with an optional predicate from a python function @@ -191,6 +191,21 @@ pub struct Join { options: PyObject, } +#[pyclass] +/// IEJoin operation +pub struct IEJoin { + #[pyo3(get)] + input_left: usize, + #[pyo3(get)] + input_right: usize, + #[pyo3(get)] + left_on: Vec, + #[pyo3(get)] + right_on: Vec, + #[pyo3(get)] + options: PyObject, +} + #[pyclass] /// Adding columns to the table without a Join pub struct HStack { @@ -470,26 +485,51 @@ pub(crate) fn into_py(py: Python<'_>, plan: &IR) -> PyResult { input_right: input_right.0, left_on: left_on.iter().map(|e| e.into()).collect(), right_on: right_on.iter().map(|e| e.into()).collect(), - options: ( - match options.args.how { - JoinType::Left => "left", - JoinType::Right => "right", - JoinType::Inner => "inner", - JoinType::Full => "full", - #[cfg(feature = "asof_join")] - JoinType::AsOf(_) => return Err(PyNotImplementedError::new_err("asof join")), - JoinType::Cross => "cross", - JoinType::Semi => "leftsemi", - JoinType::Anti => "leftanti", + options: { + let how = &options.args.how; + if how.is_ie() { #[cfg(feature = "iejoin")] - JoinType::IEJoin(_) => return Err(PyNotImplementedError::new_err("IEJoin")), - }, - options.args.join_nulls, - options.args.slice, - options.args.suffix.as_deref(), - options.args.coalesce.coalesce(&options.args.how), - ) - .to_object(py), + if let JoinType::IEJoin(ie_options) = how { + ( + "inequality", + options.args.join_nulls, + options.args.slice, + options.args.suffix.as_deref(), + options.args.coalesce.coalesce(&options.args.how), + Wrap(ie_options.operator1).into_py(py), + ie_options + .operator2 + .as_ref() + .map_or_else(|| py.None(), |op| Wrap(*op).into_py(py)), + ) + .to_object(py) + } else { + unreachable!() + } + } else { + ( + match how { + JoinType::Left => "left", + JoinType::Right => "right", + JoinType::Inner => "inner", + JoinType::Full => "full", + #[cfg(feature = "asof_join")] + JoinType::AsOf(_) => { + return Err(PyNotImplementedError::new_err("asof join")) + }, + JoinType::Cross => "cross", + JoinType::Semi => "leftsemi", + JoinType::Anti => "leftanti", + _ => unreachable!(), + }, + options.args.join_nulls, + options.args.slice, + options.args.suffix.as_deref(), + options.args.coalesce.coalesce(how), + ) + .to_object(py) + } + }, } .into_py(py), IR::HStack { diff --git a/py-polars/src/lib.rs b/py-polars/src/lib.rs index 1c645738102a..78b7b0eb23af 100644 --- a/py-polars/src/lib.rs +++ b/py-polars/src/lib.rs @@ -38,6 +38,7 @@ fn _ir_nodes(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::().unwrap(); m.add_class::().unwrap(); m.add_class::().unwrap(); + m.add_class::().unwrap(); m.add_class::().unwrap(); m.add_class::().unwrap(); m.add_class::().unwrap(); From 8fac5bbced1b5a59958d946ee0199fdd74932edd Mon Sep 17 00:00:00 2001 From: ritchie Date: Mon, 7 Oct 2024 11:02:15 +0200 Subject: [PATCH 2/4] object --- .../src/lazyframe/visitor/nodes.rs | 65 ++++++++----------- 1 file changed, 27 insertions(+), 38 deletions(-) diff --git a/crates/polars-python/src/lazyframe/visitor/nodes.rs b/crates/polars-python/src/lazyframe/visitor/nodes.rs index 47137dbc3193..2c4000edf96a 100644 --- a/crates/polars-python/src/lazyframe/visitor/nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/nodes.rs @@ -9,7 +9,7 @@ use pyo3::prelude::*; use super::expr_nodes::PyGroupbyOptions; use crate::lazyframe::visit::PyExprIR; -use crate::{PyDataFrame, Wrap}; +use crate::PyDataFrame; #[pyclass] /// Scan a table with an optional predicate from a python function @@ -487,48 +487,37 @@ pub(crate) fn into_py(py: Python<'_>, plan: &IR) -> PyResult { right_on: right_on.iter().map(|e| e.into()).collect(), options: { let how = &options.args.how; - if how.is_ie() { - #[cfg(feature = "iejoin")] - if let JoinType::IEJoin(ie_options) = how { - ( - "inequality", - options.args.join_nulls, - options.args.slice, - options.args.suffix.as_deref(), - options.args.coalesce.coalesce(&options.args.how), - Wrap(ie_options.operator1).into_py(py), + + ( + match how { + JoinType::Left => "left".to_object(py), + JoinType::Right => "right".to_object(py), + JoinType::Inner => "inner".to_object(py), + JoinType::Full => "full".to_object(py), + #[cfg(feature = "asof_join")] + JoinType::AsOf(_) => { + return Err(PyNotImplementedError::new_err("asof join")) + }, + JoinType::Cross => "cross".to_object(py), + JoinType::Semi => "leftsemi".to_object(py), + JoinType::Anti => "leftanti".to_object(py), + #[cfg(feature = "iejoin")] + JoinType::IEJoin(ie_options) => ( + "inequality".to_object(py), + crate::Wrap(ie_options.operator1).into_py(py), ie_options .operator2 .as_ref() .map_or_else(|| py.None(), |op| Wrap(*op).into_py(py)), ) - .to_object(py) - } else { - unreachable!() - } - } else { - ( - match how { - JoinType::Left => "left", - JoinType::Right => "right", - JoinType::Inner => "inner", - JoinType::Full => "full", - #[cfg(feature = "asof_join")] - JoinType::AsOf(_) => { - return Err(PyNotImplementedError::new_err("asof join")) - }, - JoinType::Cross => "cross", - JoinType::Semi => "leftsemi", - JoinType::Anti => "leftanti", - _ => unreachable!(), - }, - options.args.join_nulls, - options.args.slice, - options.args.suffix.as_deref(), - options.args.coalesce.coalesce(how), - ) - .to_object(py) - } + .into_py(py), + }, + options.args.join_nulls, + options.args.slice, + options.args.suffix.as_deref(), + options.args.coalesce.coalesce(how), + ) + .to_object(py) }, } .into_py(py), From 1bc6cef80b0acb931a12ec849fcaa6061d95dbb3 Mon Sep 17 00:00:00 2001 From: ritchie Date: Mon, 7 Oct 2024 11:09:10 +0200 Subject: [PATCH 3/4] fix --- crates/polars-python/src/lazyframe/visitor/nodes.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/polars-python/src/lazyframe/visitor/nodes.rs b/crates/polars-python/src/lazyframe/visitor/nodes.rs index 2c4000edf96a..330017083195 100644 --- a/crates/polars-python/src/lazyframe/visitor/nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/nodes.rs @@ -508,7 +508,7 @@ pub(crate) fn into_py(py: Python<'_>, plan: &IR) -> PyResult { ie_options .operator2 .as_ref() - .map_or_else(|| py.None(), |op| Wrap(*op).into_py(py)), + .map_or_else(|| py.None(), |op| crate::Wrap(*op).into_py(py)), ) .into_py(py), }, From c01908ad25419e9821437cbd5374ef61a0ca0a8d Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Mon, 7 Oct 2024 10:11:32 +0000 Subject: [PATCH 4/4] Remove unnecessary struct We now don't use this one. --- .../polars-python/src/lazyframe/visitor/nodes.rs | 15 --------------- py-polars/src/lib.rs | 1 - 2 files changed, 16 deletions(-) diff --git a/crates/polars-python/src/lazyframe/visitor/nodes.rs b/crates/polars-python/src/lazyframe/visitor/nodes.rs index 330017083195..ae805e7d0ff0 100644 --- a/crates/polars-python/src/lazyframe/visitor/nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/nodes.rs @@ -191,21 +191,6 @@ pub struct Join { options: PyObject, } -#[pyclass] -/// IEJoin operation -pub struct IEJoin { - #[pyo3(get)] - input_left: usize, - #[pyo3(get)] - input_right: usize, - #[pyo3(get)] - left_on: Vec, - #[pyo3(get)] - right_on: Vec, - #[pyo3(get)] - options: PyObject, -} - #[pyclass] /// Adding columns to the table without a Join pub struct HStack { diff --git a/py-polars/src/lib.rs b/py-polars/src/lib.rs index 78b7b0eb23af..1c645738102a 100644 --- a/py-polars/src/lib.rs +++ b/py-polars/src/lib.rs @@ -38,7 +38,6 @@ fn _ir_nodes(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::().unwrap(); m.add_class::().unwrap(); m.add_class::().unwrap(); - m.add_class::().unwrap(); m.add_class::().unwrap(); m.add_class::().unwrap(); m.add_class::().unwrap();