Skip to content

Commit

Permalink
fix(rust,python): Allow partial name overlap in join_where resolution
Browse files Browse the repository at this point in the history
wence- authored Oct 8, 2024

Verified

This commit was signed with the committer’s verified signature.
tvdeyen Thomas von Deyen
1 parent 815e31f commit 2a7e165
Showing 3 changed files with 26 additions and 6 deletions.
16 changes: 11 additions & 5 deletions crates/polars-plan/src/plans/conversion/join.rs
Original file line number Diff line number Diff line change
@@ -163,7 +163,6 @@ fn resolve_join_where(
.get(input_right)
.schema(ctxt.lp_arena)
.into_owned();

for e in &predicates {
let no_binary_comparisons = e
.into_iter()
@@ -174,16 +173,23 @@ fn resolve_join_where(
.count();
polars_ensure!(no_binary_comparisons == 1, InvalidOperation: "only 1 binary comparison allowed as join condition");

// NOTE(review): this hunk is rendered without +/- markers. The single-line signature directly
// below and the one-line `iter.all(...)` call appear to be the removed (pre-change) lines, each
// immediately followed by its multi-line replacement — confirm against the original diff.
//
// In its new form, returns `true` when every name yielded by `expr_to_leaf_column_names_iter`
// for `left` and `right` is contained in `schema` and, if `other` is `Some`, is NOT contained
// in that other schema. With `other == None` only membership in `schema` is checked.
fn all_in_schema(schema: &Schema, left: &Expr, right: &Expr) -> bool {
fn all_in_schema(
schema: &Schema,
other: Option<&Schema>,
left: &Expr,
right: &Expr,
) -> bool {
// Column names referenced by both sides of the comparison, visited in one pass.
let mut iter =
expr_to_leaf_column_names_iter(left).chain(expr_to_leaf_column_names_iter(right));
iter.all(|name| schema.contains(name.as_str()))
iter.all(|name| {
// A name counts only if it is in `schema` and absent from `other` (when one is given).
schema.contains(name.as_str()) && other.map_or(true, |s| !s.contains(name.as_str()))
})
}

let valid = e.into_iter().all(|e| match e {
Expr::BinaryExpr { left, op, right } if op.is_comparison() => {
!(all_in_schema(&schema_left, left, right)
|| all_in_schema(&schema_right, left, right))
!(all_in_schema(&schema_left, None, left, right)
|| all_in_schema(&schema_right, Some(&schema_left), left, right))
},
_ => true,
});
2 changes: 1 addition & 1 deletion crates/polars-python/src/lazyframe/visitor/nodes.rs
Original file line number Diff line number Diff line change
@@ -499,7 +499,7 @@ pub(crate) fn into_py(py: Python<'_>, plan: &IR) -> PyResult<PyObject> {
},
options.args.join_nulls,
options.args.slice,
options.args.suffix.as_deref(),
options.args.suffix().as_str(),
options.args.coalesce.coalesce(how),
)
.to_object(py)
14 changes: 14 additions & 0 deletions py-polars/tests/unit/operations/test_inequality_join.py
Original file line number Diff line number Diff line change
@@ -594,3 +594,17 @@ def test_join_on_strings() -> None:
"a_right": ["a", "a", "b", "a", "b", "c"],
"b_right": ["b", "b", "b", "b", "b", "b"],
}


def test_join_partial_column_name_overlap_19119() -> None:
    """join_where must accept a predicate when the frames share only some column names.

    Both frames have a column ``a``; ``d`` exists only on the right side. The
    predicate ``a > d`` is expected to resolve, and the right-hand ``a`` is
    expected to appear as ``a_right`` in the result.
    """
    lhs = pl.LazyFrame({"a": [1], "b": [2]})
    rhs = pl.LazyFrame({"a": [2], "d": [0]})
    predicate = pl.col("a") > pl.col("d")

    result = lhs.join_where(rhs, predicate).collect().to_dict(as_series=False)

    expected = {
        "a": [1],
        "b": [2],
        "a_right": [2],
        "d": [0],
    }
    assert result == expected

0 comments on commit 2a7e165

Please sign in to comment.