diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs index 154726e6a27e..d0860f3faccd 100644 --- a/crates/polars-lazy/src/frame/mod.rs +++ b/crates/polars-lazy/src/frame/mod.rs @@ -601,11 +601,15 @@ impl LazyFrame { opt_state &= !OptFlags::COMM_SUBPLAN_ELIM; } - // The new streaming engine can't deal with the way the common - // subexpression elimination adds length-incorrect with_columns. #[cfg(feature = "cse")] if new_streaming { + // The new streaming engine can't deal with the way the common + // subexpression elimination adds length-incorrect with_columns. opt_state &= !OptFlags::COMM_SUBEXPR_ELIM; + + // The new streaming engine can't yet deal with the cache nodes + // introduced by common subplan elimination. + opt_state &= !OptFlags::COMM_SUBPLAN_ELIM; } let lp_top = optimize( diff --git a/py-polars/tests/unit/operations/test_join.py b/py-polars/tests/unit/operations/test_join.py index 9a3b763b5f6f..3bc0649a72ea 100644 --- a/py-polars/tests/unit/operations/test_join.py +++ b/py-polars/tests/unit/operations/test_join.py @@ -208,10 +208,14 @@ def test_join() -> None: } ) - joined = df_left.join(df_right, left_on="a", right_on="a").sort("a") + joined = df_left.join( + df_right, left_on="a", right_on="a", maintain_order="left_right" + ).sort("a") assert_series_equal(joined["b"], pl.Series("b", [1, 3, 2, 2])) - joined = df_left.join(df_right, left_on="a", right_on="a", how="left").sort("a") + joined = df_left.join( + df_right, left_on="a", right_on="a", how="left", maintain_order="left_right" + ).sort("a") assert joined["c_right"].is_null().sum() == 1 assert_series_equal(joined["b"], pl.Series("b", [1, 3, 2, 2, 4])) diff --git a/py-polars/tests/unit/sql/test_joins.py b/py-polars/tests/unit/sql/test_joins.py index c423fc4c45f4..8b3b968991a2 100644 --- a/py-polars/tests/unit/sql/test_joins.py +++ b/py-polars/tests/unit/sql/test_joins.py @@ -113,27 +113,36 @@ def test_join_cross_11927() -> None: def test_join_inner(foods_ipc_path: Path, join_clause: str) -> None: foods1 = pl.scan_ipc(foods_ipc_path) foods2 = foods1 # noqa: F841 + schema = foods1.collect_schema() + sort_clause = ", ".join(f'{c} ASC, "{c}:foods2" DESC' for c in schema) out = pl.sql( f""" SELECT * FROM foods1 INNER JOIN foods2 {join_clause} + ORDER BY {sort_clause} LIMIT 2 """, eager=True, ) - assert out.to_dict(as_series=False) == { - "category": ["vegetables", "vegetables"], - "calories": [45, 20], - "fats_g": [0.5, 0.0], - "sugars_g": [2, 2], - "category:foods2": ["vegetables", "vegetables"], - "calories:foods2": [45, 45], - "fats_g:foods2": [0.5, 0.5], - "sugars_g:foods2": [2, 2], - } + assert_frame_equal( + out, + pl.DataFrame( + { + "category": ["fruit", "fruit"], + "calories": [30, 30], + "fats_g": [0.0, 0.0], + "sugars_g": [3, 5], + "category:foods2": ["fruit", "fruit"], + "calories:foods2": [130, 130], + "fats_g:foods2": [0.0, 0.0], + "sugars_g:foods2": [25, 25], + } + ), + check_dtypes=False, + ) @pytest.mark.parametrize( diff --git a/py-polars/tests/unit/test_cse.py b/py-polars/tests/unit/test_cse.py index 06efd675c84f..47e60bdb0fd6 100644 --- a/py-polars/tests/unit/test_cse.py +++ b/py-polars/tests/unit/test_cse.py @@ -647,6 +647,7 @@ def test_cse_and_schema_update_projection_pd() -> None: @pytest.mark.debug +@pytest.mark.may_fail_auto_streaming def test_cse_predicate_self_join(capfd: Any, monkeypatch: Any) -> None: monkeypatch.setenv("POLARS_VERBOSE", "1") y = pl.LazyFrame({"a": [1], "b": [2], "y": [3]})