From 2b8f7658d584dea651a823d04dfd373a500781fe Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Tue, 13 Aug 2024 12:48:09 +0200
Subject: [PATCH] Fix result index of merge (#1121)

---
Notes (reviewer annotations; text between the three-dash line above and the
first "diff --git" line is neither part of the commit message nor of the
patch itself):

 * dask_expr/_collection.py, merge(): the collection built from Merge(...)
   is now bound to `result` instead of being returned directly. When the
   index names of `left._meta` and `right._meta` differ, the function
   returns `result.rename_axis(index=result._meta.index.name)`; otherwise
   it returns `result` unchanged.
   NOTE(review): presumably the rename makes every computed partition carry
   the index name recorded in the metadata (the new test checks this for
   partition 0) -- confirm against Merge's per-partition behaviour.

 * dask_expr/tests/test_merge.py, test_join_consistent_index_names(): outer
   join of a frame whose index is named "test" with a frame whose index
   name is never set. The test asserts that the index name is None on the
   result, on its `_meta`, on the first computed partition, and on the
   pandas result it is compared against. assert_eq is called with
   check_index=False.

 dask_expr/_collection.py      |  6 +++++-
 dask_expr/tests/test_merge.py | 16 ++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/dask_expr/_collection.py b/dask_expr/_collection.py
index 7d9bce24..af2fe2cd 100644
--- a/dask_expr/_collection.py
+++ b/dask_expr/_collection.py
@@ -5703,7 +5703,7 @@ def merge(
     if left_on and right_on:
         warn_dtype_mismatch(left, right, left_on, right_on)
 
-    return new_collection(
+    result = new_collection(
         Merge(
             left,
             right,
@@ -5719,6 +5719,10 @@ def merge(
             broadcast=broadcast,
         )
     )
+    if left._meta.index.name != right._meta.index.name:
+        return result.rename_axis(index=result._meta.index.name)
+    else:
+        return result
 
 
 @wraps(pd.merge_asof)
diff --git a/dask_expr/tests/test_merge.py b/dask_expr/tests/test_merge.py
index 7711028a..390eb9b8 100644
--- a/dask_expr/tests/test_merge.py
+++ b/dask_expr/tests/test_merge.py
@@ -840,6 +840,22 @@ def test_merge_avoid_overeager_filter_pushdown():
     assert isinstance(result.expr.frame.frame, Merge)
 
 
+def test_join_consistent_index_names():
+    pdf1 = pd.DataFrame(index=["a", "b", "c"], data=dict(a=[1, 2, 3]))
+    pdf1.index.name = "test"
+    df1 = from_pandas(pdf1, 2)
+
+    pdf2 = pd.DataFrame(index=["a", "b", "d"], data=dict(b=[1, 2, 3]))
+    df2 = from_pandas(pdf2, 2)
+    result = df1.join(df2, how="outer")
+    expected = pdf1.join(pdf2, how="outer")
+    assert_eq(result, expected, check_index=False)
+    assert result.index.name is None
+    assert result._meta.index.name is None
+    assert result.partitions[0].compute().index.name is None
+    assert expected.index.name is None
+
+
 @pytest.mark.parametrize("how", ["left", "inner", "right", "outer"])
 def test_isin_filter_pushdown(how):
     pdf1 = pd.DataFrame(