fix: Avoid failure when index level shares name with a column

Previously, report generation failed for DataFrames in which an index level had the same name as a column: the groupby used to count duplicate rows raised "ValueError: 'foo' is both an index level and a column label, which is ambiguous." This update clears the index level names (via `rename_axis`) immediately before that groupby operation, so the grouping keys resolve unambiguously to the columns.
ydataai · Nov 10, 2024 · f39f669 · f39f669
1 parent 920f8df
commit f39f669
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 0 deletions.
diff --git a/src/ydata_profiling/model/pandas/duplicates_pandas.py b/src/ydata_profiling/model/pandas/duplicates_pandas.py
@@ -35,6 +35,7 @@ def pandas_get_duplicates(
             duplicated_rows = df.duplicated(subset=supported_columns, keep=False)
             duplicated_rows = (
                 df[duplicated_rows]
+                .rename_axis(index=lambda _: None)
                 .groupby(supported_columns, dropna=False, observed=True)
                 .size()
                 .reset_index(name=duplicates_key)

diff --git a/tests/unit/test_index_column_name_clash.py b/tests/unit/test_index_column_name_clash.py
@@ -0,0 +1,20 @@
+import pandas as pd
+import pytest
+
+from ydata_profiling import ProfileReport
+
+
+@pytest.fixture()
+def df():
+    df = pd.DataFrame(
+        {
+            "foo": [1, 2, 3],
+        },
+        index=pd.Index([1, 2, 3], name="foo"),
+    )
+    return df
+
+
+def test_index_column_name_clash(df: pd.DataFrame):
+    profile_report = ProfileReport(df, title="Test Report", progress_bar=False)
+    assert len(profile_report.to_html()) > 0