Refine filter_df logic

* Rename `filter` argument to `to_keep`
* Make target dataframe positional-only
* Revise docstring
emdgroup · Dec 20, 2024 · a6cc05a · a6cc05a
1 parent 3d1c0eb
commit a6cc05a
Showing 1 changed file with 7 additions and 5 deletions.
diff --git a/baybe/utils/dataframe.py b/baybe/utils/dataframe.py
@@ -607,7 +607,7 @@ def get_transform_objects(
 
 
 def filter_df(
-    df: pd.DataFrame, filter: pd.DataFrame, complement: bool = False
+    df: pd.DataFrame, /, to_keep: pd.DataFrame, complement: bool = False
 ) -> pd.DataFrame:
     """Filter a dataframe based on a second dataframe defining filtering conditions.
 
@@ -616,9 +616,11 @@ def filter_df(
 
     Args:
         df: The dataframe to be filtered.
-        filter: The dataframe defining the filtering conditions.
+        to_keep: The dataframe defining the filtering conditions. By default
+            (see ``complement`` argument), it defines the rows to be kept in the sense
+            of an inner join.
         complement: If ``False``, the filter dataframe determines the rows to be kept
-            (i.e. selection via regular join). If ``True``, the filtering mechanism is
+            (i.e. selection via inner join). If ``True``, the filtering mechanism is
             inverted so that the complement set of rows is kept (i.e. selection
             via anti-join).
 
@@ -661,15 +663,15 @@ def filter_df(
 
     """
     # Handle special case of empty filter
-    if filter.empty:
+    if to_keep.empty:
         return df if complement else pd.DataFrame(columns=df.columns)
 
     # Remember original index name
     index_name = df.index.name
 
     # Identify rows to be dropped
     out = pd.merge(
-        df.reset_index(names="_df_index"), filter, how="left", indicator=True
+        df.reset_index(names="_df_index"), to_keep, how="left", indicator=True
     ).set_index("_df_index")
     to_drop = out["_merge"] == ("both" if complement else "left_only")