feat(python): Add 'drop_empty_rows' parameter for read_ods (#19202)

pola-rs · Oct 12, 2024 · 48f6e9d · 48f6e9d
1 parent fc970f7
commit 48f6e9d
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 11 deletions.
diff --git a/py-polars/polars/io/spreadsheet/functions.py b/py-polars/polars/io/spreadsheet/functions.py
@@ -55,8 +55,8 @@ def read_excel(
     columns: Sequence[int] | Sequence[str] | None = ...,
     schema_overrides: SchemaDict | None = ...,
     infer_schema_length: int | None = ...,
-    raise_if_empty: bool = ...,
     drop_empty_rows: bool = ...,
+    raise_if_empty: bool = ...,
 ) -> pl.DataFrame: ...
 
 
@@ -73,8 +73,8 @@ def read_excel(
     columns: Sequence[int] | Sequence[str] | None = ...,
     schema_overrides: SchemaDict | None = ...,
     infer_schema_length: int | None = ...,
-    raise_if_empty: bool = ...,
     drop_empty_rows: bool = ...,
+    raise_if_empty: bool = ...,
 ) -> pl.DataFrame: ...
 
 
@@ -91,8 +91,8 @@ def read_excel(
     columns: Sequence[int] | Sequence[str] | None = ...,
     schema_overrides: SchemaDict | None = ...,
     infer_schema_length: int | None = ...,
-    raise_if_empty: bool = ...,
     drop_empty_rows: bool = ...,
+    raise_if_empty: bool = ...,
 ) -> NoReturn: ...
 
 
@@ -111,8 +111,8 @@ def read_excel(
     columns: Sequence[int] | Sequence[str] | None = ...,
     schema_overrides: SchemaDict | None = ...,
     infer_schema_length: int | None = ...,
-    raise_if_empty: bool = ...,
     drop_empty_rows: bool = ...,
+    raise_if_empty: bool = ...,
 ) -> dict[str, pl.DataFrame]: ...
 
 
@@ -129,8 +129,8 @@ def read_excel(
     columns: Sequence[int] | Sequence[str] | None = ...,
     schema_overrides: SchemaDict | None = ...,
     infer_schema_length: int | None = ...,
-    raise_if_empty: bool = ...,
     drop_empty_rows: bool = ...,
+    raise_if_empty: bool = ...,
 ) -> pl.DataFrame: ...
 
 
@@ -147,8 +147,8 @@ def read_excel(
     columns: Sequence[int] | Sequence[str] | None = ...,
     schema_overrides: SchemaDict | None = ...,
     infer_schema_length: int | None = ...,
-    raise_if_empty: bool = ...,
     drop_empty_rows: bool = ...,
+    raise_if_empty: bool = ...,
 ) -> dict[str, pl.DataFrame]: ...
 
 
@@ -166,8 +166,8 @@ def read_excel(
     columns: Sequence[int] | Sequence[str] | None = None,
     schema_overrides: SchemaDict | None = None,
     infer_schema_length: int | None = N_INFER_DEFAULT,
-    raise_if_empty: bool = True,
     drop_empty_rows: bool = True,
+    raise_if_empty: bool = True,
 ) -> pl.DataFrame | dict[str, pl.DataFrame]:
     """
     Read Excel spreadsheet data into a DataFrame.
@@ -236,12 +236,11 @@ def read_excel(
         entire dataset is scanned to determine the dtypes, which can slow parsing for
         large workbooks. Note that only the "calamine" and "xlsx2csv" engines support
         this parameter.
+    drop_empty_rows
+        Indicate whether to omit empty rows when reading data into the DataFrame.
     raise_if_empty
         When there is no data in the sheet, `NoDataError` is raised. If this parameter
         is set to False, an empty DataFrame (with no columns) is returned instead.
-    drop_empty_rows
-        A boolean flag whether to drop empty rows or not from the dataframe. Default
-        is True.
 
     Returns
     -------
@@ -323,6 +322,7 @@ def read_ods(
     columns: Sequence[int] | Sequence[str] | None = ...,
     schema_overrides: SchemaDict | None = ...,
     infer_schema_length: int | None = ...,
+    drop_empty_rows: bool = ...,
     raise_if_empty: bool = ...,
 ) -> pl.DataFrame: ...
 
@@ -337,6 +337,7 @@ def read_ods(
     columns: Sequence[int] | Sequence[str] | None = ...,
     schema_overrides: SchemaDict | None = ...,
     infer_schema_length: int | None = ...,
+    drop_empty_rows: bool = ...,
     raise_if_empty: bool = ...,
 ) -> pl.DataFrame: ...
 
@@ -351,6 +352,7 @@ def read_ods(
     columns: Sequence[int] | Sequence[str] | None = ...,
     schema_overrides: SchemaDict | None = ...,
     infer_schema_length: int | None = ...,
+    drop_empty_rows: bool = ...,
     raise_if_empty: bool = ...,
 ) -> NoReturn: ...
 
@@ -365,6 +367,7 @@ def read_ods(
     columns: Sequence[int] | Sequence[str] | None = ...,
     schema_overrides: SchemaDict | None = ...,
     infer_schema_length: int | None = ...,
+    drop_empty_rows: bool = ...,
     raise_if_empty: bool = ...,
 ) -> dict[str, pl.DataFrame]: ...
 
@@ -379,6 +382,7 @@ def read_ods(
     columns: Sequence[int] | Sequence[str] | None = ...,
     schema_overrides: SchemaDict | None = ...,
     infer_schema_length: int | None = ...,
+    drop_empty_rows: bool = ...,
     raise_if_empty: bool = ...,
 ) -> pl.DataFrame: ...
 
@@ -393,6 +397,7 @@ def read_ods(
     columns: Sequence[int] | Sequence[str] | None = ...,
     schema_overrides: SchemaDict | None = ...,
     infer_schema_length: int | None = ...,
+    drop_empty_rows: bool = ...,
     raise_if_empty: bool = ...,
 ) -> dict[str, pl.DataFrame]: ...
 
@@ -406,6 +411,7 @@ def read_ods(
     columns: Sequence[int] | Sequence[str] | None = None,
     schema_overrides: SchemaDict | None = None,
     infer_schema_length: int | None = N_INFER_DEFAULT,
+    drop_empty_rows: bool = True,
     raise_if_empty: bool = True,
 ) -> pl.DataFrame | dict[str, pl.DataFrame]:
     """
@@ -440,6 +446,8 @@ def read_ods(
         The maximum number of rows to scan for schema inference. If set to `None`, the
         entire dataset is scanned to determine the dtypes, which can slow parsing for
         large workbooks.
+    drop_empty_rows
+        Indicate whether to omit empty rows when reading data into the DataFrame.
     raise_if_empty
         When there is no data in the sheet, `NoDataError` is raised. If this parameter
         is set to False, an empty DataFrame (with no columns) is returned instead.
@@ -481,6 +489,7 @@ def read_ods(
         schema_overrides=schema_overrides,
         infer_schema_length=infer_schema_length,
         raise_if_empty=raise_if_empty,
+        drop_empty_rows=drop_empty_rows,
         has_header=has_header,
         columns=columns,
     )

diff --git a/py-polars/tests/unit/io/test_spreadsheet.py b/py-polars/tests/unit/io/test_spreadsheet.py
@@ -83,7 +83,7 @@ def path_ods_mixed(io_files_path: Path) -> Path:
     return io_files_path / "mixed.ods"
 
 
-@pytest.fixture()
+@pytest.fixture
 def path_empty_rows_excel(io_files_path: Path) -> Path:
     return io_files_path / "test_empty_rows.xlsx"