Skip to content

Commit

Permalink
feat(python): Add 'drop_empty_rows' parameter for read_ods (#19202)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie authored Oct 12, 2024
1 parent fc970f7 commit 48f6e9d
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 11 deletions.
29 changes: 19 additions & 10 deletions py-polars/polars/io/spreadsheet/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ def read_excel(
columns: Sequence[int] | Sequence[str] | None = ...,
schema_overrides: SchemaDict | None = ...,
infer_schema_length: int | None = ...,
raise_if_empty: bool = ...,
drop_empty_rows: bool = ...,
raise_if_empty: bool = ...,
) -> pl.DataFrame: ...


Expand All @@ -73,8 +73,8 @@ def read_excel(
columns: Sequence[int] | Sequence[str] | None = ...,
schema_overrides: SchemaDict | None = ...,
infer_schema_length: int | None = ...,
raise_if_empty: bool = ...,
drop_empty_rows: bool = ...,
raise_if_empty: bool = ...,
) -> pl.DataFrame: ...


Expand All @@ -91,8 +91,8 @@ def read_excel(
columns: Sequence[int] | Sequence[str] | None = ...,
schema_overrides: SchemaDict | None = ...,
infer_schema_length: int | None = ...,
raise_if_empty: bool = ...,
drop_empty_rows: bool = ...,
raise_if_empty: bool = ...,
) -> NoReturn: ...


Expand All @@ -111,8 +111,8 @@ def read_excel(
columns: Sequence[int] | Sequence[str] | None = ...,
schema_overrides: SchemaDict | None = ...,
infer_schema_length: int | None = ...,
raise_if_empty: bool = ...,
drop_empty_rows: bool = ...,
raise_if_empty: bool = ...,
) -> dict[str, pl.DataFrame]: ...


Expand All @@ -129,8 +129,8 @@ def read_excel(
columns: Sequence[int] | Sequence[str] | None = ...,
schema_overrides: SchemaDict | None = ...,
infer_schema_length: int | None = ...,
raise_if_empty: bool = ...,
drop_empty_rows: bool = ...,
raise_if_empty: bool = ...,
) -> pl.DataFrame: ...


Expand All @@ -147,8 +147,8 @@ def read_excel(
columns: Sequence[int] | Sequence[str] | None = ...,
schema_overrides: SchemaDict | None = ...,
infer_schema_length: int | None = ...,
raise_if_empty: bool = ...,
drop_empty_rows: bool = ...,
raise_if_empty: bool = ...,
) -> dict[str, pl.DataFrame]: ...


Expand All @@ -166,8 +166,8 @@ def read_excel(
columns: Sequence[int] | Sequence[str] | None = None,
schema_overrides: SchemaDict | None = None,
infer_schema_length: int | None = N_INFER_DEFAULT,
raise_if_empty: bool = True,
drop_empty_rows: bool = True,
raise_if_empty: bool = True,
) -> pl.DataFrame | dict[str, pl.DataFrame]:
"""
Read Excel spreadsheet data into a DataFrame.
Expand Down Expand Up @@ -236,12 +236,11 @@ def read_excel(
entire dataset is scanned to determine the dtypes, which can slow parsing for
large workbooks. Note that only the "calamine" and "xlsx2csv" engines support
this parameter.
drop_empty_rows
Indicate whether to omit empty rows when reading data into the DataFrame.
raise_if_empty
When there is no data in the sheet, `NoDataError` is raised. If this parameter
is set to False, an empty DataFrame (with no columns) is returned instead.
drop_empty_rows
A boolean flag whether to drop empty rows or not from the dataframe. Default
is True.
Returns
-------
Expand Down Expand Up @@ -323,6 +322,7 @@ def read_ods(
columns: Sequence[int] | Sequence[str] | None = ...,
schema_overrides: SchemaDict | None = ...,
infer_schema_length: int | None = ...,
drop_empty_rows: bool = ...,
raise_if_empty: bool = ...,
) -> pl.DataFrame: ...

Expand All @@ -337,6 +337,7 @@ def read_ods(
columns: Sequence[int] | Sequence[str] | None = ...,
schema_overrides: SchemaDict | None = ...,
infer_schema_length: int | None = ...,
drop_empty_rows: bool = ...,
raise_if_empty: bool = ...,
) -> pl.DataFrame: ...

Expand All @@ -351,6 +352,7 @@ def read_ods(
columns: Sequence[int] | Sequence[str] | None = ...,
schema_overrides: SchemaDict | None = ...,
infer_schema_length: int | None = ...,
drop_empty_rows: bool = ...,
raise_if_empty: bool = ...,
) -> NoReturn: ...

Expand All @@ -365,6 +367,7 @@ def read_ods(
columns: Sequence[int] | Sequence[str] | None = ...,
schema_overrides: SchemaDict | None = ...,
infer_schema_length: int | None = ...,
drop_empty_rows: bool = ...,
raise_if_empty: bool = ...,
) -> dict[str, pl.DataFrame]: ...

Expand All @@ -379,6 +382,7 @@ def read_ods(
columns: Sequence[int] | Sequence[str] | None = ...,
schema_overrides: SchemaDict | None = ...,
infer_schema_length: int | None = ...,
drop_empty_rows: bool = ...,
raise_if_empty: bool = ...,
) -> pl.DataFrame: ...

Expand All @@ -393,6 +397,7 @@ def read_ods(
columns: Sequence[int] | Sequence[str] | None = ...,
schema_overrides: SchemaDict | None = ...,
infer_schema_length: int | None = ...,
drop_empty_rows: bool = ...,
raise_if_empty: bool = ...,
) -> dict[str, pl.DataFrame]: ...

Expand All @@ -406,6 +411,7 @@ def read_ods(
columns: Sequence[int] | Sequence[str] | None = None,
schema_overrides: SchemaDict | None = None,
infer_schema_length: int | None = N_INFER_DEFAULT,
drop_empty_rows: bool = True,
raise_if_empty: bool = True,
) -> pl.DataFrame | dict[str, pl.DataFrame]:
"""
Expand Down Expand Up @@ -440,6 +446,8 @@ def read_ods(
The maximum number of rows to scan for schema inference. If set to `None`, the
entire dataset is scanned to determine the dtypes, which can slow parsing for
large workbooks.
drop_empty_rows
Indicate whether to omit empty rows when reading data into the DataFrame.
raise_if_empty
When there is no data in the sheet, `NoDataError` is raised. If this parameter
is set to False, an empty DataFrame (with no columns) is returned instead.
Expand Down Expand Up @@ -481,6 +489,7 @@ def read_ods(
schema_overrides=schema_overrides,
infer_schema_length=infer_schema_length,
raise_if_empty=raise_if_empty,
drop_empty_rows=drop_empty_rows,
has_header=has_header,
columns=columns,
)
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/io/test_spreadsheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def path_ods_mixed(io_files_path: Path) -> Path:
return io_files_path / "mixed.ods"


@pytest.fixture()
@pytest.fixture
def path_empty_rows_excel(io_files_path: Path) -> Path:
return io_files_path / "test_empty_rows.xlsx"

Expand Down

0 comments on commit 48f6e9d

Please sign in to comment.