Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

depr(python, rust!): Rename write_csv parameter quote to quote_char #11583

Merged
merged 11 commits into from
Oct 9, 2023
6 changes: 3 additions & 3 deletions crates/polars-plan/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ impl LogicalPlanBuilder {
#[cfg(feature = "csv")]
pub fn scan_csv<P: Into<std::path::PathBuf>>(
path: P,
delimiter: u8,
delimiter_char: u8,
has_header: bool,
ignore_errors: bool,
mut skip_rows: usize,
Expand Down Expand Up @@ -314,7 +314,7 @@ impl LogicalPlanBuilder {
// this needs a way to estimate bytes/rows.
let (mut inferred_schema, rows_read, bytes_read) = infer_file_schema(
&reader_bytes,
delimiter,
delimiter_char,
infer_schema_length,
has_header,
schema_overwrite,
Expand Down Expand Up @@ -368,7 +368,7 @@ impl LogicalPlanBuilder {
scan_type: FileScan::Csv {
options: CsvParserOptions {
has_header,
delimiter,
delimiter: delimiter_char,
ignore_errors,
skip_rows,
low_memory,
Expand Down
43 changes: 21 additions & 22 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
from polars.exceptions import NoRowsReturnedError, TooManyRowsReturnedError
from polars.functions import col, lit
from polars.io._utils import _is_glob_pattern, _is_local_file
from polars.io.csv._utils import _check_arg_is_1byte
from polars.io.spreadsheet._write_utils import (
_unpack_multi_column_dict,
_xl_apply_conditional_formats,
Expand Down Expand Up @@ -655,9 +656,9 @@ def _read_csv(
*,
has_header: bool = True,
columns: Sequence[int] | Sequence[str] | None = None,
separator: str = ",",
delimiter_char: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: None | (SchemaDict | Sequence[PolarsDataType]) = None,
schema: None | SchemaDict = None,
Expand Down Expand Up @@ -734,7 +735,7 @@ def _read_csv(
scan = scan_csv(
source,
has_header=has_header,
separator=separator,
delimiter_char=delimiter_char,
comment_char=comment_char,
quote_char=quote_char,
skip_rows=skip_rows,
Expand Down Expand Up @@ -775,7 +776,7 @@ def _read_csv(
n_rows,
skip_rows,
projection,
separator,
ord(delimiter_char),
rechunk,
columns,
encoding,
Expand All @@ -784,15 +785,15 @@ def _read_csv(
dtype_list,
dtype_slice,
low_memory,
comment_char,
quote_char,
ord(comment_char) if comment_char else None,
ord(quote_char) if quote_char else None,
processed_null_values,
missing_utf8_is_empty_string,
try_parse_dates,
skip_rows_after_header,
_prepare_row_count_args(row_count_name, row_count_offset),
sample_size=sample_size,
eol_char=eol_char,
eol_char=ord(eol_char),
raise_if_empty=raise_if_empty,
truncate_ragged_lines=truncate_ragged_lines,
schema=schema,
Expand Down Expand Up @@ -2441,9 +2442,9 @@ def write_csv(
file: None = None,
*,
has_header: bool = ...,
separator: str = ...,
delimiter_char: str = ...,
line_terminator: str = ...,
quote: str = ...,
quote_char: str = ...,
batch_size: int = ...,
datetime_format: str | None = ...,
date_format: str | None = ...,
Expand All @@ -2460,9 +2461,9 @@ def write_csv(
file: BytesIO | TextIOWrapper | str | Path,
*,
has_header: bool = ...,
separator: str = ...,
delimiter_char: str = ...,
line_terminator: str = ...,
quote: str = ...,
quote_char: str = ...,
batch_size: int = ...,
datetime_format: str | None = ...,
date_format: str | None = ...,
Expand All @@ -2478,9 +2479,9 @@ def write_csv(
file: BytesIO | TextIOWrapper | str | Path | None = None,
*,
has_header: bool = True,
separator: str = ",",
delimiter_char: str = ",",
line_terminator: str = "\n",
quote: str = '"',
quote_char: str = '"',
batch_size: int = 1024,
datetime_format: str | None = None,
date_format: str | None = None,
Expand All @@ -2499,11 +2500,11 @@ def write_csv(
(default), the output is returned as a string instead.
has_header
Whether to include header in the CSV output.
separator
delimiter_char
Separate CSV fields with this symbol.
line_terminator
String used to end each row.
quote
quote_char
Byte to use as quoting character.
batch_size
Number of rows that will be processed per thread.
Expand Down Expand Up @@ -2555,13 +2556,11 @@ def write_csv(
... }
... )
>>> path: pathlib.Path = dirpath / "new_file.csv"
>>> df.write_csv(path, separator=",")
>>> df.write_csv(path, delimiter_char=",")

"""
if len(separator) != 1:
raise ValueError("only single byte separator is allowed")
if len(quote) != 1:
raise ValueError("only single byte quote char is allowed")
_check_arg_is_1byte("delimiter_char", delimiter_char, can_be_empty=False)
_check_arg_is_1byte("quote_char", quote_char, can_be_empty=True)
if not null_value:
null_value = None

Expand All @@ -2577,9 +2576,9 @@ def write_csv(
self._df.write_csv(
file,
has_header,
ord(separator),
ord(delimiter_char),
line_terminator,
ord(quote),
ord(quote_char),
batch_size,
datetime_format,
date_format,
Expand Down
8 changes: 4 additions & 4 deletions py-polars/polars/io/csv/batched_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ def __init__(
*,
has_header: bool = True,
columns: Sequence[int] | Sequence[str] | None = None,
separator: str = ",",
delimiter_char: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: None | (SchemaDict | Sequence[PolarsDataType]) = None,
null_values: str | Sequence[str] | dict[str, str] | None = None,
Expand Down Expand Up @@ -83,7 +83,7 @@ def __init__(
n_rows=n_rows,
skip_rows=skip_rows,
projection=projection,
separator=separator,
delimiter_char=delimiter_char,
rechunk=rechunk,
columns=columns,
encoding=encoding,
Expand Down Expand Up @@ -123,7 +123,7 @@ def next_batches(self, n: int) -> list[DataFrame] | None:
--------
>>> reader = pl.read_csv_batched(
... "./tpch/tables_scale_100/lineitem.tbl",
... separator="|",
... delimiter_char="|",
... try_parse_dates=True,
... ) # doctest: +SKIP
>>> reader.next_batches(5) # doctest: +SKIP
Expand Down
52 changes: 30 additions & 22 deletions py-polars/polars/io/csv/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ def read_csv(
has_header: bool = True,
columns: Sequence[int] | Sequence[str] | None = None,
new_columns: Sequence[str] | None = None,
separator: str = ",",
delimiter_char: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: Mapping[str, PolarsDataType] | Sequence[PolarsDataType] | None = None,
schema: SchemaDict | None = None,
Expand All @@ -50,7 +50,7 @@ def read_csv(
raise_if_empty: bool = True,
truncate_ragged_lines: bool = False,
) -> DataFrame:
"""
r"""
Read a CSV file into a DataFrame.

Parameters
Expand All @@ -72,7 +72,7 @@ def read_csv(
Rename columns right after parsing the CSV file. If the given
list is shorter than the width of the DataFrame the remaining
columns will have their original name.
separator
delimiter_char
Single byte character to use as delimiter in the file.
comment_char
Single byte character that indicates the start of a comment line,
Expand Down Expand Up @@ -159,7 +159,9 @@ def read_csv(
Set the sample size. This is used to sample statistics to estimate the
allocation needed.
eol_char
Single byte end of line character.
Single byte end of line character (default: `\n`). When encountering a file
with Windows line endings (`\r\n`), one can go with the default `\n`. The extra
`\r` will be removed when processed.
raise_if_empty
When there is no data in the source, ``NoDataError`` is raised. If this parameter
is set to False, an empty DataFrame (with no columns) is returned instead.
Expand All @@ -182,7 +184,7 @@ def read_csv(
an expensive operation.

"""
_check_arg_is_1byte("separator", separator, can_be_empty=False)
_check_arg_is_1byte("delimiter_char", delimiter_char, can_be_empty=False)
_check_arg_is_1byte("comment_char", comment_char, can_be_empty=False)
_check_arg_is_1byte("quote_char", quote_char, can_be_empty=True)
_check_arg_is_1byte("eol_char", eol_char, can_be_empty=False)
Expand Down Expand Up @@ -239,7 +241,7 @@ def read_csv(
encoding=encoding,
),
pa.csv.ParseOptions(
delimiter=separator,
delimiter=delimiter_char,
quote_char=quote_char if quote_char else False,
double_quote=quote_char is not None and quote_char == '"',
),
Expand Down Expand Up @@ -365,7 +367,7 @@ def read_csv(
data,
has_header=has_header,
columns=columns if columns else projection,
separator=separator,
delimiter_char=delimiter_char,
comment_char=comment_char,
quote_char=quote_char,
skip_rows=skip_rows,
Expand Down Expand Up @@ -402,9 +404,9 @@ def read_csv_batched(
has_header: bool = True,
columns: Sequence[int] | Sequence[str] | None = None,
new_columns: Sequence[str] | None = None,
separator: str = ",",
delimiter_char: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: Mapping[str, PolarsDataType] | Sequence[PolarsDataType] | None = None,
null_values: str | Sequence[str] | dict[str, str] | None = None,
Expand All @@ -425,7 +427,7 @@ def read_csv_batched(
eol_char: str = "\n",
raise_if_empty: bool = True,
) -> BatchedCsvReader:
"""
r"""
Read a CSV file in batches.

Upon creation of the ``BatchedCsvReader``, Polars will gather statistics and
Expand All @@ -451,7 +453,7 @@ def read_csv_batched(
Rename columns right after parsing the CSV file. If the given
list is shorter than the width of the DataFrame the remaining
columns will have their original name.
separator
delimiter_char
Single byte character to use as delimiter in the file.
comment_char
Single byte character that indicates the start of a comment line,
Expand Down Expand Up @@ -517,7 +519,9 @@ def read_csv_batched(
Set the sample size. This is used to sample statistics to estimate the
allocation needed.
eol_char
Single byte end of line character.
Single byte end of line character (default: `\n`). When encountering a file
with Windows line endings (`\r\n`), one can go with the default `\n`. The extra
`\r` will be removed when processed.
raise_if_empty
When there is no data in the source, ``NoDataError`` is raised. If this parameter
is set to False, ``None`` will be returned from ``next_batches(n)`` instead.
Expand All @@ -533,7 +537,9 @@ def read_csv_batched(
Examples
--------
>>> reader = pl.read_csv_batched(
... "./tpch/tables_scale_100/lineitem.tbl", separator="|", try_parse_dates=True
... "./tpch/tables_scale_100/lineitem.tbl",
... delimiter_char="|",
... try_parse_dates=True,
... ) # doctest: +SKIP
>>> batches = reader.next_batches(5) # doctest: +SKIP
>>> for df in batches: # doctest: +SKIP
Expand Down Expand Up @@ -662,7 +668,7 @@ def read_csv_batched(
source,
has_header=has_header,
columns=columns if columns else projection,
separator=separator,
delimiter_char=delimiter_char,
comment_char=comment_char,
quote_char=quote_char,
skip_rows=skip_rows,
Expand Down Expand Up @@ -692,9 +698,9 @@ def scan_csv(
source: str | Path,
*,
has_header: bool = True,
separator: str = ",",
delimiter_char: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: SchemaDict | Sequence[PolarsDataType] | None = None,
schema: SchemaDict | None = None,
Expand All @@ -717,7 +723,7 @@ def scan_csv(
raise_if_empty: bool = True,
truncate_ragged_lines: bool = False,
) -> LazyFrame:
"""
r"""
Lazily read from a CSV file or multiple files via glob patterns.

This allows the query optimizer to push down predicates and
Expand All @@ -733,7 +739,7 @@ def scan_csv(
If set to False, column names will be autogenerated in the
following format: ``column_x``, with ``x`` being an
enumeration over every column in the dataset starting at 1.
separator
delimiter_char
Single byte character to use as delimiter in the file.
comment_char
Single byte character that indicates the start of a comment line,
Expand Down Expand Up @@ -796,7 +802,9 @@ def scan_csv(
can be inferred, as well as a handful of others. If this does not succeed,
the column remains of data type ``pl.Utf8``.
eol_char
Single byte end of line character
Single byte end of line character (default: `\n`). When encountering a file
with Windows line endings (`\r\n`), one can go with the default `\n`. The extra
`\r` will be removed when processed.
new_columns
Provide an explicit list of string column names to use (for example, when
scanning a headerless CSV file). If the given list is shorter than the width of
Expand Down Expand Up @@ -891,7 +899,7 @@ def with_column_names(cols: list[str]) -> list[str]:
else:
return new_columns # type: ignore[return-value]

_check_arg_is_1byte("separator", separator, can_be_empty=False)
_check_arg_is_1byte("delimiter_char", delimiter_char, can_be_empty=False)
_check_arg_is_1byte("comment_char", comment_char, can_be_empty=False)
_check_arg_is_1byte("quote_char", quote_char, can_be_empty=True)

Expand All @@ -901,7 +909,7 @@ def with_column_names(cols: list[str]) -> list[str]:
return pl.LazyFrame._scan_csv(
source,
has_header=has_header,
separator=separator,
delimiter_char=delimiter_char,
comment_char=comment_char,
quote_char=quote_char,
skip_rows=skip_rows,
Expand Down
Loading