Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

depr(python, rust!): Rename write_csv parameter quote to quote_char #11583

Merged
merged 11 commits into from
Oct 9, 2023
6 changes: 3 additions & 3 deletions crates/polars-plan/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ impl LogicalPlanBuilder {
#[cfg(feature = "csv")]
pub fn scan_csv<P: Into<std::path::PathBuf>>(
path: P,
delimiter: u8,
separator: u8,
has_header: bool,
ignore_errors: bool,
mut skip_rows: usize,
Expand Down Expand Up @@ -314,7 +314,7 @@ impl LogicalPlanBuilder {
// this needs a way to estimate bytes/rows.
let (mut inferred_schema, rows_read, bytes_read) = infer_file_schema(
&reader_bytes,
delimiter,
separator,
infer_schema_length,
has_header,
schema_overwrite,
Expand Down Expand Up @@ -368,7 +368,7 @@ impl LogicalPlanBuilder {
scan_type: FileScan::Csv {
options: CsvParserOptions {
has_header,
delimiter,
delimiter: separator,
ignore_errors,
skip_rows,
low_memory,
Expand Down
28 changes: 14 additions & 14 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
from polars.exceptions import NoRowsReturnedError, TooManyRowsReturnedError
from polars.functions import col, lit
from polars.io._utils import _is_glob_pattern, _is_local_file
from polars.io.csv._utils import _check_arg_is_1byte
from polars.io.spreadsheet._write_utils import (
_unpack_multi_column_dict,
_xl_apply_conditional_formats,
Expand Down Expand Up @@ -657,7 +658,7 @@ def _read_csv(
columns: Sequence[int] | Sequence[str] | None = None,
separator: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: None | (SchemaDict | Sequence[PolarsDataType]) = None,
schema: None | SchemaDict = None,
Expand Down Expand Up @@ -775,7 +776,7 @@ def _read_csv(
n_rows,
skip_rows,
projection,
separator,
ord(separator),
rechunk,
columns,
encoding,
Expand All @@ -784,15 +785,15 @@ def _read_csv(
dtype_list,
dtype_slice,
low_memory,
comment_char,
quote_char,
ord(comment_char) if comment_char else None,
ord(quote_char) if quote_char else None,
processed_null_values,
missing_utf8_is_empty_string,
try_parse_dates,
skip_rows_after_header,
_prepare_row_count_args(row_count_name, row_count_offset),
sample_size=sample_size,
eol_char=eol_char,
eol_char=ord(eol_char),
raise_if_empty=raise_if_empty,
truncate_ragged_lines=truncate_ragged_lines,
schema=schema,
Expand Down Expand Up @@ -2443,7 +2444,7 @@ def write_csv(
has_header: bool = ...,
separator: str = ...,
line_terminator: str = ...,
quote: str = ...,
quote_char: str = ...,
batch_size: int = ...,
datetime_format: str | None = ...,
date_format: str | None = ...,
Expand All @@ -2462,7 +2463,7 @@ def write_csv(
has_header: bool = ...,
separator: str = ...,
line_terminator: str = ...,
quote: str = ...,
quote_char: str = ...,
batch_size: int = ...,
datetime_format: str | None = ...,
date_format: str | None = ...,
Expand All @@ -2473,14 +2474,15 @@ def write_csv(
) -> None:
...

@deprecate_renamed_parameter("quote", "quote_char", version="0.19.7")
svaningelgem marked this conversation as resolved.
Show resolved Hide resolved
def write_csv(
self,
file: BytesIO | TextIOWrapper | str | Path | None = None,
*,
has_header: bool = True,
separator: str = ",",
line_terminator: str = "\n",
quote: str = '"',
quote_char: str = '"',
batch_size: int = 1024,
datetime_format: str | None = None,
date_format: str | None = None,
Expand All @@ -2503,7 +2505,7 @@ def write_csv(
Separate CSV fields with this symbol.
line_terminator
String used to end each row.
quote
quote_char
Byte to use as quoting character.
batch_size
Number of rows that will be processed per thread.
Expand Down Expand Up @@ -2558,10 +2560,8 @@ def write_csv(
>>> df.write_csv(path, separator=",")

"""
if len(separator) != 1:
raise ValueError("only single byte separator is allowed")
if len(quote) != 1:
raise ValueError("only single byte quote char is allowed")
_check_arg_is_1byte("separator", separator, can_be_empty=False)
_check_arg_is_1byte("quote_char", quote_char, can_be_empty=True)
if not null_value:
null_value = None

Expand All @@ -2579,7 +2579,7 @@ def write_csv(
has_header,
ord(separator),
line_terminator,
ord(quote),
ord(quote_char),
batch_size,
datetime_format,
date_format,
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/io/csv/batched_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(
columns: Sequence[int] | Sequence[str] | None = None,
separator: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: None | (SchemaDict | Sequence[PolarsDataType]) = None,
null_values: str | Sequence[str] | dict[str, str] | None = None,
Expand Down
28 changes: 18 additions & 10 deletions py-polars/polars/io/csv/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def read_csv(
new_columns: Sequence[str] | None = None,
separator: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: Mapping[str, PolarsDataType] | Sequence[PolarsDataType] | None = None,
schema: SchemaDict | None = None,
Expand All @@ -50,7 +50,7 @@ def read_csv(
raise_if_empty: bool = True,
truncate_ragged_lines: bool = False,
) -> DataFrame:
"""
r"""
Read a CSV file into a DataFrame.

Parameters
Expand Down Expand Up @@ -159,7 +159,9 @@ def read_csv(
Set the sample size. This is used to sample statistics to estimate the
allocation needed.
eol_char
Single byte end of line character.
Single byte end of line character (default: `\n`). When encountering a file
with Windows line endings (`\r\n`), one can go with the default `\n`. The extra
`\r` will be removed when processed.
raise_if_empty
When there is no data in the source, ``NoDataError`` is raised. If this parameter
is set to False, an empty DataFrame (with no columns) is returned instead.
Expand Down Expand Up @@ -404,7 +406,7 @@ def read_csv_batched(
new_columns: Sequence[str] | None = None,
separator: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: Mapping[str, PolarsDataType] | Sequence[PolarsDataType] | None = None,
null_values: str | Sequence[str] | dict[str, str] | None = None,
Expand All @@ -425,7 +427,7 @@ def read_csv_batched(
eol_char: str = "\n",
raise_if_empty: bool = True,
) -> BatchedCsvReader:
"""
r"""
Read a CSV file in batches.

Upon creation of the ``BatchedCsvReader``, Polars will gather statistics and
Expand Down Expand Up @@ -517,7 +519,9 @@ def read_csv_batched(
Set the sample size. This is used to sample statistics to estimate the
allocation needed.
eol_char
Single byte end of line character.
Single byte end of line character (default: `\n`). When encountering a file
with Windows line endings (`\r\n`), one can go with the default `\n`. The extra
`\r` will be removed when processed.
raise_if_empty
When there is no data in the source, ``NoDataError`` is raised. If this parameter
is set to False, ``None`` will be returned from ``next_batches(n)`` instead.
Expand All @@ -533,7 +537,9 @@ def read_csv_batched(
Examples
--------
>>> reader = pl.read_csv_batched(
... "./tpch/tables_scale_100/lineitem.tbl", separator="|", try_parse_dates=True
... "./tpch/tables_scale_100/lineitem.tbl",
... separator="|",
... try_parse_dates=True,
... ) # doctest: +SKIP
>>> batches = reader.next_batches(5) # doctest: +SKIP
>>> for df in batches: # doctest: +SKIP
Expand Down Expand Up @@ -694,7 +700,7 @@ def scan_csv(
has_header: bool = True,
separator: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: SchemaDict | Sequence[PolarsDataType] | None = None,
schema: SchemaDict | None = None,
Expand All @@ -717,7 +723,7 @@ def scan_csv(
raise_if_empty: bool = True,
truncate_ragged_lines: bool = False,
) -> LazyFrame:
"""
r"""
Lazily read from a CSV file or multiple files via glob patterns.

This allows the query optimizer to push down predicates and
Expand Down Expand Up @@ -796,7 +802,9 @@ def scan_csv(
can be inferred, as well as a handful of others. If this does not succeed,
the column remains of data type ``pl.Utf8``.
eol_char
Single byte end of line character
Single byte end of line character (default: `\n`). When encountering a file
with Windows line endings (`\r\n`), one can go with the default `\n`. The extra
`\r` will be removed when processed.
new_columns
Provide an explicit list of string column names to use (for example, when
scanning a headerless CSV file). If the given list is shorter than the width of
Expand Down
27 changes: 14 additions & 13 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
)
from polars.dependencies import dataframe_api_compat, subprocess
from polars.io._utils import _is_local_file, _is_supported_cloud
from polars.io.csv._utils import _check_arg_is_1byte
from polars.io.ipc.anonymous_scan import _scan_ipc_fsspec
from polars.io.parquet.anonymous_scan import _scan_parquet_fsspec
from polars.lazyframe.group_by import LazyGroupBy
Expand Down Expand Up @@ -316,7 +317,7 @@ def _scan_csv(
has_header: bool = True,
separator: str = ",",
comment_char: str | None = None,
quote_char: str | None = r'"',
quote_char: str | None = '"',
skip_rows: int = 0,
dtypes: SchemaDict | None = None,
schema: SchemaDict | None = None,
Expand Down Expand Up @@ -358,16 +359,16 @@ def _scan_csv(
self = cls.__new__(cls)
self._ldf = PyLazyFrame.new_from_csv(
source,
separator,
ord(separator),
has_header,
ignore_errors,
skip_rows,
n_rows,
cache,
dtype_list,
low_memory,
comment_char,
quote_char,
ord(comment_char) if comment_char else None,
ord(quote_char) if quote_char else None,
processed_null_values,
missing_utf8_is_empty_string,
infer_schema_length,
Expand All @@ -377,7 +378,7 @@ def _scan_csv(
encoding,
_prepare_row_count_args(row_count_name, row_count_offset),
try_parse_dates,
eol_char=eol_char,
eol_char=ord(eol_char),
raise_if_empty=raise_if_empty,
truncate_ragged_lines=truncate_ragged_lines,
schema=schema,
Expand Down Expand Up @@ -2026,14 +2027,15 @@ def sink_ipc(
maintain_order=maintain_order,
)

@deprecate_renamed_parameter("quote", "quote_char", version="0.19.7")
def sink_csv(
self,
path: str | Path,
*,
has_header: bool = True,
separator: str = ",",
line_terminator: str = "\n",
quote: str = '"',
quote_char: str = '"',
batch_size: int = 1024,
datetime_format: str | None = None,
date_format: str | None = None,
Expand Down Expand Up @@ -2064,7 +2066,7 @@ def sink_csv(
Separate CSV fields with this symbol.
line_terminator
String used to end each row.
quote
quote_char
Byte to use as quoting character.
batch_size
Number of rows that will be processed per thread.
Expand Down Expand Up @@ -2097,7 +2099,8 @@ def sink_csv(
This is the default.
- always: This puts quotes around every field. Always.
- never: This never puts quotes around fields, even if that results in
invalid CSV data (e.g.: by not quoting strings containing the separator).
invalid CSV data (e.g.: by not quoting strings containing the
separator).
- non_numeric: This puts quotes around all fields that are non-numeric.
Namely, when writing a field that does not parse as a valid float
or integer, then quotes will be used even if they aren't strictly
Expand Down Expand Up @@ -2128,10 +2131,8 @@ def sink_csv(
>>> lf.sink_csv("out.csv") # doctest: +SKIP

"""
if len(separator) != 1:
raise ValueError("only single byte separator is allowed")
if len(quote) != 1:
raise ValueError("only single byte quote char is allowed")
_check_arg_is_1byte("separator", separator, can_be_empty=False)
_check_arg_is_1byte("quote_char", quote_char, can_be_empty=False)
svaningelgem marked this conversation as resolved.
Show resolved Hide resolved
if not null_value:
null_value = None

Expand All @@ -2149,7 +2150,7 @@ def sink_csv(
has_header=has_header,
separator=ord(separator),
line_terminator=line_terminator,
quote=ord(quote),
quote_char=ord(quote_char),
batch_size=batch_size,
datetime_format=datetime_format,
date_format=date_format,
Expand Down
Loading