Skip to content

Commit

Permalink
fix(python,rust): fix for write_csv when using non-default "quote" char (#11474)
Browse files (browse the repository at this point in the history)
  • Loading branch information
alexander-beedie authored Oct 3, 2023
1 parent 5ca9fa4 commit 5ffc501
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 28 deletions.
52 changes: 26 additions & 26 deletions crates/polars-io/src/csv/write_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,33 +22,33 @@ use super::write::QuoteStyle;

fn fmt_and_escape_str(f: &mut Vec<u8>, v: &str, options: &SerializeOptions) -> std::io::Result<()> {
if options.quote_style == QuoteStyle::Never {
write!(f, "{v}")
} else if v.is_empty() {
write!(f, "\"\"")
} else {
let needs_escaping = memchr(options.quote, v.as_bytes()).is_some();
if needs_escaping {
let replaced = unsafe {
// Replace from single quote " to double quote "".
v.replace(
std::str::from_utf8_unchecked(&[options.quote]),
std::str::from_utf8_unchecked(&[options.quote, options.quote]),
)
};
return write!(f, "\"{replaced}\"");
}
let surround_with_quotes = match options.quote_style {
QuoteStyle::Always | QuoteStyle::NonNumeric => true,
QuoteStyle::Necessary => memchr2(options.delimiter, b'\n', v.as_bytes()).is_some(),
QuoteStyle::Never => false,
return write!(f, "{v}");
}
let quote = options.quote as char;
if v.is_empty() {
return write!(f, "{quote}{quote}");
}
let needs_escaping = memchr(options.quote, v.as_bytes()).is_some();
if needs_escaping {
let replaced = unsafe {
// Replace from single quote " to double quote "".
v.replace(
std::str::from_utf8_unchecked(&[options.quote]),
std::str::from_utf8_unchecked(&[options.quote, options.quote]),
)
};

let quote = options.quote as char;
if surround_with_quotes {
write!(f, "{quote}{v}{quote}")
} else {
write!(f, "{v}")
}
return write!(f, "{quote}{replaced}{quote}");
}
let surround_with_quotes = match options.quote_style {
QuoteStyle::Always | QuoteStyle::NonNumeric => true,
QuoteStyle::Necessary => memchr2(options.delimiter, b'\n', v.as_bytes()).is_some(),
QuoteStyle::Never => false,
};

if surround_with_quotes {
write!(f, "{quote}{v}{quote}")
} else {
write!(f, "{v}")
}
}

Expand Down
9 changes: 7 additions & 2 deletions py-polars/tests/unit/io/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,6 @@ def test_csv_quote_char() -> None:
),
]
)

rolling_stones = textwrap.dedent(
"""\
linenum,last_name,first_name
Expand All @@ -576,14 +575,20 @@ def test_csv_quote_char() -> None:
9,J"o"ne"s,Brian
"""
)

for use_pyarrow in (False, True):
out = pl.read_csv(
rolling_stones.encode(), quote_char=None, use_pyarrow=use_pyarrow
)
assert out.shape == (9, 3)
assert_frame_equal(out, expected)

# non-standard quote char
df = pl.DataFrame({"x": ["", "0*0", "xyz"]})
csv_data = df.write_csv(quote="*")

assert csv_data == "x\n**\n*0**0*\nxyz\n"
assert_frame_equal(df, pl.read_csv(io.StringIO(csv_data), quote_char="*"))


def test_csv_empty_quotes_char_1622() -> None:
pl.read_csv(b"a,b,c,d\nA1,B1,C1,1\nA2,B2,C2,2\n", quote_char="")
Expand Down

0 comments on commit 5ffc501

Please sign in to comment.