Skip to content

Commit

Permalink
[VD-4841] Spreadsheet diff: fail gracefully if the input string is an…
Browse files Browse the repository at this point in the history
… invalid CSV (e.g. NUL characters)
  • Loading branch information
martin-village committed Aug 25, 2024
1 parent e0abed2 commit a499786
Showing 1 changed file with 18 additions and 15 deletions.
33 changes: 18 additions & 15 deletions target_elasticsearch/sinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,21 +650,24 @@ def spreadsheet_diff(csv_string1, csv_string2):
def safe_read_csv(csv_string):
    """Parse a CSV string into a DataFrame of strings without raising.

    Every cell is read as ``str`` and empty cells are kept as empty strings
    rather than converted to NaN. No header row is assumed: columns are
    labelled ``0 .. max_columns - 1``, where ``max_columns`` is the widest
    row found by a preliminary ``csv.reader`` pass, so ragged rows still fit.

    Args:
        csv_string: The CSV document as text. ``None``, empty and
            whitespace-only values are treated as an empty document.

    Returns:
        A ``pandas.DataFrame`` with the parsed rows, or an empty DataFrame
        when the input is blank or cannot be parsed (for example because it
        contains NUL characters). Parse failures are logged as warnings and
        never propagated to the caller.
    """
    # Blank (or missing) documents short-circuit to an empty frame.
    if not csv_string or not csv_string.strip():
        return pd.DataFrame()

    try:
        # Pre-scan for the widest row so pandas gets enough column names
        # and does not reject rows longer than the first one.
        # NOTE(review): this pre-scan uses csv.reader's default dialect (no
        # escapechar) while read_csv below uses escapechar='\\'; the two may
        # count columns differently for inputs with escaped delimiters --
        # confirm whether that matters for the diff.
        max_columns = max(
            (len(row) for row in csv.reader(io.StringIO(csv_string))),
            default=0,
        )

        return pd.read_csv(
            io.StringIO(csv_string),
            dtype=str,
            keep_default_na=False,
            quotechar='"',
            escapechar='\\',
            names=range(max_columns),
            header=None,
            on_bad_lines='warn',
        )
    except Exception as e:
        # Deliberately broad: any parsing failure (csv or pandas) degrades
        # to "empty document" instead of aborting the whole diff.
        logging.warning(
            "Invalid CSV string input, pandas exception: %s. "
            "Consider a null document",
            e,
        )
        return pd.DataFrame()

df1 = safe_read_csv(csv_string1)
df2 = safe_read_csv(csv_string2)
Expand Down

0 comments on commit a499786

Please sign in to comment.