Skip to content

Commit

Permalink
fix: allow null strings in CSVs (#646)
Browse files Browse the repository at this point in the history
  • Loading branch information
bjchambers authored Aug 10, 2023
1 parent 2e1cf23 commit ae9ebaf
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 8 deletions.
3 changes: 2 additions & 1 deletion sparrow-py/pysrc/sparrow_py/sources/arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ def __init__(
super().__init__(schema, **kwargs)

self._convert_options = pyarrow.csv.ConvertOptions(
-column_types=schema
+column_types=schema,
+strings_can_be_null=True,
)
self.add_string(csv_string)

Expand Down
4 changes: 3 additions & 1 deletion sparrow-py/pytests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ def csv(self, data: Union[kt.Timestream, pd.DataFrame]) -> None:

# Load the CSV file. Use the schema of the data to set expected types.
golden = pa.read_csv(filename,
-convert_options = pa.csv.ConvertOptions(column_types=data.schema))
+convert_options = pa.csv.ConvertOptions(
+column_types=data.schema,
+strings_can_be_null=True,))

pd.testing.assert_frame_equal(df, golden)

Expand Down
12 changes: 6 additions & 6 deletions sparrow-py/pytests/golden/length_test/test_length.jsonl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
-{"_time":"1996-12-19T16:39:57.000","_subsort":0,"_key_hash":12960666915911099378,"_key":"A","str":"apple","len_key":5,"list":["apple"],"len_list":1}
-{"_time":"1996-12-19T16:39:58.000","_subsort":1,"_key_hash":2867199309159137213,"_key":"B","str":"dog","len_key":3,"list":["dog"],"len_list":1}
-{"_time":"1996-12-19T16:39:59.000","_subsort":2,"_key_hash":12960666915911099378,"_key":"A","str":"carrot","len_key":6,"list":["apple","carrot"],"len_list":2}
-{"_time":"1996-12-19T16:40:00.000","_subsort":3,"_key_hash":12960666915911099378,"_key":"A","str":"","len_key":0,"list":["apple","carrot",""],"len_list":3}
-{"_time":"1996-12-19T16:40:01.000","_subsort":4,"_key_hash":12960666915911099378,"_key":"A","str":"eggplant","len_key":8,"list":["apple","carrot","","eggplant"],"len_list":4}
-{"_time":"1996-12-19T16:40:02.000","_subsort":5,"_key_hash":12960666915911099378,"_key":"A","str":"fig","len_key":3,"list":["apple","carrot","","eggplant","fig"],"len_list":5}
+{"_time":"1996-12-19T16:39:57.000","_subsort":0,"_key_hash":12960666915911099378,"_key":"A","str":"apple","len_key":5.0,"list":["apple"],"len_list":1}
+{"_time":"1996-12-19T16:39:58.000","_subsort":1,"_key_hash":2867199309159137213,"_key":"B","str":"dog","len_key":3.0,"list":["dog"],"len_list":1}
+{"_time":"1996-12-19T16:39:59.000","_subsort":2,"_key_hash":12960666915911099378,"_key":"A","str":"carrot","len_key":6.0,"list":["apple","carrot"],"len_list":2}
+{"_time":"1996-12-19T16:40:00.000","_subsort":3,"_key_hash":12960666915911099378,"_key":"A","str":null,"len_key":null,"list":["apple","carrot"],"len_list":2}
+{"_time":"1996-12-19T16:40:01.000","_subsort":4,"_key_hash":12960666915911099378,"_key":"A","str":"eggplant","len_key":8.0,"list":["apple","carrot","eggplant"],"len_list":3}
+{"_time":"1996-12-19T16:40:02.000","_subsort":5,"_key_hash":12960666915911099378,"_key":"A","str":"fig","len_key":3.0,"list":["apple","carrot","eggplant","fig"],"len_list":4}

0 comments on commit ae9ebaf

Please sign in to comment.