Skip to content

Commit dc85285

Browse files
committed
Fixup: only parsing the floats
1 parent e8df1d3 commit dc85285

File tree

1 file changed

+3
-7
lines changed

1 file changed

+3
-7
lines changed

tests/workflows/test_from_csv_to_parquet.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -114,9 +114,7 @@ def test_from_csv_to_parquet(
114 114
sep="\t",
115 115
names=SCHEMA.keys(),
116 116
# 'dtype' and 'converters' cannot overlap
117-
dtype={
118-
col: dtype for col, dtype in SCHEMA.items() if dtype == "string[pyarrow]"
119-
},
117+
dtype={col: dtype for col, dtype in SCHEMA.items() if dtype != "Float64"},
120 118
storage_options=s3.storage_options,
121 119
on_bad_lines="skip",
122 120
# Some bad files have '#' in float values
@@ -127,10 +125,8 @@ def test_from_csv_to_parquet(
127 125
},
128 126
)
129 127

130-
# Now we can safely convert the numeric columns
131-
df = df.astype(
132-
{col: dtype for col, dtype in SCHEMA.items() if dtype != "string[pyarrow]"}
133-
)
128+
# Now we can safely convert the float columns
129+
df = df.astype({col: dtype for col, dtype in SCHEMA.items() if dtype == "Float64"})
134 130

135 131
df = df.map_partitions(
136 132
lambda xdf: xdf.drop_duplicates(subset=["SOURCEURL"], keep="first")

0 commit comments

Comments
 (0)