1 file changed: +3 -7 lines changed
Original file line number | Diff line number | Diff line change
@@ -114,9 +114,7 @@ def test_from_csv_to_parquet(
114114 sep = "\t " ,
115115 names = SCHEMA .keys (),
116116 # 'dtype' and 'converters' cannot overlap
117- dtype = {
118- col : dtype for col , dtype in SCHEMA .items () if dtype == "string[pyarrow]"
119- },
117+ dtype = {col : dtype for col , dtype in SCHEMA .items () if dtype != "Float64" },
120118 storage_options = s3 .storage_options ,
121119 on_bad_lines = "skip" ,
122120 # Some bad files have '#' in float values
@@ -127,10 +125,8 @@ def test_from_csv_to_parquet(
127125 },
128126 )
129127
130- # Now we can safely convert the numeric columns
131- df = df .astype (
132- {col : dtype for col , dtype in SCHEMA .items () if dtype != "string[pyarrow]" }
133- )
128+ # Now we can safely convert the float columns
129+ df = df .astype ({col : dtype for col , dtype in SCHEMA .items () if dtype == "Float64" })
134130
135131 df = df .map_partitions (
136132 lambda xdf : xdf .drop_duplicates (subset = ["SOURCEURL" ], keep = "first" )
You can’t perform that action at this time.
0 commit comments