Fixup: only defer dtype conversion for the float columns

milesgranger · milesgranger · commit dc85285b344b · 2023-05-15T10:38:33.000+02:00
diff --git a/tests/workflows/test_from_csv_to_parquet.py b/tests/workflows/test_from_csv_to_parquet.py
@@ -114,9 +114,7 @@ def test_from_csv_to_parquet(
         sep="\t",
         names=SCHEMA.keys(),
         # 'dtype' and 'converters' cannot overlap
-        dtype={
-            col: dtype for col, dtype in SCHEMA.items() if dtype == "string[pyarrow]"
-        },
+        dtype={col: dtype for col, dtype in SCHEMA.items() if dtype != "Float64"},
         storage_options=s3.storage_options,
         on_bad_lines="skip",
         # Some bad files have '#' in float values
@@ -127,10 +125,8 @@ def test_from_csv_to_parquet(
         },
     )
 
-    # Now we can safely convert the numeric columns
-    df = df.astype(
-        {col: dtype for col, dtype in SCHEMA.items() if dtype != "string[pyarrow]"}
-    )
+    # Now we can safely convert the float columns
+    df = df.astype({col: dtype for col, dtype in SCHEMA.items() if dtype == "Float64"})
 
     df = df.map_partitions(
         lambda xdf: xdf.drop_duplicates(subset=["SOURCEURL"], keep="first")