Perform extra tests against empty files. (#456)

astronomy-commons · Dec 10, 2024 · 33aeacd
1 parent ef94ded
commit 33aeacd
Show file tree

Hide file tree

Showing 4 changed files with 5 additions and 2 deletions.
diff --git a/src/hats_import/catalog/file_readers.py b/src/hats_import/catalog/file_readers.py
@@ -438,11 +438,11 @@ def read(self, input_file, read_columns=None):
             if nrows + batch.num_rows > self.chunksize:
                 # We've hit the chunksize so load to a DataFrame and yield.
                 # There should always be at least one batch in here since batch_size == self.chunksize above.
-                yield pyarrow.Table.from_batches(batches).to_pandas()
+                yield pyarrow.Table.from_batches(batches).replace_schema_metadata()
                 batches, nrows = [], 0
 
             batches.append(batch)
             nrows += batch.num_rows
 
         if len(batches) > 0:
-            yield pyarrow.Table.from_batches(batches).to_pandas()
+            yield pyarrow.Table.from_batches(batches).replace_schema_metadata()
diff --git a/tests/data/indexed_files/csv_list_empty.txt b/tests/data/indexed_files/csv_list_empty.txt
diff --git a/tests/data/mixed_schema/input_03.csv b/tests/data/mixed_schema/input_03.csv
@@ -0,0 +1 @@
+id,ra,dec,ra_error,dec_error,comment,code
diff --git a/tests/hats_import/catalog/test_run_round_trip.py b/tests/hats_import/catalog/test_run_round_trip.py
@@ -79,6 +79,7 @@ def test_import_mixed_schema_csv(
         input_file_list=[
             Path(mixed_schema_csv_dir) / "input_01.csv",
             Path(mixed_schema_csv_dir) / "input_02.csv",
+            Path(mixed_schema_csv_dir) / "input_03.csv",
         ],
         output_path=tmp_path,
         dask_tmp=tmp_path,
@@ -921,6 +922,7 @@ def test_import_indexed_csv(
         input_file_list=[
             indexed_files_dir / "csv_list_double_1_of_2.txt",
             indexed_files_dir / "csv_list_double_2_of_2.txt",
+            indexed_files_dir / "csv_list_empty.txt",
         ],
         output_path=tmp_path,
         file_reader="indexed_csv",