astronomy-commons · delucchi-cmu · May 3, 2024 · Apr 30, 2024 · May 2, 2024 · May 3, 2024
diff --git a/src/hipscat_import/catalog/file_readers.py b/src/hipscat_import/catalog/file_readers.py
@@ -4,6 +4,7 @@
 from typing import Any, Dict, Union
 
 import pyarrow.parquet as pq
+from astropy.io import ascii as ascii_reader
 from astropy.table import Table
 from hipscat.io import FilePointer, file_io
 
@@ -38,14 +39,16 @@ def get_file_reader(
         skip_column_names (list[str]): for fits files, a list of columns to remove.
         type_map (dict): for CSV files, the data types to use for columns
     """
-    if "csv" in file_format:
+    if file_format == "csv":
         return CsvReader(
             chunksize=chunksize,
             schema_file=schema_file,
             column_names=column_names,
             type_map=type_map,
             **kwargs,
         )
+    if file_format == "ecsv":
+        return AstropyEcsvReader(**kwargs)
     if file_format == "fits":
         return FitsReader(
             chunksize=chunksize,
@@ -180,6 +183,27 @@ def provenance_info(self) -> dict:
         return provenance_info
 
 
+class AstropyEcsvReader(InputReader):
+    """Reads astropy ascii .ecsv files.
+
+    Note that this is NOT a chunked reader: each file is loaded in full and
+    yielded as a single pandas DataFrame. Use caution when reading
+    large ECSV files with this reader.
+
+    Args:
+        kwargs: additional keyword arguments, stored on the reader and
+            forwarded on every call to ``regular_file_exists`` and to
+            ``astropy.io.ascii.read``.
+    """
+
+    def __init__(self, **kwargs):
+        self.kwargs = kwargs
+
+    def read(self, input_file, read_columns=None):
+        """Read the whole ECSV file and yield it as one pandas DataFrame.
+
+        Args:
+            input_file: the ECSV file to read.
+            read_columns: optional list of column names. When non-empty, it
+                is passed to astropy as ``include_names`` (overriding any
+                ``include_names`` given at construction) for this call only.
+
+        Yields:
+            a single pandas DataFrame converted from the astropy table.
+        """
+        self.regular_file_exists(input_file, **self.kwargs)
+
+        # Build the per-call arguments on a copy: writing ``include_names``
+        # into ``self.kwargs`` would make one call's column selection leak
+        # into every later ``read`` on the same reader instance.
+        read_kwargs = dict(self.kwargs)
+        if read_columns:
+            read_kwargs["include_names"] = read_columns
+
+        astropy_table = ascii_reader.read(input_file, format="ecsv", **read_kwargs)
+        yield astropy_table.to_pandas()
+
+    def provenance_info(self):
+        """Return a dict naming this reader type."""
+        return {"input_reader_type": "AstropyEcsvReader"}
+
+
 class FitsReader(InputReader):
     """Chunked FITS file reader.
 

diff --git a/tests/hipscat_import/catalog/test_run_round_trip.py b/tests/hipscat_import/catalog/test_run_round_trip.py
@@ -477,3 +477,35 @@ def test_import_gaia_minimum(
     assert "Norder" in column_names
     assert "Dir" in column_names
     assert "Npix" in column_names
+
+
+@pytest.mark.dask
+def test_gaia_ecsv(dask_client, formats_dir, tmp_path):
+    """Import a small Gaia ECSV file with the "ecsv" reader and check the result."""
+    ecsv_path = os.path.join(formats_dir, "gaia_epoch.ecsv")
+
+    import_settings = {
+        "output_artifact_name": "gaia_e_astropy",
+        "input_file_list": [ecsv_path],
+        "file_reader": "ecsv",
+        "ra_column": "ra",
+        "dec_column": "dec",
+        "sort_columns": "solution_id,source_id",
+        "output_path": tmp_path,
+        "dask_tmp": tmp_path,
+        "highest_healpix_order": 2,
+        "pixel_threshold": 3_000,
+        "progress_bar": False,
+    }
+    import_args = ImportArguments(**import_settings)
+
+    runner.run(import_args, dask_client)
+
+    # The catalog written by the run should load back from its output path.
+    imported = Catalog.read_from_hipscat(import_args.catalog_path)
+    assert imported.on_disk
+    assert imported.catalog_path == import_args.catalog_path
+    assert imported.catalog_info.total_rows == 3
+    assert len(imported.get_healpix_pixels()) == 1