From 5c3655c4b1478c8e338765022221f51921c29966 Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Wed, 21 Jun 2023 19:09:37 -0400 Subject: [PATCH] Add unit test for example in documentation. --- .../catalog/test_run_round_trip.py | 46 +++++- tests/hipscat_import/conftest.py | 5 + .../data/test_formats/catalog.starr | 132 ++++++++++++++++++ 3 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 tests/hipscat_import/data/test_formats/catalog.starr diff --git a/tests/hipscat_import/catalog/test_run_round_trip.py b/tests/hipscat_import/catalog/test_run_round_trip.py index d8c12fed..49484164 100644 --- a/tests/hipscat_import/catalog/test_run_round_trip.py +++ b/tests/hipscat_import/catalog/test_run_round_trip.py @@ -15,7 +15,7 @@ import hipscat_import.catalog.run_import as runner from hipscat_import.catalog.arguments import ImportArguments -from hipscat_import.catalog.file_readers import get_file_reader +from hipscat_import.catalog.file_readers import CsvReader, get_file_reader @pytest.mark.dask @@ -319,3 +319,47 @@ def test_import_constant_healpix_order( assert len(data_frame) == 14 ids = data_frame["id"] assert np.logical_and(ids >= 700, ids < 832).all() + +@pytest.mark.dask +def test_import_starr_file( + dask_client, + formats_dir, + assert_parquet_file_ids, + tmp_path, +): + """Test basic execution. + - tests that we can run pipeline with a totally unknown file type, so long + as a valid InputReader implementation is provided. + """ + + class StarrReader(CsvReader): + """Shallow subclass""" + + args = ImportArguments( + output_catalog_name="starr", + input_path=formats_dir, + input_format="starr", + file_reader=StarrReader(), + output_path=tmp_path, + dask_tmp=tmp_path, + highest_healpix_order=2, + pixel_threshold=3_000, + progress_bar=False, + ) + + runner.run(args, dask_client) + + # Check that the catalog metadata file exists + catalog = Catalog.read_from_hipscat(args.catalog_path) + assert catalog.on_disk + assert catalog.catalog_path == args.catalog_path + assert catalog.catalog_info.total_rows == 131 + assert len(catalog.get_pixels()) == 1 + + # Check that the catalog parquet file exists and contains correct object IDs + output_file = os.path.join( + args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet" + ) + + expected_ids = [*range(700, 831)] + assert_parquet_file_ids(output_file, "id", expected_ids) \ No newline at end of file diff --git a/tests/hipscat_import/conftest.py b/tests/hipscat_import/conftest.py index 2e64d854..fadeb841 100644 --- a/tests/hipscat_import/conftest.py +++ b/tests/hipscat_import/conftest.py @@ -91,6 +91,11 @@ def empty_data_dir(test_data_dir): return os.path.join(test_data_dir, "empty") +@pytest.fixture +def formats_dir(test_data_dir): + return os.path.join(test_data_dir, "test_formats") + + @pytest.fixture def formats_headers_csv(test_data_dir): return os.path.join(test_data_dir, "test_formats", "headers.csv") diff --git a/tests/hipscat_import/data/test_formats/catalog.starr b/tests/hipscat_import/data/test_formats/catalog.starr new file mode 100644 index 00000000..1bb66e95 --- /dev/null +++ b/tests/hipscat_import/data/test_formats/catalog.starr @@ -0,0 +1,132 @@ +id,ra,dec,ra_error,dec_error +700,282.5,-58.5,0,0 +701,299.5,-48.5,0,0 +702,310.5,-27.5,0,0 +703,286.5,-69.5,0,0 +704,326.5,-45.5,0,0 +705,335.5,-32.5,0,0 +706,297.5,-36.5,0,0 +707,308.5,-69.5,0,0 +708,307.5,-37.5,0,0 +709,294.5,-45.5,0,0 +710,341.5,-39.5,0,0 +711,305.5,-49.5,0,0 +712,288.5,-49.5,0,0 +713,298.5,-41.5,0,0 +714,303.5,-37.5,0,0 +715,280.5,-35.5,0,0 +716,305.5,-60.5,0,0 +717,303.5,-43.5,0,0 +718,292.5,-60.5,0,0 +719,344.5,-39.5,0,0 +720,344.5,-47.5,0,0 +721,314.5,-34.5,0,0 +722,350.5,-58.5,0,0 +723,315.5,-68.5,0,0 +724,323.5,-41.5,0,0 +725,308.5,-41.5,0,0 +726,341.5,-37.5,0,0 +727,301.5,-44.5,0,0 +728,328.5,-47.5,0,0 +729,299.5,-59.5,0,0 +730,322.5,-61.5,0,0 +731,343.5,-52.5,0,0 +732,337.5,-39.5,0,0 +733,329.5,-65.5,0,0 +734,348.5,-66.5,0,0 +735,299.5,-65.5,0,0 +736,303.5,-52.5,0,0 +737,316.5,-33.5,0,0 +738,345.5,-64.5,0,0 +739,332.5,-57.5,0,0 +740,306.5,-33.5,0,0 +741,303.5,-38.5,0,0 +742,348.5,-45.5,0,0 +743,307.5,-25.5,0,0 +744,349.5,-39.5,0,0 +745,337.5,-38.5,0,0 +746,283.5,-31.5,0,0 +747,327.5,-61.5,0,0 +748,296.5,-63.5,0,0 +749,293.5,-55.5,0,0 +750,338.5,-67.5,0,0 +751,330.5,-44.5,0,0 +752,291.5,-34.5,0,0 +753,307.5,-45.5,0,0 +754,313.5,-30.5,0,0 +755,303.5,-38.5,0,0 +756,319.5,-35.5,0,0 +757,346.5,-34.5,0,0 +758,325.5,-53.5,0,0 +759,290.5,-48.5,0,0 +760,320.5,-53.5,0,0 +761,329.5,-29.5,0,0 +762,327.5,-51.5,0,0 +763,306.5,-38.5,0,0 +764,297.5,-45.5,0,0 +765,306.5,-35.5,0,0 +766,310.5,-63.5,0,0 +767,314.5,-29.5,0,0 +768,297.5,-60.5,0,0 +769,307.5,-42.5,0,0 +770,285.5,-29.5,0,0 +771,348.5,-67.5,0,0 +772,348.5,-64.5,0,0 +773,293.5,-50.5,0,0 +774,281.5,-54.5,0,0 +775,321.5,-54.5,0,0 +776,344.5,-63.5,0,0 +777,307.5,-39.5,0,0 +778,313.5,-36.5,0,0 +779,347.5,-29.5,0,0 +780,326.5,-52.5,0,0 +781,330.5,-46.5,0,0 +782,290.5,-39.5,0,0 +783,286.5,-42.5,0,0 +784,338.5,-40.5,0,0 +785,296.5,-44.5,0,0 +786,336.5,-33.5,0,0 +787,320.5,-47.5,0,0 +788,283.5,-61.5,0,0 +789,287.5,-45.5,0,0 +790,286.5,-35.5,0,0 +791,312.5,-28.5,0,0 +792,320.5,-69.5,0,0 +793,289.5,-58.5,0,0 +794,300.5,-66.5,0,0 +795,306.5,-58.5,0,0 +796,320.5,-33.5,0,0 +797,308.5,-62.5,0,0 +798,316.5,-36.5,0,0 +799,313.5,-31.5,0,0 +800,299.5,-37.5,0,0 +801,309.5,-50.5,0,0 +802,304.5,-49.5,0,0 +803,336.5,-25.5,0,0 +804,322.5,-66.5,0,0 +805,297.5,-52.5,0,0 +806,312.5,-29.5,0,0 +807,303.5,-60.5,0,0 +808,320.5,-40.5,0,0 +809,283.5,-34.5,0,0 +810,301.5,-59.5,0,0 +811,315.5,-68.5,0,0 +812,346.5,-60.5,0,0 +813,349.5,-37.5,0,0 +814,312.5,-33.5,0,0 +815,283.5,-68.5,0,0 +816,288.5,-69.5,0,0 +817,318.5,-48.5,0,0 +818,300.5,-55.5,0,0 +819,313.5,-35.5,0,0 +820,286.5,-46.5,0,0 +821,330.5,-52.5,0,0 +822,301.5,-54.5,0,0 +823,338.5,-45.5,0,0 +824,305.5,-28.5,0,0 +825,315.5,-30.5,0,0 +826,335.5,-69.5,0,0 +827,310.5,-40.5,0,0 +828,330.5,-26.5,0,0 +829,314.5,-35.5,0,0 +830,306.5,-50.5,0,0 \ No newline at end of file