From 0a192aa28d3f6e30e70f3623738f1696c8fe6c6b Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Wed, 7 Jun 2023 07:40:59 -0400 Subject: [PATCH 01/12] Use type-specific catalog info --- src/hipscat_import/association/arguments.py | 26 ++-- .../association/run_association.py | 17 ++- src/hipscat_import/catalog/arguments.py | 27 ++-- src/hipscat_import/catalog/run_import.py | 28 ++-- src/hipscat_import/index/arguments.py | 25 ++-- src/hipscat_import/index/run_index.py | 13 +- .../margin_cache/margin_cache_arguments.py | 26 +++- .../association/test_association_argument.py | 9 +- .../association/test_run_association.py | 120 ++++-------------- .../catalog/test_argument_validation.py | 10 +- .../catalog/test_file_readers.py | 54 ++++---- .../hipscat_import/catalog/test_run_import.py | 66 ++++------ .../catalog/test_run_round_trip.py | 88 +++---------- .../catalog_info.json | 8 +- .../catalog_info.json | 6 +- .../index/test_index_argument.py | 7 +- tests/hipscat_import/index/test_run_index.py | 17 +-- .../test_arguments_margin_cache.py | 28 ++++ 18 files changed, 250 insertions(+), 325 deletions(-) diff --git a/src/hipscat_import/association/arguments.py b/src/hipscat_import/association/arguments.py index 28b8f852..4a85d742 100644 --- a/src/hipscat_import/association/arguments.py +++ b/src/hipscat_import/association/arguments.py @@ -2,7 +2,8 @@ from dataclasses import dataclass -from hipscat.catalog import CatalogParameters +from hipscat.catalog.association_catalog.association_catalog import \ + AssociationCatalogInfo from hipscat_import.runtime_arguments import RuntimeArguments @@ -51,17 +52,18 @@ def _check_arguments(self): if self.compute_partition_size < 100_000: raise ValueError("compute_partition_size must be at least 100_000") - def to_catalog_parameters(self) -> CatalogParameters: - """Convert importing arguments into hipscat catalog parameters. - - Returns: - CatalogParameters for catalog being created. 
- """ - return CatalogParameters( - catalog_name=self.output_catalog_name, - catalog_type="association", - output_path=self.output_path, - ) + def to_catalog_info(self, total_rows) -> AssociationCatalogInfo: + """Catalog-type-specific dataset info.""" + info = { + "catalog_name": self.output_catalog_name, + "catalog_type": "association", + "total_rows": total_rows, + "primary_column": self.primary_id_column, + "primary_catalog": str(self.primary_input_catalog_path), + "join_column": self.join_id_column, + "join_catalog": str(self.join_input_catalog_path), + } + return AssociationCatalogInfo(**info) def additional_runtime_provenance_info(self): return { diff --git a/src/hipscat_import/association/run_association.py b/src/hipscat_import/association/run_association.py index ba505967..461a36f1 100644 --- a/src/hipscat_import/association/run_association.py +++ b/src/hipscat_import/association/run_association.py @@ -9,7 +9,8 @@ from tqdm import tqdm from hipscat_import.association.arguments import AssociationArguments -from hipscat_import.association.map_reduce import map_association, reduce_association +from hipscat_import.association.map_reduce import (map_association, + reduce_association) def _validate_args(args): @@ -40,11 +41,17 @@ def run(args): ) as step_progress: # pylint: disable=duplicate-code # Very similar to /index/run_index.py - catalog_params = args.to_catalog_parameters() - catalog_params.total_rows = int(rows_written) - write_metadata.write_provenance_info(catalog_params, args.provenance_info()) + catalog_info = args.to_catalog_info(int(rows_written)) + write_metadata.write_provenance_info( + catalog_base_dir=args.catalog_path, + dataset_info=catalog_info, + tool_args=args.provenance_info(), + ) step_progress.update(1) - write_metadata.write_catalog_info(catalog_params) + catalog_info = args.to_catalog_info(total_rows=int(rows_written)) + write_metadata.write_catalog_info( + dataset_info=catalog_info, catalog_base_dir=args.catalog_path + ) step_progress.update(1) write_metadata.write_parquet_metadata(args.catalog_path) step_progress.update(1) diff --git a/src/hipscat_import/catalog/arguments.py b/src/hipscat_import/catalog/arguments.py index 7fa98135..10a09d77 100644 --- a/src/hipscat_import/catalog/arguments.py +++ b/src/hipscat_import/catalog/arguments.py @@ -6,7 +6,7 @@ from typing import Callable, List import pandas as pd -from hipscat.catalog import CatalogParameters +from hipscat.catalog.catalog import CatalogInfo from hipscat.io import FilePointer, file_io from hipscat.pixel_math import hipscat_id @@ -140,19 +140,17 @@ def _check_arguments(self): if not self.filter_function: self.filter_function = passthrough_filter_function - def to_catalog_parameters(self) -> CatalogParameters: - """Convert importing arguments into hipscat catalog parameters. - Returns: - CatalogParameters for catalog being created. 
- """ - return CatalogParameters( - catalog_name=self.output_catalog_name, - catalog_type=self.catalog_type, - output_path=self.output_path, - epoch=self.epoch, - ra_column=self.ra_column, - dec_column=self.dec_column, - ) + def to_catalog_info(self, total_rows) -> CatalogInfo: + """Catalog-type-specific dataset info.""" + info = { + "catalog_name": self.output_catalog_name, + "catalog_type": self.catalog_type, + "total_rows": total_rows, + "epoch": self.epoch, + "ra_column": self.ra_column, + "dec_column": self.dec_column, + } + return CatalogInfo(**info) def additional_runtime_provenance_info(self): return { @@ -180,6 +178,7 @@ def check_healpix_order_range( ): """Helper method to heck if the `order` is within the range determined by the `lower_bound` and `upper_bound`, inclusive. + Args: order (int): healpix order to check field_name (str): field name to use in the error message diff --git a/src/hipscat_import/catalog/run_import.py b/src/hipscat_import/catalog/run_import.py index 9b32416c..273e1da7 100644 --- a/src/hipscat_import/catalog/run_import.py +++ b/src/hipscat_import/catalog/run_import.py @@ -50,13 +50,13 @@ def _map_pixels(args, client): total=len(futures), disable=(not args.progress_bar), ): - if future.status == "error": # pragma: no cover + if future.status == "error": # pragma: no cover some_error = True raw_histogram = np.add(raw_histogram, result) resume.write_mapping_start_key(args.tmp_path, future.key) resume.write_histogram(args.tmp_path, raw_histogram) resume.write_mapping_done_key(args.tmp_path, future.key) - if some_error: # pragma: no cover + if some_error: # pragma: no cover raise RuntimeError("Some mapping stages failed. See logs for details.") resume.set_mapping_done(args.tmp_path) return raw_histogram @@ -98,10 +98,10 @@ def _split_pixels(args, alignment_future, client): total=len(futures), disable=(not args.progress_bar), ): - if future.status == "error": # pragma: no cover + if future.status == "error": # pragma: no cover some_error = True resume.write_splitting_done_key(args.tmp_path, future.key) - if some_error: # pragma: no cover + if some_error: # pragma: no cover raise RuntimeError("Some splitting stages failed. See logs for details.") resume.set_splitting_done(args.tmp_path) @@ -143,10 +143,10 @@ def _reduce_pixels(args, destination_pixel_map, client): total=len(futures), disable=(not args.progress_bar), ): - if future.status == "error": # pragma: no cover + if future.status == "error": # pragma: no cover some_error = True resume.write_reducing_key(args.tmp_path, future.key) - if some_error: # pragma: no cover + if some_error: # pragma: no cover raise RuntimeError("Some reducing stages failed. 
See logs for details.") resume.set_reducing_done(args.tmp_path) @@ -215,12 +215,17 @@ def run_with_client(args, client): with tqdm( total=6, desc="Finishing", disable=not args.progress_bar ) as step_progress: - catalog_parameters = args.to_catalog_parameters() - catalog_parameters.total_rows = int(raw_histogram.sum()) - io.write_provenance_info(catalog_parameters, args.provenance_info()) + catalog_info = args.to_catalog_info(int(raw_histogram.sum())) + io.write_provenance_info( + catalog_base_dir=args.catalog_path, + dataset_info=catalog_info, + tool_args=args.provenance_info(), + ) step_progress.update(1) - io.write_catalog_info(catalog_parameters) + io.write_catalog_info( + catalog_base_dir=args.catalog_path, dataset_info=catalog_info + ) step_progress.update(1) if not args.debug_stats_only: io.write_parquet_metadata(args.catalog_path) @@ -228,7 +233,8 @@ def run_with_client(args, client): io.write_fits_map(args.catalog_path, raw_histogram) step_progress.update(1) io.write_partition_info( - catalog_parameters, destination_healpix_pixel_map=destination_pixel_map + catalog_base_dir=args.catalog_path, + destination_healpix_pixel_map=destination_pixel_map, ) step_progress.update(1) resume.clean_resume_files(args.tmp_path) diff --git a/src/hipscat_import/index/arguments.py b/src/hipscat_import/index/arguments.py index d7ba2856..4c7a7e86 100644 --- a/src/hipscat_import/index/arguments.py +++ b/src/hipscat_import/index/arguments.py @@ -3,7 +3,8 @@ from dataclasses import dataclass, field from typing import List, Optional -from hipscat.catalog import Catalog, CatalogParameters +from hipscat.catalog import Catalog +from hipscat.catalog.index.index_catalog_info import IndexCatalogInfo from hipscat_import.runtime_arguments import RuntimeArguments @@ -46,17 +47,17 @@ def _check_arguments(self): if self.compute_partition_size < 100_000: raise ValueError("compute_partition_size must be at least 100_000") - def to_catalog_parameters(self) -> CatalogParameters: - """Convert importing arguments into hipscat catalog parameters. - - Returns: - CatalogParameters for catalog being created. 
- """ - return CatalogParameters( - catalog_name=self.output_catalog_name, - catalog_type="index", - output_path=self.output_path, - ) + def to_catalog_info(self, total_rows) -> IndexCatalogInfo: + """Catalog-type-specific dataset info.""" + info = { + "catalog_name": self.output_catalog_name, + "total_rows": total_rows, + "catalog_type": "index", + "primary_catalog":str(self.input_catalog_path), + "indexing_column": self.indexing_column, + "extra_columns": self.extra_columns, + } + return IndexCatalogInfo(**info) def additional_runtime_provenance_info(self): return { diff --git a/src/hipscat_import/index/run_index.py b/src/hipscat_import/index/run_index.py index d30b6f47..46a7bd3e 100644 --- a/src/hipscat_import/index/run_index.py +++ b/src/hipscat_import/index/run_index.py @@ -25,11 +25,16 @@ def run(args): ) as step_progress: # pylint: disable=duplicate-code # Very similar to /association/run_association.py - catalog_params = args.to_catalog_parameters() - catalog_params.total_rows = int(rows_written) - write_metadata.write_provenance_info(catalog_params, args.provenance_info()) + catalog_info = args.to_catalog_info(int(rows_written)) + write_metadata.write_provenance_info( + catalog_base_dir=args.catalog_path, + dataset_info=catalog_info, + tool_args=args.provenance_info(), + ) step_progress.update(1) - write_metadata.write_catalog_info(catalog_params) + write_metadata.write_catalog_info( + catalog_base_dir=args.catalog_path, dataset_info=catalog_info + ) step_progress.update(1) file_io.remove_directory(args.tmp_path, ignore_errors=True) step_progress.update(1) diff --git a/src/hipscat_import/margin_cache/margin_cache_arguments.py b/src/hipscat_import/margin_cache/margin_cache_arguments.py index 31174d20..3293ae0f 100644 --- a/src/hipscat_import/margin_cache/margin_cache_arguments.py +++ b/src/hipscat_import/margin_cache/margin_cache_arguments.py @@ -4,6 +4,9 @@ import healpy as hp import numpy as np from hipscat.catalog import Catalog +from hipscat.catalog.margin_cache.margin_cache_catalog_info import ( + MarginCacheCatalogInfo, +) from hipscat.io import file_io from hipscat_import.runtime_arguments import RuntimeArguments @@ -53,9 +56,30 @@ def _check_arguments(self): margin_pixel_nside = hp.order2nside(self.margin_order) - if hp.nside2resol(margin_pixel_nside, arcmin=True) * 60. < self.margin_threshold: + if ( + hp.nside2resol(margin_pixel_nside, arcmin=True) * 60.0 + < self.margin_threshold + ): # pylint: disable=line-too-long warnings.warn( "Warning: margin pixels have a smaller resolution than margin_threshold; this may lead to data loss in the margin cache." 
) # pylint: enable=line-too-long + + def to_catalog_info(self, total_rows) -> MarginCacheCatalogInfo: + """Catalog-type-specific dataset info.""" + info = { + "catalog_name": self.output_catalog_name, + "total_rows": total_rows, + "catalog_type": "margin", + "primary_catalog": self.input_catalog_path, + "margin_threshold": self.margin_threshold, + } + return MarginCacheCatalogInfo(**info) + + def additional_runtime_provenance_info(self): + return { + "input_catalog_path": str(self.input_catalog_path), + "margin_threshold": self.margin_threshold, + "margin_order": self.margin_order, + } diff --git a/tests/hipscat_import/association/test_association_argument.py b/tests/hipscat_import/association/test_association_argument.py index b14c785a..bf82055b 100644 --- a/tests/hipscat_import/association/test_association_argument.py +++ b/tests/hipscat_import/association/test_association_argument.py @@ -181,8 +181,8 @@ def test_all_required_args(tmp_path, small_sky_object_catalog): ) -def test_to_catalog_parameters(small_sky_object_catalog, tmp_path): - """Verify creation of catalog parameters for index to be created.""" +def test_to_catalog_info(small_sky_object_catalog, tmp_path): + """Verify creation of catalog parameters for association table to be created.""" args = AssociationArguments( primary_input_catalog_path=small_sky_object_catalog, primary_id_column="id", @@ -193,8 +193,9 @@ def test_to_catalog_parameters(small_sky_object_catalog, tmp_path): output_path=tmp_path, output_catalog_name="small_sky_self_join", ) - catalog_parameters = args.to_catalog_parameters() - assert catalog_parameters.catalog_name == args.output_catalog_name + catalog_info = args.to_catalog_info(total_rows=10) + assert catalog_info.catalog_name == args.output_catalog_name + assert catalog_info.total_rows == 10 def test_provenance_info(small_sky_object_catalog, tmp_path): diff --git a/tests/hipscat_import/association/test_run_association.py b/tests/hipscat_import/association/test_run_association.py index c1aa4b73..c09a31f7 100644 --- a/tests/hipscat_import/association/test_run_association.py +++ b/tests/hipscat_import/association/test_run_association.py @@ -6,6 +6,8 @@ import numpy.testing as npt import pandas as pd import pytest +from hipscat.catalog.association_catalog.association_catalog import \ + AssociationCatalog import hipscat_import.association.run_association as runner from hipscat_import.association.arguments import AssociationArguments @@ -29,9 +31,8 @@ def test_object_to_source( small_sky_object_catalog, small_sky_source_catalog, tmp_path, - assert_text_file_matches, ): - """test stuff""" + """Test creating association between object and source catalogs.""" args = AssociationArguments( primary_input_catalog_path=small_sky_object_catalog, @@ -46,40 +47,11 @@ def test_object_to_source( ) runner.run(args) - # Check that the catalog metadata file exists - expected_metadata_lines = [ - "{", - ' "catalog_name": "small_sky_association",', - ' "catalog_type": "association",', - ' "epoch": "J2000",', - ' "ra_kw": "ra",', - ' "dec_kw": "dec",', - ' "total_rows": 17161', - "}", - ] - metadata_filename = os.path.join(args.catalog_path, "catalog_info.json") - assert_text_file_matches(expected_metadata_lines, metadata_filename) - - # Check that the partition *join* info file exists - expected_lines = [ - "Norder,Dir,Npix,join_Norder,join_Dir,join_Npix,num_rows", - "0,0,11,0,0,4,50", - "0,0,11,1,0,47,2395", - "0,0,11,2,0,176,385", - "0,0,11,2,0,177,1510", - "0,0,11,2,0,178,1634", - "0,0,11,2,0,179,1773", - 
"0,0,11,2,0,180,655", - "0,0,11,2,0,181,903", - "0,0,11,2,0,182,1246", - "0,0,11,2,0,183,1143", - "0,0,11,2,0,184,1390", - "0,0,11,2,0,185,2942", - "0,0,11,2,0,186,452", - "0,0,11,2,0,187,683", - ] - metadata_filename = os.path.join(args.catalog_path, "partition_join_info.csv") - assert_text_file_matches(expected_lines, metadata_filename) + ## Check that the association data can be parsed as a valid association catalog. + catalog = AssociationCatalog.read_from_hipscat(args.catalog_path) + assert catalog.on_disk + assert catalog.catalog_path == args.catalog_path + assert len(catalog.get_join_pixels()) == 14 ## Test one pixel that will have 50 rows in it. output_file = os.path.join( @@ -105,15 +77,19 @@ def test_object_to_source( ids = data_frame["join_id"] assert np.logical_and(ids >= 70_000, ids < 87161).all() + catalog = AssociationCatalog.read_from_hipscat(args.catalog_path) + assert catalog.on_disk + assert catalog.catalog_path == args.catalog_path + assert len(catalog.get_join_pixels()) == 14 + @pytest.mark.dask def test_source_to_object( small_sky_object_catalog, small_sky_source_catalog, tmp_path, - assert_text_file_matches, ): - """test stuff""" + """Test creating (weirder) association between source and object catalogs.""" args = AssociationArguments( primary_input_catalog_path=small_sky_source_catalog, @@ -128,40 +104,11 @@ def test_source_to_object( ) runner.run(args) - # Check that the catalog metadata file exists - expected_metadata_lines = [ - "{", - ' "catalog_name": "small_sky_association",', - ' "catalog_type": "association",', - ' "epoch": "J2000",', - ' "ra_kw": "ra",', - ' "dec_kw": "dec",', - ' "total_rows": 17161', - "}", - ] - metadata_filename = os.path.join(args.catalog_path, "catalog_info.json") - assert_text_file_matches(expected_metadata_lines, metadata_filename) - - # Check that the partition *join* info file exists - expected_lines = [ - "Norder,Dir,Npix,join_Norder,join_Dir,join_Npix,num_rows", - "0,0,4,0,0,11,50", - "1,0,47,0,0,11,2395", - "2,0,176,0,0,11,385", - "2,0,177,0,0,11,1510", - "2,0,178,0,0,11,1634", - "2,0,179,0,0,11,1773", - "2,0,180,0,0,11,655", - "2,0,181,0,0,11,903", - "2,0,182,0,0,11,1246", - "2,0,183,0,0,11,1143", - "2,0,184,0,0,11,1390", - "2,0,185,0,0,11,2942", - "2,0,186,0,0,11,452", - "2,0,187,0,0,11,683", - ] - metadata_filename = os.path.join(args.catalog_path, "partition_join_info.csv") - assert_text_file_matches(expected_lines, metadata_filename) + ## Check that the association data can be parsed as a valid association catalog. + catalog = AssociationCatalog.read_from_hipscat(args.catalog_path) + assert catalog.on_disk + assert catalog.catalog_path == args.catalog_path + assert len(catalog.get_join_pixels()) == 14 ## Test one pixel that will have 50 rows in it. 
output_file = os.path.join( @@ -192,9 +139,8 @@ def test_source_to_object( def test_self_join( small_sky_object_catalog, tmp_path, - assert_text_file_matches, ): - """test stuff""" + """Test creating association between object catalog and itself.""" args = AssociationArguments( primary_input_catalog_path=small_sky_object_catalog, @@ -209,27 +155,11 @@ def test_self_join( ) runner.run(args) - # Check that the catalog metadata file exists - expected_metadata_lines = [ - "{", - ' "catalog_name": "small_sky_self_association",', - ' "catalog_type": "association",', - ' "epoch": "J2000",', - ' "ra_kw": "ra",', - ' "dec_kw": "dec",', - ' "total_rows": 131', - "}", - ] - metadata_filename = os.path.join(args.catalog_path, "catalog_info.json") - assert_text_file_matches(expected_metadata_lines, metadata_filename) - - # Check that the partition *join* info file exists - expected_lines = [ - "Norder,Dir,Npix,join_Norder,join_Dir,join_Npix,num_rows", - "0,0,11,0,0,11,131", - ] - metadata_filename = os.path.join(args.catalog_path, "partition_join_info.csv") - assert_text_file_matches(expected_lines, metadata_filename) + ## Check that the association data can be parsed as a valid association catalog. + catalog = AssociationCatalog.read_from_hipscat(args.catalog_path) + assert catalog.on_disk + assert catalog.catalog_path == args.catalog_path + assert len(catalog.get_join_pixels()) == 1 ## Test one pixel that will have 50 rows in it. output_file = os.path.join( diff --git a/tests/hipscat_import/catalog/test_argument_validation.py b/tests/hipscat_import/catalog/test_argument_validation.py index 22a77bb1..76641037 100644 --- a/tests/hipscat_import/catalog/test_argument_validation.py +++ b/tests/hipscat_import/catalog/test_argument_validation.py @@ -3,7 +3,8 @@ import pytest -from hipscat_import.catalog.arguments import ImportArguments, check_healpix_order_range +from hipscat_import.catalog.arguments import (ImportArguments, + check_healpix_order_range) # pylint: disable=protected-access @@ -168,7 +169,7 @@ def test_catalog_type(blank_data_dir, tmp_path): ) -def test_to_catalog_parameters(blank_data_dir, tmp_path): +def test_to_catalog_info(blank_data_dir, tmp_path): """Verify creation of catalog parameters for catalog to be created.""" args = ImportArguments( output_catalog_name="catalog", @@ -177,8 +178,9 @@ def test_to_catalog_parameters(blank_data_dir, tmp_path): output_path=tmp_path, tmp_dir=tmp_path, ) - catalog_parameters = args.to_catalog_parameters() - assert catalog_parameters.catalog_name == "catalog" + catalog_info = args.to_catalog_info(total_rows=10) + assert catalog_info.catalog_name == "catalog" + assert catalog_info.total_rows == 10 def test_provenance_info(blank_data_dir, tmp_path): diff --git a/tests/hipscat_import/catalog/test_file_readers.py b/tests/hipscat_import/catalog/test_file_readers.py index 87ef60b6..6d907ea1 100644 --- a/tests/hipscat_import/catalog/test_file_readers.py +++ b/tests/hipscat_import/catalog/test_file_readers.py @@ -7,15 +7,24 @@ import pyarrow as pa import pyarrow.parquet as pq import pytest -from hipscat.catalog import CatalogParameters - -from hipscat_import.catalog.file_readers import ( - CsvReader, - FitsReader, - ParquetReader, - get_file_reader, -) - +from hipscat.catalog.catalog import CatalogInfo + +from hipscat_import.catalog.file_readers import (CsvReader, FitsReader, + ParquetReader, + get_file_reader) + + +# pylint: disable=redefined-outer-name +@pytest.fixture +def basic_catalog_info(): + info = { + "catalog_name": "test_catalog", + "catalog_type": 
"object", + "total_rows": 100, + "ra_column": "ra", + "dec_column": "dec", + } + return CatalogInfo(**info) def test_unknown_file_type(): """File reader factory method should fail for unknown file types""" @@ -187,7 +196,7 @@ def test_csv_reader_pipe_delimited(formats_pipe_csv, tmp_path): assert np.all(column_types == expected_column_types) -def test_csv_reader_provenance_info(tmp_path): +def test_csv_reader_provenance_info(tmp_path, basic_catalog_info): """Test that we get some provenance info and it is parseable into JSON.""" reader = CsvReader( header=None, @@ -201,10 +210,9 @@ def test_csv_reader_provenance_info(tmp_path): }, ) provenance_info = reader.provenance_info() - base_catalog_parameters = CatalogParameters( - output_path=tmp_path, catalog_name="empty" - ) - io.write_provenance_info(base_catalog_parameters, provenance_info) + catalog_base_dir = os.path.join(tmp_path, "test_catalog") + os.makedirs(catalog_base_dir) + io.write_provenance_info(catalog_base_dir, basic_catalog_info, provenance_info) def test_parquet_reader(parquet_shards_shard_44_0): @@ -226,14 +234,13 @@ def test_parquet_reader_chunked(parquet_shards_shard_44_0): assert total_chunks == 7 -def test_parquet_reader_provenance_info(tmp_path): +def test_parquet_reader_provenance_info(tmp_path, basic_catalog_info): """Test that we get some provenance info and it is parseable into JSON.""" reader = ParquetReader(chunksize=1) provenance_info = reader.provenance_info() - base_catalog_parameters = CatalogParameters( - output_path=tmp_path, catalog_name="empty" - ) - io.write_provenance_info(base_catalog_parameters, provenance_info) + catalog_base_dir = os.path.join(tmp_path, "test_catalog") + os.makedirs(catalog_base_dir) + io.write_provenance_info(catalog_base_dir, basic_catalog_info, provenance_info) def test_read_fits(formats_fits): @@ -267,11 +274,10 @@ def test_read_fits_columns(formats_fits): assert list(frame.columns) == ["id", "ra", "dec"] -def test_fits_reader_provenance_info(tmp_path): +def test_fits_reader_provenance_info(tmp_path, basic_catalog_info): """Test that we get some provenance info and it is parseable into JSON.""" reader = FitsReader() provenance_info = reader.provenance_info() - base_catalog_parameters = CatalogParameters( - output_path=tmp_path, catalog_name="empty" - ) - io.write_provenance_info(base_catalog_parameters, provenance_info) + catalog_base_dir = os.path.join(tmp_path, "test_catalog") + os.makedirs(catalog_base_dir) + io.write_provenance_info(catalog_base_dir, basic_catalog_info, provenance_info) diff --git a/tests/hipscat_import/catalog/test_run_import.py b/tests/hipscat_import/catalog/test_run_import.py index bd565996..0a30b558 100644 --- a/tests/hipscat_import/catalog/test_run_import.py +++ b/tests/hipscat_import/catalog/test_run_import.py @@ -4,6 +4,7 @@ import shutil import pytest +from hipscat.catalog.catalog import Catalog import hipscat_import.catalog.resume_files as rf import hipscat_import.catalog.run_import as runner @@ -29,7 +30,6 @@ def test_resume_dask_runner( small_sky_parts_dir, resume_dir, tmp_path, - assert_text_file_matches, assert_parquet_file_ids, ): """Test execution in the presence of some resume files.""" @@ -91,26 +91,13 @@ def test_resume_dask_runner( runner.run_with_client(args, dask_client) # Check that the catalog metadata file exists - expected_metadata_lines = [ - "{", - ' "catalog_name": "resume",', - ' "catalog_type": "object",', - ' "epoch": "J2000",', - ' "ra_kw": "ra",', - ' "dec_kw": "dec",', - ' "total_rows": 131', - "}", - ] - metadata_filename = 
os.path.join(args.catalog_path, "catalog_info.json") - assert_text_file_matches(expected_metadata_lines, metadata_filename) - - # Check that the partition info file exists - expected_partition_lines = [ - "Norder,Dir,Npix,num_rows", - "0,0,11,131", - ] - partition_filename = os.path.join(args.catalog_path, "partition_info.csv") - assert_text_file_matches(expected_partition_lines, partition_filename) + catalog = Catalog.read_from_hipscat(args.catalog_path) + assert catalog.on_disk + assert catalog.catalog_path == args.catalog_path + assert catalog.catalog_info.ra_column == "ra" + assert catalog.catalog_info.dec_column == "dec" + assert catalog.catalog_info.total_rows == 131 + assert len(catalog.get_pixels()) == 1 # Check that the catalog parquet file exists and contains correct object IDs output_file = os.path.join( @@ -146,8 +133,13 @@ def test_resume_dask_runner( runner.run_with_client(args, dask_client) - assert_text_file_matches(expected_metadata_lines, metadata_filename) - assert_text_file_matches(expected_partition_lines, partition_filename) + catalog = Catalog.read_from_hipscat(args.catalog_path) + assert catalog.on_disk + assert catalog.catalog_path == args.catalog_path + assert catalog.catalog_info.ra_column == "ra" + assert catalog.catalog_info.dec_column == "dec" + assert catalog.catalog_info.total_rows == 131 + assert len(catalog.get_pixels()) == 1 assert_parquet_file_ids(output_file, "id", expected_ids) @@ -156,7 +148,6 @@ def test_dask_runner( dask_client, small_sky_parts_dir, assert_parquet_file_ids, - assert_text_file_matches, tmp_path, ): """Test basic execution.""" @@ -173,26 +164,13 @@ def test_dask_runner( runner.run_with_client(args, dask_client) # Check that the catalog metadata file exists - expected_lines = [ - "{", - ' "catalog_name": "small_sky_object_catalog",', - ' "catalog_type": "object",', - ' "epoch": "J2000",', - ' "ra_kw": "ra",', - ' "dec_kw": "dec",', - ' "total_rows": 131', - "}", - ] - metadata_filename = os.path.join(args.catalog_path, "catalog_info.json") - assert_text_file_matches(expected_lines, metadata_filename) - - # Check that the partition info file exists - expected_lines = [ - "Norder,Dir,Npix,num_rows", - "0,0,11,131", - ] - metadata_filename = os.path.join(args.catalog_path, "partition_info.csv") - assert_text_file_matches(expected_lines, metadata_filename) + catalog = Catalog.read_from_hipscat(args.catalog_path) + assert catalog.on_disk + assert catalog.catalog_path == args.catalog_path + assert catalog.catalog_info.ra_column == "ra" + assert catalog.catalog_info.dec_column == "dec" + assert catalog.catalog_info.total_rows == 131 + assert len(catalog.get_pixels()) == 1 # Check that the catalog parquet file exists and contains correct object IDs output_file = os.path.join( diff --git a/tests/hipscat_import/catalog/test_run_round_trip.py b/tests/hipscat_import/catalog/test_run_round_trip.py index 174c09a9..8b050f01 100644 --- a/tests/hipscat_import/catalog/test_run_round_trip.py +++ b/tests/hipscat_import/catalog/test_run_round_trip.py @@ -11,6 +11,7 @@ import numpy.testing as npt import pandas as pd import pytest +from hipscat.catalog.catalog import Catalog import hipscat_import.catalog.run_import as runner from hipscat_import.catalog.arguments import ImportArguments @@ -21,7 +22,6 @@ def test_import_source_table( dask_client, small_sky_source_dir, - assert_text_file_matches, tmp_path, ): """Test basic execution, using a larger source file. 
@@ -47,39 +47,12 @@ def test_import_source_table( runner.run_with_client(args, dask_client) # Check that the catalog metadata file exists - expected_lines = [ - "{", - ' "catalog_name": "small_sky_source_catalog",', - ' "catalog_type": "source",', - ' "epoch": "J2000",', - ' "ra_kw": "source_ra",', - ' "dec_kw": "source_dec",', - ' "total_rows": 17161', - "}", - ] - metadata_filename = os.path.join(args.catalog_path, "catalog_info.json") - assert_text_file_matches(expected_lines, metadata_filename) - - # Check that the partition info file exists - expected_lines = [ - "Norder,Dir,Npix,num_rows", - "0,0,4,50", - "1,0,47,2395", - "2,0,176,385", - "2,0,177,1510", - "2,0,178,1634", - "2,0,179,1773", - "2,0,180,655", - "2,0,181,903", - "2,0,182,1246", - "2,0,183,1143", - "2,0,184,1390", - "2,0,185,2942", - "2,0,186,452", - "2,0,187,683", - ] - metadata_filename = os.path.join(args.catalog_path, "partition_info.csv") - assert_text_file_matches(expected_lines, metadata_filename) + catalog = Catalog.read_from_hipscat(args.catalog_path) + assert catalog.on_disk + assert catalog.catalog_path == args.catalog_path + assert catalog.catalog_info.ra_column == "source_ra" + assert catalog.catalog_info.dec_column == "source_dec" + assert len(catalog.get_pixels()) == 14 @pytest.mark.dask @@ -131,8 +104,8 @@ def test_import_preserve_index( ): """Test basic execution, with input with pandas metadata. - the input file is a parquet file with some pandas metadata. - this verifies that the parquet file at the end also has pandas - metadata, and the user's preferred id is retained as the index, + this verifies that the parquet file at the end also has pandas + metadata, and the user's preferred id is retained as the index, when requested. """ @@ -222,8 +195,8 @@ def test_import_multiindex( """Test basic execution, with input with pandas metadata - this is *similar* to the above test - the input file is a parquet file with a multi-level pandas index. - this verifies that the parquet file at the end also has pandas - metadata, and the user's preferred id is retained as the index, + this verifies that the parquet file at the end also has pandas + metadata, and the user's preferred id is retained as the index, when requested. """ @@ -310,7 +283,6 @@ def test_import_multiindex( def test_import_constant_healpix_order( dask_client, small_sky_parts_dir, - assert_text_file_matches, tmp_path, ): """Test basic execution. @@ -329,40 +301,12 @@ def test_import_constant_healpix_order( runner.run_with_client(args, dask_client) - # Check that the partition info file exists - all pixels at order 2! 
- expected_lines = [ - "Norder,Dir,Npix,num_rows", - "2,0,176,4", - "2,0,177,11", - "2,0,178,14", - "2,0,179,13", - "2,0,180,5", - "2,0,181,7", - "2,0,182,8", - "2,0,183,9", - "2,0,184,11", - "2,0,185,23", - "2,0,186,4", - "2,0,187,4", - "2,0,188,17", - "2,0,190,1", - ] - metadata_filename = os.path.join(args.catalog_path, "partition_info.csv") - assert_text_file_matches(expected_lines, metadata_filename) - # Check that the catalog metadata file exists - expected_lines = [ - "{", - ' "catalog_name": "small_sky_object_catalog",', - ' "catalog_type": "object",', - ' "epoch": "J2000",', - ' "ra_kw": "ra",', - ' "dec_kw": "dec",', - ' "total_rows": 131', - "}", - ] - metadata_filename = os.path.join(args.catalog_path, "catalog_info.json") - assert_text_file_matches(expected_lines, metadata_filename) + catalog = Catalog.read_from_hipscat(args.catalog_path) + assert catalog.on_disk + assert catalog.catalog_path == args.catalog_path + # Check that the partition info file exists - all pixels at order 2! + assert all(pixel.order == 2 for pixel in catalog.partition_info.get_healpix_pixels()) # Pick a parquet file and make sure it contains as many rows as we expect output_file = os.path.join( diff --git a/tests/hipscat_import/data/small_sky_object_catalog/catalog_info.json b/tests/hipscat_import/data/small_sky_object_catalog/catalog_info.json index 451af75e..59eaef9c 100644 --- a/tests/hipscat_import/data/small_sky_object_catalog/catalog_info.json +++ b/tests/hipscat_import/data/small_sky_object_catalog/catalog_info.json @@ -1,8 +1,8 @@ { "catalog_name": "small_sky_object_catalog", "catalog_type": "object", + "total_rows": 131, "epoch": "J2000", - "ra_kw": "ra", - "dec_kw": "dec", - "total_rows": 131 -} + "ra_column": "ra", + "dec_column": "dec" +} \ No newline at end of file diff --git a/tests/hipscat_import/data/small_sky_source_catalog/catalog_info.json b/tests/hipscat_import/data/small_sky_source_catalog/catalog_info.json index 7c819f1d..0491b5c5 100644 --- a/tests/hipscat_import/data/small_sky_source_catalog/catalog_info.json +++ b/tests/hipscat_import/data/small_sky_source_catalog/catalog_info.json @@ -1,8 +1,8 @@ { "catalog_name": "small_sky_source_catalog", "catalog_type": "source", + "total_rows": 17161, "epoch": "J2000", "ra_column": "source_ra", - "dec_column": "source_dec", - "total_rows": 17161 -} + "dec_column": "source_dec" +} \ No newline at end of file diff --git a/tests/hipscat_import/index/test_index_argument.py b/tests/hipscat_import/index/test_index_argument.py index c7032806..c1b3e125 100644 --- a/tests/hipscat_import/index/test_index_argument.py +++ b/tests/hipscat_import/index/test_index_argument.py @@ -128,7 +128,7 @@ def test_compute_partition_size(tmp_path, small_sky_object_catalog): ) -def test_to_catalog_parameters(small_sky_object_catalog, tmp_path): +def test_to_catalog_info(small_sky_object_catalog, tmp_path): """Verify creation of catalog parameters for index to be created.""" args = IndexArguments( input_catalog_path=small_sky_object_catalog, @@ -138,8 +138,9 @@ def test_to_catalog_parameters(small_sky_object_catalog, tmp_path): include_hipscat_index=True, include_order_pixel=True, ) - catalog_parameters = args.to_catalog_parameters() - assert catalog_parameters.catalog_name == args.output_catalog_name + catalog_info = args.to_catalog_info(total_rows=10) + assert catalog_info.catalog_name == args.output_catalog_name + assert catalog_info.total_rows == 10 def test_provenance_info(small_sky_object_catalog, tmp_path): diff --git 
a/tests/hipscat_import/index/test_run_index.py b/tests/hipscat_import/index/test_run_index.py index 0f84cd61..f9bf3d3e 100644 --- a/tests/hipscat_import/index/test_run_index.py +++ b/tests/hipscat_import/index/test_run_index.py @@ -5,6 +5,7 @@ import pyarrow as pa import pyarrow.parquet as pq import pytest +from hipscat.catalog.dataset.dataset import Dataset import hipscat_import.index.run_index as runner from hipscat_import.index.arguments import IndexArguments @@ -27,7 +28,6 @@ def test_bad_args(): def test_run_index( small_sky_object_catalog, tmp_path, - assert_text_file_matches, ): """Test appropriate metadata is written""" @@ -42,18 +42,9 @@ def test_run_index( runner.run(args) # Check that the catalog metadata file exists - expected_metadata_lines = [ - "{", - ' "catalog_name": "small_sky_object_index",', - ' "catalog_type": "index",', - ' "epoch": "J2000",', - ' "ra_kw": "ra",', - ' "dec_kw": "dec",', - ' "total_rows": 131', - "}", - ] - metadata_filename = os.path.join(args.catalog_path, "catalog_info.json") - assert_text_file_matches(expected_metadata_lines, metadata_filename) + catalog = Dataset.read_from_hipscat(args.catalog_path) + assert catalog.on_disk + assert catalog.catalog_path == args.catalog_path basic_index_parquet_schema = pa.schema( [ diff --git a/tests/hipscat_import/margin_cache/test_arguments_margin_cache.py b/tests/hipscat_import/margin_cache/test_arguments_margin_cache.py index 0d1537af..b7240ba9 100644 --- a/tests/hipscat_import/margin_cache/test_arguments_margin_cache.py +++ b/tests/hipscat_import/margin_cache/test_arguments_margin_cache.py @@ -62,3 +62,31 @@ def test_margin_threshold_warns(small_sky_source_catalog, tmp_path): output_catalog_name="catalog_cache", margin_order=16 ) + + +def test_to_catalog_info(small_sky_source_catalog, tmp_path): + """Verify creation of catalog info for margin cache to be created.""" + args = MarginCacheArguments( + margin_threshold=5.0, + input_catalog_path=small_sky_source_catalog, + output_path=tmp_path, + output_catalog_name="catalog_cache", + margin_order=4 + ) + catalog_info = args.to_catalog_info(total_rows=10) + assert catalog_info.catalog_name == args.output_catalog_name + assert catalog_info.total_rows == 10 + + +def test_provenance_info(small_sky_source_catalog, tmp_path): + """Verify that provenance info includes margin-cache-specific fields.""" + args = MarginCacheArguments( + margin_threshold=5.0, + input_catalog_path=small_sky_source_catalog, + output_path=tmp_path, + output_catalog_name="catalog_cache", + margin_order=4 + ) + + runtime_args = args.provenance_info()["runtime_args"] + assert "margin_threshold" in runtime_args From e45d95827d3c42a64f2a70268ff9ac1722fca795 Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Mon, 12 Jun 2023 18:56:27 -0400 Subject: [PATCH 02/12] Add type hints. 
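
The type-specific catalog info objects introduced in the previous commit are
consumed by every runner in the same pattern. A minimal sketch of that flow,
mirroring the run_association.py and run_import.py hunks in PATCH 01
(`finalize_catalog` is an illustrative name only, not a function added by
this series):

    from hipscat.io import write_metadata

    def finalize_catalog(args, rows_written: int) -> None:
        # Build the type-specific info object, sized with the real row count.
        catalog_info = args.to_catalog_info(int(rows_written))
        # Write provenance info, then catalog metadata, against the catalog root.
        write_metadata.write_provenance_info(
            catalog_base_dir=args.catalog_path,
            dataset_info=catalog_info,
            tool_args=args.provenance_info(),
        )
        write_metadata.write_catalog_info(
            catalog_base_dir=args.catalog_path, dataset_info=catalog_info
        )

The type hints below make that contract explicit at each call site.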
--- src/hipscat_import/association/arguments.py | 7 ++++--- src/hipscat_import/catalog/arguments.py | 2 +- src/hipscat_import/index/arguments.py | 4 ++-- .../margin_cache/margin_cache_arguments.py | 2 +- .../catalog/test_argument_validation.py | 3 +-- tests/hipscat_import/catalog/test_file_readers.py | 10 +++++++--- .../margin_cache/test_arguments_margin_cache.py | 15 ++++++++++----- 7 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/hipscat_import/association/arguments.py b/src/hipscat_import/association/arguments.py index 4a85d742..e1b717e2 100644 --- a/src/hipscat_import/association/arguments.py +++ b/src/hipscat_import/association/arguments.py @@ -2,8 +2,9 @@ from dataclasses import dataclass -from hipscat.catalog.association_catalog.association_catalog import \ - AssociationCatalogInfo +from hipscat.catalog.association_catalog.association_catalog import ( + AssociationCatalogInfo, +) from hipscat_import.runtime_arguments import RuntimeArguments @@ -65,7 +66,7 @@ def to_catalog_info(self, total_rows) -> AssociationCatalogInfo: } return AssociationCatalogInfo(**info) - def additional_runtime_provenance_info(self): + def additional_runtime_provenance_info(self) -> dict: return { "primary_input_catalog_path": str(self.primary_input_catalog_path), "primary_id_column": self.primary_id_column, diff --git a/src/hipscat_import/catalog/arguments.py b/src/hipscat_import/catalog/arguments.py index 10a09d77..01a468a6 100644 --- a/src/hipscat_import/catalog/arguments.py +++ b/src/hipscat_import/catalog/arguments.py @@ -152,7 +152,7 @@ def to_catalog_info(self, total_rows) -> CatalogInfo: } return CatalogInfo(**info) - def additional_runtime_provenance_info(self): + def additional_runtime_provenance_info(self) -> dict: return { "catalog_name": self.output_catalog_name, "epoch": self.epoch, diff --git a/src/hipscat_import/index/arguments.py b/src/hipscat_import/index/arguments.py index 4c7a7e86..88fd957c 100644 --- a/src/hipscat_import/index/arguments.py +++ b/src/hipscat_import/index/arguments.py @@ -53,13 +53,13 @@ def to_catalog_info(self, total_rows) -> IndexCatalogInfo: "catalog_name": self.output_catalog_name, "total_rows": total_rows, "catalog_type": "index", - "primary_catalog":str(self.input_catalog_path), + "primary_catalog": str(self.input_catalog_path), "indexing_column": self.indexing_column, "extra_columns": self.extra_columns, } return IndexCatalogInfo(**info) - def additional_runtime_provenance_info(self): + def additional_runtime_provenance_info(self) -> dict: return { "input_catalog_path": str(self.input_catalog_path), "indexing_column": self.indexing_column, diff --git a/src/hipscat_import/margin_cache/margin_cache_arguments.py b/src/hipscat_import/margin_cache/margin_cache_arguments.py index 3293ae0f..e6412390 100644 --- a/src/hipscat_import/margin_cache/margin_cache_arguments.py +++ b/src/hipscat_import/margin_cache/margin_cache_arguments.py @@ -77,7 +77,7 @@ def to_catalog_info(self, total_rows) -> MarginCacheCatalogInfo: } return MarginCacheCatalogInfo(**info) - def additional_runtime_provenance_info(self): + def additional_runtime_provenance_info(self) -> dict: return { "input_catalog_path": str(self.input_catalog_path), "margin_threshold": self.margin_threshold, diff --git a/tests/hipscat_import/catalog/test_argument_validation.py b/tests/hipscat_import/catalog/test_argument_validation.py index 76641037..9b9694df 100644 --- a/tests/hipscat_import/catalog/test_argument_validation.py +++ b/tests/hipscat_import/catalog/test_argument_validation.py @@ -3,8 +3,7 @@ 
import pytest -from hipscat_import.catalog.arguments import (ImportArguments, - check_healpix_order_range) +from hipscat_import.catalog.arguments import ImportArguments, check_healpix_order_range # pylint: disable=protected-access diff --git a/tests/hipscat_import/catalog/test_file_readers.py b/tests/hipscat_import/catalog/test_file_readers.py index 6d907ea1..ff56229d 100644 --- a/tests/hipscat_import/catalog/test_file_readers.py +++ b/tests/hipscat_import/catalog/test_file_readers.py @@ -9,9 +9,12 @@ import pytest from hipscat.catalog.catalog import CatalogInfo -from hipscat_import.catalog.file_readers import (CsvReader, FitsReader, - ParquetReader, - get_file_reader) +from hipscat_import.catalog.file_readers import ( + CsvReader, + FitsReader, + ParquetReader, + get_file_reader, +) # pylint: disable=redefined-outer-name @@ -26,6 +29,7 @@ def basic_catalog_info(): } return CatalogInfo(**info) + def test_unknown_file_type(): """File reader factory method should fail for unknown file types""" with pytest.raises(NotImplementedError): diff --git a/tests/hipscat_import/margin_cache/test_arguments_margin_cache.py b/tests/hipscat_import/margin_cache/test_arguments_margin_cache.py index b7240ba9..00bab144 100644 --- a/tests/hipscat_import/margin_cache/test_arguments_margin_cache.py +++ b/tests/hipscat_import/margin_cache/test_arguments_margin_cache.py @@ -7,6 +7,7 @@ # pylint: disable=protected-access + def test_empty_required(tmp_path): """*Most* required arguments are provided.""" ## Input catalog path is missing @@ -17,6 +18,7 @@ def test_empty_required(tmp_path): output_catalog_name="catalog_cache", ) + def test_margin_order_dynamic(small_sky_source_catalog, tmp_path): """Ensure we can dynamically set the margin_order""" args = MarginCacheArguments( @@ -28,6 +30,7 @@ def test_margin_order_dynamic(small_sky_source_catalog, tmp_path): assert args.margin_order == 3 + def test_margin_order_static(small_sky_source_catalog, tmp_path): """Ensure we can manually set the margin_order""" args = MarginCacheArguments( @@ -35,11 +38,12 @@ def test_margin_order_static(small_sky_source_catalog, tmp_path): input_catalog_path=small_sky_source_catalog, output_path=tmp_path, output_catalog_name="catalog_cache", - margin_order=4 + margin_order=4, ) assert args.margin_order == 4 + def test_margin_order_invalid(small_sky_source_catalog, tmp_path): """Ensure we raise an exception when margin_order is invalid""" with pytest.raises(ValueError, match="margin_order"): @@ -48,9 +52,10 @@ def test_margin_order_invalid(small_sky_source_catalog, tmp_path): input_catalog_path=small_sky_source_catalog, output_path=tmp_path, output_catalog_name="catalog_cache", - margin_order=2 + margin_order=2, ) + def test_margin_threshold_warns(small_sky_source_catalog, tmp_path): """Ensure we give a warning when margin_threshold is greater than margin_order resolution""" @@ -60,7 +65,7 @@ def test_margin_threshold_warns(small_sky_source_catalog, tmp_path): input_catalog_path=small_sky_source_catalog, output_path=tmp_path, output_catalog_name="catalog_cache", - margin_order=16 + margin_order=16, ) @@ -71,7 +76,7 @@ def test_to_catalog_info(small_sky_source_catalog, tmp_path): input_catalog_path=small_sky_source_catalog, output_path=tmp_path, output_catalog_name="catalog_cache", - margin_order=4 + margin_order=4, ) catalog_info = args.to_catalog_info(total_rows=10) assert catalog_info.catalog_name == args.output_catalog_name @@ -85,7 +90,7 @@ def test_provenance_info(small_sky_source_catalog, tmp_path): 
input_catalog_path=small_sky_source_catalog, output_path=tmp_path, output_catalog_name="catalog_cache", - margin_order=4 + margin_order=4, ) runtime_args = args.provenance_info()["runtime_args"] From 2eaf08bc4c97f855551b56cbf8e576dad1ad7e3a Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Mon, 12 Jun 2023 19:09:54 -0400 Subject: [PATCH 03/12] Fix file reader reference. --- src/hipscat_import/catalog/arguments.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/hipscat_import/catalog/arguments.py b/src/hipscat_import/catalog/arguments.py index 01a468a6..38b59c11 100644 --- a/src/hipscat_import/catalog/arguments.py +++ b/src/hipscat_import/catalog/arguments.py @@ -169,7 +169,9 @@ def additional_runtime_provenance_info(self) -> dict: "pixel_threshold": self.pixel_threshold, "mapping_healpix_order": self.mapping_healpix_order, "debug_stats_only": self.debug_stats_only, - "file_reader_info": self.file_reader.provenance_info(), + "file_reader_info": self.file_reader.provenance_info() + if self.file_reader is not None + else {}, } From 3130761fb9e3c6494580fa3b61f30ce5e2b95a0f Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Wed, 14 Jun 2023 08:53:00 -0400 Subject: [PATCH 04/12] Remove command line arguments. --- docs/guide/command_line.rst | 7 - docs/guide/overview.rst | 1 - docs/index.rst | 1 - src/hipscat_import/__init__.py | 1 - src/hipscat_import/__main__.py | 7 - src/hipscat_import/catalog/__init__.py | 1 - .../catalog/command_line_arguments.py | 246 ------------------ src/hipscat_import/control.py | 10 - .../association/test_association_argument.py | 2 +- .../catalog/test_argument_validation.py | 5 +- .../catalog/test_arguments_commandline.py | 65 ----- .../index/test_index_argument.py | 2 +- .../hipscat_import/test_runtime_arguments.py | 2 +- 13 files changed, 6 insertions(+), 344 deletions(-) delete mode 100644 docs/guide/command_line.rst delete mode 100644 src/hipscat_import/__main__.py delete mode 100644 src/hipscat_import/catalog/command_line_arguments.py delete mode 100644 src/hipscat_import/control.py delete mode 100644 tests/hipscat_import/catalog/test_arguments_commandline.py diff --git a/docs/guide/command_line.rst b/docs/guide/command_line.rst deleted file mode 100644 index 34c36c40..00000000 --- a/docs/guide/command_line.rst +++ /dev/null @@ -1,7 +0,0 @@ -Command Line Arguments -=============================================================================== - -TODO - -Arguments -------------------------------------------------------------------------------- \ No newline at end of file diff --git a/docs/guide/overview.rst b/docs/guide/overview.rst index 1b9bdc6d..14fd472f 100644 --- a/docs/guide/overview.rst +++ b/docs/guide/overview.rst @@ -11,5 +11,4 @@ Installation Other Topics ------------------------------------------------------------------------------- -* :doc:`command_line` * :doc:`resume` diff --git a/docs/index.rst b/docs/index.rst index d0ecaaaf..f56c5dae 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,7 +8,6 @@ Utility for ingesting large survey data into HiPSCat structure. 
:caption: Importing Catalogs guide/overview - guide/command_line guide/resume Notebooks diff --git a/src/hipscat_import/__init__.py b/src/hipscat_import/__init__.py index 409818be..baba0a94 100644 --- a/src/hipscat_import/__init__.py +++ b/src/hipscat_import/__init__.py @@ -1,4 +1,3 @@ """All modules for hipscat-import package""" -from .control import main from .runtime_arguments import RuntimeArguments diff --git a/src/hipscat_import/__main__.py b/src/hipscat_import/__main__.py deleted file mode 100644 index a4585e85..00000000 --- a/src/hipscat_import/__main__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Main method to enable command line execution""" - - -from hipscat_import.control import main - -if __name__ == "__main__": - main() diff --git a/src/hipscat_import/catalog/__init__.py b/src/hipscat_import/catalog/__init__.py index 6015c90c..a2b40f62 100644 --- a/src/hipscat_import/catalog/__init__.py +++ b/src/hipscat_import/catalog/__init__.py @@ -1,7 +1,6 @@ """All modules for importing new catalogs.""" from .arguments import ImportArguments -from .command_line_arguments import parse_command_line from .file_readers import (CsvReader, FitsReader, InputReader, ParquetReader, get_file_reader) from .map_reduce import map_to_pixels, reduce_pixel_shards, split_pixels diff --git a/src/hipscat_import/catalog/command_line_arguments.py b/src/hipscat_import/catalog/command_line_arguments.py deleted file mode 100644 index 226e7096..00000000 --- a/src/hipscat_import/catalog/command_line_arguments.py +++ /dev/null @@ -1,246 +0,0 @@ -"""Parse import arguments from command line""" - -import argparse - -from hipscat_import.catalog.arguments import ImportArguments -from hipscat_import.catalog.file_readers import get_file_reader - - -def parse_command_line(cl_args): - """Parse arguments from the command line""" - - parser = argparse.ArgumentParser( - prog="LSD2 Partitioner", - description="Instantiate a partitioned catalog from unpartitioned sources", - ) - - # =========== INPUT ARGUMENTS =========== - group = parser.add_argument_group("INPUT") - group.add_argument( - "-c", - "--catalog_name", - help="short name for the catalog that will be used for the output directory", - default=None, - type=str, - ) - group.add_argument( - "-i", - "--input_path", - help="path prefix for unpartitioned input files", - default=None, - type=str, - ) - group.add_argument( - "-fmt", - "--input_format", - help="file format for unpartitioned input files", - default="parquet", - type=str, - ) - group.add_argument( - "--input_file_list", - help="explicit list of input files, comma-separated", - default="", - type=str, - ) - - # =========== READER ARGUMENTS =========== - group = parser.add_argument_group("READER") - group.add_argument( - "--schema_file", - help="parquet file that contains field names and types to be used when reading a CSV file", - default=None, - type=str, - ) - group.add_argument( - "--header_rows", - help="number of rows of header in a CSV. if 0, there are only data rows", - default=1, - type=int, - ) - group.add_argument( - "--column_names", - help="comma-separated list of names of columns. " - "used in the absence of a header row or to rename columns", - default=None, - type=str, - ) - group.add_argument( - "--separator", - help="field delimiter in text or CSV file", - default=",", - type=str, - ) - group.add_argument( - "--chunksize", - help="number of input rows to process in a chunk. 
recommend using" - " a smaller chunk size for input with wider rows", - default=500_000, - type=int, - ) - # =========== INPUT COLUMNS =========== - group = parser.add_argument_group( - "INPUT COLUMNS", - """Column names in the input source that - correspond to spatial attributes used in partitioning""", - ) - group.add_argument( - "-ra", - "--ra_column", - help="column name for the ra (rate of ascension)", - default="ra", - type=str, - ) - group.add_argument( - "-dec", - "--dec_column", - help="column name for the dec (declination)", - default="dec", - type=str, - ) - group.add_argument( - "-id", - "--id_column", - help="column name for the object id", - default="id", - type=str, - ) - # =========== OUTPUT ARGUMENTS =========== - group = parser.add_argument_group("OUTPUT") - group.add_argument( - "-o", - "--output_path", - help="path prefix for partitioned output and metadata files", - default=None, - type=str, - ) - group.add_argument( - "--add_hipscat_index", - help="Option to generate the _hipscat_index column " - "a spatially aware index for read and join optimization", - action="store_true", - ) - group.add_argument( - "--overwrite", - help="if set, any existing catalog data will be overwritten", - action="store_true", - ) - group.add_argument( - "--no_overwrite", - help="if set, the pipeline will exit if existing output is found", - dest="overwrite", - action="store_false", - ) - group.add_argument( - "--resume", - help="if set, the pipeline will try to resume from a previous failed pipeline progress", - action="store_true", - ) - group.add_argument( - "--no_resume", - help="if set, the pipeline will exit if existing intermediate files are found", - dest="resume", - action="store_false", - ) - - # =========== STATS ARGUMENTS =========== - group = parser.add_argument_group("STATS") - group.add_argument( - "-ho", - "--highest_healpix_order", - help="the most dense healpix order (7-10 is a good range for this)", - default=10, - type=int, - ) - group.add_argument( - "-pt", - "--pixel_threshold", - help="maximum objects allowed in a single pixel", - default=1_000_000, - type=int, - ) - group.add_argument( - "--debug_stats_only", - help="""DEBUGGING FLAG - - if set, the pipeline will only fetch statistics about the origin data - and will not generate partitioned output""", - action="store_true", - ) - group.add_argument( - "--no_debug_stats_only", - help="DEBUGGING FLAG - if set, the pipeline will generate partitioned output", - dest="debug_stats_only", - action="store_false", - ) - # =========== EXECUTION ARGUMENTS =========== - group = parser.add_argument_group("EXECUTION") - group.add_argument( - "--progress_bar", - help="should a progress bar be displayed?", - default=True, - action="store_true", - ) - group.add_argument( - "--no_progress_bar", - help="should a progress bar be displayed?", - dest="progress_bar", - action="store_false", - ) - group.add_argument( - "--tmp_dir", - help="directory for storing temporary parquet files", - default=None, - type=str, - ) - group.add_argument( - "-dt", - "--dask_tmp", - help="directory for storing temporary files generated by dask engine", - default=None, - type=str, - ) - group.add_argument( - "--dask_n_workers", - help="the number of dask workers available", - default=1, - type=int, - ) - group.add_argument( - "--dask_threads_per_worker", - help="the number of threads per dask worker", - default=1, - type=int, - ) - - args = parser.parse_args(cl_args) - - return ImportArguments( - output_catalog_name=args.catalog_name, - 
input_path=args.input_path, - input_format=args.input_format, - input_file_list=( - args.input_file_list.split(",") if args.input_file_list else None - ), - ra_column=args.ra_column, - dec_column=args.dec_column, - id_column=args.id_column, - add_hipscat_index=args.add_hipscat_index, - output_path=args.output_path, - overwrite=args.overwrite, - highest_healpix_order=args.highest_healpix_order, - pixel_threshold=args.pixel_threshold, - debug_stats_only=args.debug_stats_only, - file_reader=get_file_reader( - args.input_format, - chunksize=args.chunksize, - header=args.header_rows if args.header_rows != 0 else None, - schema_file=args.schema_file, - column_names=(args.column_names.split(",") if args.column_names else None), - separator=args.separator, - ), - tmp_dir=args.tmp_dir, - progress_bar=args.progress_bar, - dask_tmp=args.dask_tmp, - dask_n_workers=args.dask_n_workers, - dask_threads_per_worker=args.dask_threads_per_worker, - ) diff --git a/src/hipscat_import/control.py b/src/hipscat_import/control.py deleted file mode 100644 index 5a46e808..00000000 --- a/src/hipscat_import/control.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Flow control and scripting entry points.""" -import sys - -import hipscat_import.catalog.run_import as runner -from hipscat_import.catalog.command_line_arguments import parse_command_line - - -def main(): - """Wrapper of main for setuptools.""" - runner.run(parse_command_line(sys.argv[1:])) diff --git a/tests/hipscat_import/association/test_association_argument.py b/tests/hipscat_import/association/test_association_argument.py index bf82055b..d8e1be4b 100644 --- a/tests/hipscat_import/association/test_association_argument.py +++ b/tests/hipscat_import/association/test_association_argument.py @@ -1,4 +1,4 @@ -"""Tests of argument validation, in the absense of command line parsing""" +"""Tests of argument validation""" import pytest diff --git a/tests/hipscat_import/catalog/test_argument_validation.py b/tests/hipscat_import/catalog/test_argument_validation.py index 9b9694df..f7e377f4 100644 --- a/tests/hipscat_import/catalog/test_argument_validation.py +++ b/tests/hipscat_import/catalog/test_argument_validation.py @@ -1,9 +1,10 @@ -"""Tests of argument validation, in the absense of command line parsing""" +"""Tests of argument validation""" import pytest -from hipscat_import.catalog.arguments import ImportArguments, check_healpix_order_range +from hipscat_import.catalog.arguments import (ImportArguments, + check_healpix_order_range) # pylint: disable=protected-access diff --git a/tests/hipscat_import/catalog/test_arguments_commandline.py b/tests/hipscat_import/catalog/test_arguments_commandline.py deleted file mode 100644 index bff07f8c..00000000 --- a/tests/hipscat_import/catalog/test_arguments_commandline.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Tests of command line argument validation""" - - -import pytest - -from hipscat_import.catalog.command_line_arguments import parse_command_line - -# pylint: disable=protected-access - - -def test_none(): - """No arguments provided. 
Should error for required args."""
-    empty_args = []
-    with pytest.raises(ValueError):
-        parse_command_line(empty_args)
-
-
-def test_invalid_arguments():
-    """Arguments are ill-formed."""
-    bad_form_args = ["catalog", "path", "path"]
-    with pytest.raises(SystemExit):
-        parse_command_line(bad_form_args)
-
-
-def test_invalid_path():
-    """Required arguments are provided, but paths aren't found."""
-    bad_path_args = ["-c", "catalog", "-i", "path", "-o", "path"]
-    with pytest.raises(FileNotFoundError):
-        parse_command_line(bad_path_args)
-
-
-def test_good_paths(blank_data_dir, tmp_path):
-    """Required arguments are provided, and paths are found."""
-    tmp_path_name = str(tmp_path)
-    good_args = [
-        "--catalog_name",
-        "catalog",
-        "--input_path",
-        blank_data_dir,
-        "--output_path",
-        tmp_path_name,
-        "--input_format",
-        "csv",
-    ]
-    args = parse_command_line(good_args)
-    assert args.input_path == blank_data_dir
-    assert tmp_path_name in str(args.catalog_path)
-
-
-def test_good_paths_short_names(blank_data_dir, tmp_path):
-    """Required arguments are provided, using short names for arguments."""
-    tmp_path_name = str(tmp_path)
-    good_args = [
-        "-c",
-        "catalog",
-        "-i",
-        blank_data_dir,
-        "-o",
-        tmp_path_name,
-        "-fmt",
-        "csv",
-    ]
-    args = parse_command_line(good_args)
-    assert args.input_path == blank_data_dir
-    assert tmp_path_name in str(args.catalog_path)
diff --git a/tests/hipscat_import/index/test_index_argument.py b/tests/hipscat_import/index/test_index_argument.py
index c1b3e125..cf7036a4 100644
--- a/tests/hipscat_import/index/test_index_argument.py
+++ b/tests/hipscat_import/index/test_index_argument.py
@@ -1,4 +1,4 @@
-"""Tests of argument validation, in the absense of command line parsing"""
+"""Tests of argument validation"""
 
 import pytest
 
diff --git a/tests/hipscat_import/test_runtime_arguments.py b/tests/hipscat_import/test_runtime_arguments.py
index 97dc4ba5..7fc3c087 100644
--- a/tests/hipscat_import/test_runtime_arguments.py
+++ b/tests/hipscat_import/test_runtime_arguments.py
@@ -1,4 +1,4 @@
-"""Tests of argument validation, in the absense of command line parsing"""
+"""Tests of argument validation"""
 
 import os
 
From 8580cf26539afcb47b39dd853a678901dc903af2 Mon Sep 17 00:00:00 2001
From: delucchi-cmu
Date: Wed, 14 Jun 2023 15:18:10 -0400
Subject: [PATCH 05/12] Consolidate pipeline logic

---
 pyproject.toml                                |  9 +-
 .../association/run_association.py            |  8 +-
 src/hipscat_import/catalog/__init__.py        | 31 +++++--
 src/hipscat_import/catalog/run_import.py      | 22 +----
 src/hipscat_import/index/run_index.py         |  8 +-
 src/hipscat_import/margin_cache/__init__.py   |  3 +-
 .../margin_cache/margin_cache.py              | 48 +++-------
 src/hipscat_import/pipeline.py                | 83 +++++++++++++++++++
 src/hipscat_import/runtime_arguments.py       |  4 +
 .../test_association_map_reduce.py            |  2 +-
 .../hipscat_import/catalog/test_run_import.py | 16 ++--
 .../catalog/test_run_round_trip.py            | 18 ++--
 .../margin_cache/test_margin_cache.py         | 15 ++--
 13 files changed, 161 insertions(+), 106 deletions(-)
 create mode 100644 src/hipscat_import/pipeline.py

diff --git a/pyproject.toml b/pyproject.toml
index 9fbb62d9..456069e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,12 +62,15 @@ write_to = "src/hipscat_import/_version.py"
 [tool.setuptools.package-data]
 hipscat_import = ["py.typed"]
 
-[project.scripts]
-hipscat-import = "hipscat_import.control:main"
-hc = "hipscat_import.control:main"
 
 [tool.pytest.ini_options]
 timeout = 1
 markers = [
     "dask: mark tests as having a dask client runtime dependency",
 ]
+
+[tool.coverage.report]
+omit = [ + "src/hipscat_import/_version.py", #auto-generated + "src/hipscat_import/pipeline.py" #too annoying to test ] \ No newline at end of file diff --git a/src/hipscat_import/association/run_association.py b/src/hipscat_import/association/run_association.py index 461a36f1..b887b090 100644 --- a/src/hipscat_import/association/run_association.py +++ b/src/hipscat_import/association/run_association.py @@ -13,17 +13,13 @@ reduce_association) -def _validate_args(args): +def run(args): + """Run the association pipeline""" if not args: raise TypeError("args is required and should be type AssociationArguments") if not isinstance(args, AssociationArguments): raise TypeError("args must be type AssociationArguments") - -def run(args): - """Run the association pipeline""" - _validate_args(args) - with tqdm(total=1, desc="Mapping ", disable=not args.progress_bar) as step_progress: map_association(args) step_progress.update(1) diff --git a/src/hipscat_import/catalog/__init__.py b/src/hipscat_import/catalog/__init__.py index a2b40f62..fe9a1e61 100644 --- a/src/hipscat_import/catalog/__init__.py +++ b/src/hipscat_import/catalog/__init__.py @@ -1,13 +1,26 @@ """All modules for importing new catalogs.""" from .arguments import ImportArguments -from .file_readers import (CsvReader, FitsReader, InputReader, ParquetReader, - get_file_reader) +from .file_readers import ( + CsvReader, + FitsReader, + InputReader, + ParquetReader, + get_file_reader, +) from .map_reduce import map_to_pixels, reduce_pixel_shards, split_pixels -from .resume_files import (clean_resume_files, is_mapping_done, - is_reducing_done, read_histogram, read_mapping_keys, - read_reducing_keys, set_mapping_done, - set_reducing_done, write_histogram, - write_mapping_done_key, write_mapping_start_key, - write_reducing_key) -from .run_import import run, run_with_client +from .resume_files import ( + clean_resume_files, + is_mapping_done, + is_reducing_done, + read_histogram, + read_mapping_keys, + read_reducing_keys, + set_mapping_done, + set_reducing_done, + write_histogram, + write_mapping_done_key, + write_mapping_start_key, + write_reducing_key, +) +from .run_import import run diff --git a/src/hipscat_import/catalog/run_import.py b/src/hipscat_import/catalog/run_import.py index 273e1da7..e8197f8f 100644 --- a/src/hipscat_import/catalog/run_import.py +++ b/src/hipscat_import/catalog/run_import.py @@ -151,30 +151,12 @@ def _reduce_pixels(args, destination_pixel_map, client): resume.set_reducing_done(args.tmp_path) -def _validate_args(args): +def run(args, client): + """Run catalog creation pipeline.""" if not args: raise ValueError("args is required and should be type ImportArguments") if not isinstance(args, ImportArguments): raise ValueError("args must be type ImportArguments") - - -def run(args): - """Importer that creates a dask client from the arguments""" - _validate_args(args) - - # pylint: disable=duplicate-code - with Client( - local_directory=args.dask_tmp, - n_workers=args.dask_n_workers, - threads_per_worker=args.dask_threads_per_worker, - ) as client: # pragma: no cover - run_with_client(args, client) - # pylint: enable=duplicate-code - - -def run_with_client(args, client): - """Importer, where the client context may out-live the runner""" - _validate_args(args) raw_histogram = _map_pixels(args, client) with tqdm( diff --git a/src/hipscat_import/index/run_index.py b/src/hipscat_import/index/run_index.py index 46a7bd3e..e1912538 100644 --- a/src/hipscat_import/index/run_index.py +++ b/src/hipscat_import/index/run_index.py @@ 
-7,16 +7,12 @@ from hipscat_import.index.arguments import IndexArguments -def _validate_args(args): +def run(args): + """Run index creation pipeline.""" if not args: raise TypeError("args is required and should be type IndexArguments") if not isinstance(args, IndexArguments): raise TypeError("args must be type IndexArguments") - - -def run(args): - """Importer, where the client context may out-live the runner""" - _validate_args(args) rows_written = mr.create_index(args) # All done - write out the metadata diff --git a/src/hipscat_import/margin_cache/__init__.py b/src/hipscat_import/margin_cache/__init__.py index 940d08b5..93f63fe3 100644 --- a/src/hipscat_import/margin_cache/__init__.py +++ b/src/hipscat_import/margin_cache/__init__.py @@ -1,4 +1,3 @@ """All modules for generating margin caches.""" -from .margin_cache import (generate_margin_cache, - generate_margin_cache_with_client) +from .margin_cache import generate_margin_cache from .margin_cache_arguments import MarginCacheArguments diff --git a/src/hipscat_import/margin_cache/margin_cache.py b/src/hipscat_import/margin_cache/margin_cache.py index 59734bba..504ea060 100644 --- a/src/hipscat_import/margin_cache/margin_cache.py +++ b/src/hipscat_import/margin_cache/margin_cache.py @@ -1,5 +1,5 @@ import pandas as pd -from dask.distributed import Client, as_completed +from dask.distributed import as_completed from hipscat import pixel_math from hipscat.io import file_io, paths from tqdm import tqdm @@ -8,6 +8,7 @@ # pylint: disable=too-many-locals,too-many-arguments + def _find_partition_margin_pixel_pairs(stats, margin_order): """Creates a DataFrame filled with many-to-many connections between the catalog partition pixels and the margin pixels at `margin_order`. @@ -31,30 +32,28 @@ def _find_partition_margin_pixel_pairs(stats, margin_order): margin_pairs_df = pd.DataFrame( zip(norders, part_pix, margin_pix), - columns=["partition_order", "partition_pixel", "margin_pixel"] + columns=["partition_order", "partition_pixel", "margin_pixel"], ) return margin_pairs_df + def _create_margin_directory(stats, output_path): """Creates directories for all the catalog partitions.""" for healpixel in stats: order = healpixel.order pix = healpixel.pixel - destination_dir = paths.pixel_directory( - output_path, order, pix - ) + destination_dir = paths.pixel_directory(output_path, order, pix) file_io.make_directory(destination_dir, exist_ok=True) + def _map_to_margin_shards(client, args, partition_pixels, margin_pairs): """Create all the jobs for mapping partition files into the margin cache.""" futures = [] mp_future = client.scatter(margin_pairs, broadcast=True) for pix in partition_pixels: partition_file = paths.pixel_catalog_file( - args.input_catalog_path, - pix.order, - pix.pixel + args.input_catalog_path, pix.order, pix.pixel ) futures.append( client.submit( @@ -76,50 +75,27 @@ def _map_to_margin_shards(client, args, partition_pixels, margin_pairs): ): ... -def generate_margin_cache(args): + +def generate_margin_cache(args, client): """Generate a margin cache for a given input catalog. The input catalog must be in hipscat format. - This method will handle the creation of the `dask.distributed` client - based on the `dask_tmp`, `dask_n_workers`, and `dask_threads_per_worker` - values of the `MarginCacheArguments` object. Args: args (MarginCacheArguments): A valid `MarginCacheArguments` object. 
-    """
-    # pylint: disable=duplicate-code
-    with Client(
-        local_directory=args.dask_tmp,
-        n_workers=args.dask_n_workers,
-        threads_per_worker=args.dask_threads_per_worker,
-    ) as client:  # pragma: no cover
-        generate_margin_cache_with_client(
-            client,
-            args
-        )
-    # pylint: enable=duplicate-code
-
-def generate_margin_cache_with_client(client, args):
-    """Generate a margin cache for a given input catalog.
-    The input catalog must be in hipscat format.
-
-    Args:
         client (dask.distributed.Client): A dask distributed client object.
-        args (MarginCacheArguments): A valid `MarginCacheArguments` object.
     """
     # determine which order to generate margin pixels for
     partition_stats = args.catalog.partition_info.get_healpix_pixels()
 
     margin_pairs = _find_partition_margin_pixel_pairs(
-        partition_stats,
-        args.margin_order
+        partition_stats, args.margin_order
     )
 
     # arcsec to degree conversion
     # TODO: remove this once hipscat uses arcsec for calculation
-    args.margin_threshold = args.margin_threshold / 3600.
+    args.margin_threshold = args.margin_threshold / 3600.0
 
-    _create_margin_directory(
-        partition_stats, args.catalog_path
-    )
+    _create_margin_directory(partition_stats, args.catalog_path)
 
     _map_to_margin_shards(
         client=client,
diff --git a/src/hipscat_import/pipeline.py b/src/hipscat_import/pipeline.py
new file mode 100644
index 00000000..ed5ff0ba
--- /dev/null
+++ b/src/hipscat_import/pipeline.py
@@ -0,0 +1,83 @@
+"""Flow control and pipeline entry points."""
+import smtplib
+from email.message import EmailMessage
+
+from dask.distributed import Client
+
+import hipscat_import.association.run_association as association_runner
+import hipscat_import.catalog.run_import as catalog_runner
+import hipscat_import.index.run_index as index_runner
+import hipscat_import.margin_cache.margin_cache as margin_runner
+from hipscat_import.association.arguments import AssociationArguments
+from hipscat_import.catalog.arguments import ImportArguments
+from hipscat_import.index.arguments import IndexArguments
+from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments
+from hipscat_import.runtime_arguments import RuntimeArguments
+
+# pragma: no cover
+
+
+def pipeline(args: RuntimeArguments):
+    """Pipeline that creates its own client from the provided runtime arguments"""
+    with Client(
+        local_directory=args.dask_tmp,
+        n_workers=args.dask_n_workers,
+        threads_per_worker=args.dask_threads_per_worker,
+    ) as client:
+        pipeline_with_client(args, client)
+
+
+def pipeline_with_client(args: RuntimeArguments, client: Client):
+    """Pipeline that is run using an existing client.
+
+    This can be useful in tests, or when a dask client requires some more complex
+    configuration.
+    """
+    try:
+        if not args:
+            raise ValueError("args is required and should be subclass RuntimeArguments")
+
+        if isinstance(args, ImportArguments):
+            catalog_runner.run(args, client)
+        elif isinstance(args, AssociationArguments):
+            association_runner.run(args)
+        elif isinstance(args, IndexArguments):
+            index_runner.run(args)
+        elif isinstance(args, MarginCacheArguments):
+            margin_runner.generate_margin_cache(args, client)
+        else:
+            raise ValueError("unknown args type")
+    except Exception as exception:  # pylint: disable=broad-exception-caught
+        _send_failure_email(args, exception)
+    else:
+        _send_success_email(args)
+
+
+def _send_failure_email(args: RuntimeArguments, exception: Exception):
+    if not args.completion_email_address:
+        pass
+    message = EmailMessage()
+    message["Subject"] = "hipscat-import failure."
+    message["To"] = args.completion_email_address
+    message.set_content(f"failed with message:\n{exception}")
+
+    _send_email(message)
+
+
+def _send_success_email(args):
+    if not args.completion_email_address:
+        pass
+    message = EmailMessage()
+    message["Subject"] = "hipscat-import success."
+    message["To"] = args.completion_email_address
+    message.set_content(f"output_catalog_name: {args.output_catalog_name}")
+
+    _send_email(message)
+
+
+def _send_email(message: EmailMessage):
+    message["From"] = "updates@lsdb.io"
+
+    # Send the message via our own SMTP server.
+    with smtplib.SMTP("localhost") as server:
+        server.send_message(message)
diff --git a/src/hipscat_import/runtime_arguments.py b/src/hipscat_import/runtime_arguments.py
index c7253450..a2bce52a 100644
--- a/src/hipscat_import/runtime_arguments.py
+++ b/src/hipscat_import/runtime_arguments.py
@@ -36,6 +36,10 @@ class RuntimeArguments:
     dask_threads_per_worker: int = 1
     """number of threads per dask worker"""
 
+    completion_email_address: str = ""
+    """if provided, send an email to the indicated email address once the
+    import pipeline has completed."""
+
     catalog_path = ""
     """constructed output path for the catalog that will be something like
     <output_path>/<output_catalog_name>"""
diff --git a/tests/hipscat_import/association/test_association_map_reduce.py b/tests/hipscat_import/association/test_association_map_reduce.py
index 88de77bb..8d310869 100644
--- a/tests/hipscat_import/association/test_association_map_reduce.py
+++ b/tests/hipscat_import/association/test_association_map_reduce.py
@@ -33,7 +33,7 @@ def test_map_association(
     )
     subset_catalog_path = args.catalog_path
 
-    runner.run_with_client(args, dask_client)
+    runner.run(args, dask_client)
 
     with open(
         os.path.join(subset_catalog_path, "catalog_info.json"), "r", encoding="utf-8"
diff --git a/tests/hipscat_import/catalog/test_run_import.py b/tests/hipscat_import/catalog/test_run_import.py
index 0a30b558..50b6efff 100644
--- a/tests/hipscat_import/catalog/test_run_import.py
+++ b/tests/hipscat_import/catalog/test_run_import.py
@@ -13,15 +13,15 @@
 
 def test_empty_args():
     """Runner should fail with empty arguments"""
-    with pytest.raises(ValueError):
-        runner.run(None)
+    with pytest.raises(ValueError, match="args is required"):
+        runner.run(None, None)
 
 
 def test_bad_args():
     """Runner should fail with mis-typed arguments"""
     args = {"output_catalog_name": "bad_arg_type"}
-    with pytest.raises(ValueError):
-        runner.run(args)
+    with pytest.raises(ValueError, match="ImportArguments"):
+        runner.run(args, None)
 
 
 @pytest.mark.dask
@@ -88,7 +88,7 @@ def test_resume_dask_runner(
         progress_bar=False,
     )
 
-    runner.run_with_client(args, dask_client)
+    runner.run(args, dask_client)
 
     # Check that the catalog metadata file exists
     catalog = Catalog.read_from_hipscat(args.catalog_path)
@@ -131,7 +131,7 @@
         progress_bar=False,
     )
 
-    runner.run_with_client(args, dask_client)
+    runner.run(args, dask_client)
 
     catalog = Catalog.read_from_hipscat(args.catalog_path)
     assert catalog.on_disk
@@ -161,7 +161,7 @@
         progress_bar=False,
     )
 
-    runner.run_with_client(args, dask_client)
+    runner.run(args, dask_client)
 
     # Check that the catalog metadata file exists
     catalog = Catalog.read_from_hipscat(args.catalog_path)
@@ -195,7 +195,7 @@ def test_dask_runner_stats_only(dask_client, small_sky_parts_dir, tmp_path):
         debug_stats_only=True,
     )
 
-    runner.run_with_client(args, dask_client)
+    runner.run(args, dask_client)
 
     metadata_filename = os.path.join(args.catalog_path, "catalog_info.json")
     assert
os.path.exists(metadata_filename) diff --git a/tests/hipscat_import/catalog/test_run_round_trip.py b/tests/hipscat_import/catalog/test_run_round_trip.py index 8b050f01..d8c12fed 100644 --- a/tests/hipscat_import/catalog/test_run_round_trip.py +++ b/tests/hipscat_import/catalog/test_run_round_trip.py @@ -44,7 +44,7 @@ def test_import_source_table( progress_bar=False, ) - runner.run_with_client(args, dask_client) + runner.run(args, dask_client) # Check that the catalog metadata file exists catalog = Catalog.read_from_hipscat(args.catalog_path) @@ -84,7 +84,7 @@ def test_import_mixed_schema_csv( use_schema_file=mixed_schema_csv_parquet, ) - runner.run_with_client(args, dask_client) + runner.run(args, dask_client) # Check that the catalog parquet file exists output_file = os.path.join( @@ -140,7 +140,7 @@ def test_import_preserve_index( progress_bar=False, ) - runner.run_with_client(args, dask_client) + runner.run(args, dask_client) # Check that the catalog parquet file exists output_file = os.path.join( @@ -168,7 +168,7 @@ def test_import_preserve_index( progress_bar=False, ) - runner.run_with_client(args, dask_client) + runner.run(args, dask_client) # Check that the catalog parquet file exists output_file = os.path.join( @@ -235,7 +235,7 @@ def test_import_multiindex( progress_bar=False, ) - runner.run_with_client(args, dask_client) + runner.run(args, dask_client) # Check that the catalog parquet file exists output_file = os.path.join( @@ -263,7 +263,7 @@ def test_import_multiindex( progress_bar=False, ) - runner.run_with_client(args, dask_client) + runner.run(args, dask_client) # Check that the catalog parquet file exists output_file = os.path.join( @@ -299,14 +299,16 @@ def test_import_constant_healpix_order( progress_bar=False, ) - runner.run_with_client(args, dask_client) + runner.run(args, dask_client) # Check that the catalog metadata file exists catalog = Catalog.read_from_hipscat(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path # Check that the partition info file exists - all pixels at order 2! - assert all(pixel.order == 2 for pixel in catalog.partition_info.get_healpix_pixels()) + assert all( + pixel.order == 2 for pixel in catalog.partition_info.get_healpix_pixels() + ) # Pick a parquet file and make sure it contains as many rows as we expect output_file = os.path.join( diff --git a/tests/hipscat_import/margin_cache/test_margin_cache.py b/tests/hipscat_import/margin_cache/test_margin_cache.py index 06672e1a..333acb02 100644 --- a/tests/hipscat_import/margin_cache/test_margin_cache.py +++ b/tests/hipscat_import/margin_cache/test_margin_cache.py @@ -7,6 +7,8 @@ import hipscat_import.margin_cache.margin_cache as mc from hipscat_import.margin_cache import MarginCacheArguments +# pylint: disable=protected-access + @pytest.mark.dask(timeout=20) def test_margin_cache_gen(small_sky_source_catalog, tmp_path, dask_client): @@ -20,10 +22,11 @@ def test_margin_cache_gen(small_sky_source_catalog, tmp_path, dask_client): assert args.catalog.catalog_info.ra_column == "source_ra" - mc.generate_margin_cache_with_client(dask_client, args) + mc.generate_margin_cache(args, dask_client) # TODO: add more verification of output to this test once the # reduce phase is implemented. 
+ def test_partition_margin_pixel_pairs(small_sky_source_catalog, tmp_path): args = MarginCacheArguments( margin_threshold=5.0, @@ -33,8 +36,7 @@ def test_partition_margin_pixel_pairs(small_sky_source_catalog, tmp_path): ) margin_pairs = mc._find_partition_margin_pixel_pairs( - args.catalog.partition_info.get_healpix_pixels(), - args.margin_order + args.catalog.partition_info.get_healpix_pixels(), args.margin_order ) expected = np.array([725, 733, 757, 765, 727, 735, 759, 767, 469, 192]) @@ -42,6 +44,7 @@ def test_partition_margin_pixel_pairs(small_sky_source_catalog, tmp_path): npt.assert_array_equal(margin_pairs.iloc[:10]["margin_pixel"], expected) assert len(margin_pairs) == 196 + def test_create_margin_directory(small_sky_source_catalog, tmp_path): args = MarginCacheArguments( margin_threshold=5.0, @@ -52,10 +55,8 @@ def test_create_margin_directory(small_sky_source_catalog, tmp_path): mc._create_margin_directory( stats=args.catalog.partition_info.get_healpix_pixels(), - output_path=args.catalog_path + output_path=args.catalog_path, ) - output = file_io.append_paths_to_pointer( - args.catalog_path, "Norder=0", "Dir=0" - ) + output = file_io.append_paths_to_pointer(args.catalog_path, "Norder=0", "Dir=0") assert file_io.does_file_or_directory_exist(output) From 3fd8152191ed7efa9c8024ba55660483e52c314b Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Wed, 14 Jun 2023 15:22:51 -0400 Subject: [PATCH 06/12] Lint. --- src/hipscat_import/catalog/run_import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hipscat_import/catalog/run_import.py b/src/hipscat_import/catalog/run_import.py index e8197f8f..8c5db1ad 100644 --- a/src/hipscat_import/catalog/run_import.py +++ b/src/hipscat_import/catalog/run_import.py @@ -7,7 +7,7 @@ import hipscat.io.write_metadata as io import numpy as np -from dask.distributed import Client, as_completed +from dask.distributed import as_completed from hipscat import pixel_math from tqdm import tqdm From cfea516f95b99a37a043fed9320a9714b85a2f5d Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Wed, 14 Jun 2023 16:51:17 -0400 Subject: [PATCH 07/12] Try using "-e" --- .github/workflows/testing-and-coverage.yml | 4 ++-- pyproject.toml | 4 ++-- src/hipscat_import/pipeline.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/testing-and-coverage.yml b/.github/workflows/testing-and-coverage.yml index 4507157b..1676fea9 100644 --- a/.github/workflows/testing-and-coverage.yml +++ b/.github/workflows/testing-and-coverage.yml @@ -27,8 +27,8 @@ jobs: run: | sudo apt-get update python -m pip install --upgrade pip - pip install . - pip install .[dev] + pip install -e . 
+ pip install -e .[dev] if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Run unit tests with pytest run: | diff --git a/pyproject.toml b/pyproject.toml index 456069e8..64e6c3ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,6 @@ markers = [ [tool.coverage.report] omit = [ - "src/hipscat_import/_version.py", #auto-generated - "src/hipscat_import/pipeline.py" #too annoying to test + "src/hipscat_import/_version.py", # auto-generated + "src/hipscat_import/pipeline.py", # too annoying to test ] \ No newline at end of file diff --git a/src/hipscat_import/pipeline.py b/src/hipscat_import/pipeline.py index ed5ff0ba..d3695344 100644 --- a/src/hipscat_import/pipeline.py +++ b/src/hipscat_import/pipeline.py @@ -35,7 +35,7 @@ def pipeline_with_client(args: RuntimeArguments, client: Client): """ try: if not args: - raise ValueError("args is required and should be subclass RuntimeArguments") + raise ValueError("args is required and should be subclass of RuntimeArguments") if isinstance(args, ImportArguments): catalog_runner.run(args, client) From ef051b9206d66e61646affc159f3d159fc0545df Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Wed, 14 Jun 2023 19:48:56 -0400 Subject: [PATCH 08/12] Don't try to send to empty address. --- src/hipscat_import/pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hipscat_import/pipeline.py b/src/hipscat_import/pipeline.py index d3695344..9d6ae581 100644 --- a/src/hipscat_import/pipeline.py +++ b/src/hipscat_import/pipeline.py @@ -55,7 +55,7 @@ def pipeline_with_client(args: RuntimeArguments, client: Client): def _send_failure_email(args: RuntimeArguments, exception: Exception): if not args.completion_email_address: - pass + return message = EmailMessage() message["Subject"] = "hipscat-import failure." message["To"] = args.completion_email_address @@ -66,7 +66,7 @@ def _send_failure_email(args: RuntimeArguments, exception: Exception): def _send_success_email(args): if not args.completion_email_address: - pass + return message = EmailMessage() message["Subject"] = "hipscat-import success." message["To"] = args.completion_email_address From 9105d8422d17f3a7bb912a0b623780769248ece5 Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Wed, 14 Jun 2023 20:31:37 -0400 Subject: [PATCH 09/12] Re-raise instead of swallowing error. 
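
When completion_email_address is unset, _send_failure_email() now re-raises
the original exception instead of falling through, so a failed pipeline no
longer looks like a silent no-op. A minimal sketch of the caller-visible
behavior (the argument values are illustrative, not part of this change):

    from hipscat_import.pipeline import pipeline_with_client

    try:
        # args and client assumed constructed elsewhere
        pipeline_with_client(args, client)
    except Exception as exc:
        # with no email address configured, the pipeline failure now
        # surfaces here rather than being swallowed
        print(f"import failed: {exc}")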
--- docs/notebooks/intro_notebook.ipynb | 4 +- .../association/run_association.py | 3 +- src/hipscat_import/catalog/map_reduce.py | 4 +- .../margin_cache/margin_cache_map_reduce.py | 47 +++++++++--------- src/hipscat_import/pipeline.py | 2 +- .../association/test_run_association.py | 3 +- .../catalog/test_argument_validation.py | 3 +- .../hipscat_import/catalog/test_map_reduce.py | 4 +- .../test_margin_cache_map_reduce.py | 48 +++++++++---------- 9 files changed, 54 insertions(+), 64 deletions(-) diff --git a/docs/notebooks/intro_notebook.ipynb b/docs/notebooks/intro_notebook.ipynb index 2e7779f5..71086d5e 100644 --- a/docs/notebooks/intro_notebook.ipynb +++ b/docs/notebooks/intro_notebook.ipynb @@ -65,9 +65,9 @@ " # Calculate the cosine of each value of X\n", " z = np.cos(x)\n", " # Plot the sine wave in blue, using degrees rather than radians on the X axis\n", - " pl.plot(xdeg, y, color='blue', label='Sine wave')\n", + " pl.plot(xdeg, y, color=\"blue\", label=\"Sine wave\")\n", " # Plot the cos wave in green, using degrees rather than radians on the X axis\n", - " pl.plot(xdeg, z, color='green', label='Cosine wave')\n", + " pl.plot(xdeg, z, color=\"green\", label=\"Cosine wave\")\n", " pl.xlabel(\"Degrees\")\n", " # More sensible X axis values\n", " pl.xticks(np.arange(0, 361, 45))\n", diff --git a/src/hipscat_import/association/run_association.py b/src/hipscat_import/association/run_association.py index b887b090..162fd013 100644 --- a/src/hipscat_import/association/run_association.py +++ b/src/hipscat_import/association/run_association.py @@ -9,8 +9,7 @@ from tqdm import tqdm from hipscat_import.association.arguments import AssociationArguments -from hipscat_import.association.map_reduce import (map_association, - reduce_association) +from hipscat_import.association.map_reduce import map_association, reduce_association def run(args): diff --git a/src/hipscat_import/catalog/map_reduce.py b/src/hipscat_import/catalog/map_reduce.py index cc75dc70..34fa30a5 100644 --- a/src/hipscat_import/catalog/map_reduce.py +++ b/src/hipscat_import/catalog/map_reduce.py @@ -98,7 +98,7 @@ def map_to_pixels( Args: input_file (FilePointer): file to read for catalog data. - file_reader (hipscat_import.catalog.file_readers.InputReader): instance of input + file_reader (hipscat_import.catalog.file_readers.InputReader): instance of input reader that specifies arguments necessary for reading from the input file. shard_suffix (str): unique counter for this input file, used when creating intermediate files @@ -137,7 +137,7 @@ def split_pixels( Args: input_file (FilePointer): file to read for catalog data. - file_reader (hipscat_import.catalog.file_readers.InputReader): instance + file_reader (hipscat_import.catalog.file_readers.InputReader): instance of input reader that specifies arguments necessary for reading from the input file. 
shard_suffix (str): unique counter for this input file, used when creating intermediate files diff --git a/src/hipscat_import/margin_cache/margin_cache_map_reduce.py b/src/hipscat_import/margin_cache/margin_cache_map_reduce.py index ee83837d..8ff13c41 100644 --- a/src/hipscat_import/margin_cache/margin_cache_map_reduce.py +++ b/src/hipscat_import/margin_cache/margin_cache_map_reduce.py @@ -6,6 +6,7 @@ # pylint: disable=too-many-locals,too-many-arguments + def map_pixel_shards( partition_file, margin_pairs, @@ -13,7 +14,7 @@ def map_pixel_shards( output_path, margin_order, ra_column, - dec_column + dec_column, ): """Creates margin cache shards from a source partition file.""" data = pd.read_parquet(partition_file) @@ -38,7 +39,10 @@ def map_pixel_shards( dec_column=dec_column, ) -def _to_pixel_shard(data, margin_threshold, output_path, margin_order, ra_column, dec_column): + +def _to_pixel_shard( + data, margin_threshold, output_path, margin_order, ra_column, dec_column +): """Do boundary checking for the cached partition and then output remaining data.""" order, pix = data["partition_order"].iloc[0], data["partition_pixel"].iloc[0] source_order, source_pix = data["Norder"].iloc[0], data["Npix"].iloc[0] @@ -60,9 +64,7 @@ def _to_pixel_shard(data, margin_threshold, output_path, margin_order, ra_column ) else: data["margin_check"] = pixel_math.check_margin_bounds( - data[ra_column].values, - data[dec_column].values, - bounding_polygons + data[ra_column].values, data[dec_column].values, bounding_polygons ) # pylint: disable-next=singleton-comparison @@ -74,28 +76,27 @@ def _to_pixel_shard(data, margin_threshold, output_path, margin_order, ra_column # generate a file name for our margin shard partition_file = paths.pixel_catalog_file(output_path, order, pix) partition_dir = f"{partition_file[:-8]}/" - shard_dir = paths.pixel_directory( - partition_dir, source_order, source_pix - ) + shard_dir = paths.pixel_directory(partition_dir, source_order, source_pix) file_io.make_directory(shard_dir, exist_ok=True) - shard_path = paths.pixel_catalog_file( - partition_dir, source_order, source_pix - ) + shard_path = paths.pixel_catalog_file(partition_dir, source_order, source_pix) - final_df = margin_data.drop(columns=[ - "partition_order", - "partition_pixel", - "margin_check", - "margin_pixel" - ]) + final_df = margin_data.drop( + columns=[ + "partition_order", + "partition_pixel", + "margin_check", + "margin_pixel", + ] + ) if is_polar: final_df = final_df.drop(columns=["is_trunc"]) final_df.to_parquet(shard_path) + def _margin_filter_polar( data, order, @@ -104,13 +105,11 @@ def _margin_filter_polar( margin_threshold, ra_column, dec_column, - bounding_polygons + bounding_polygons, ): """Filter out margin data around the poles.""" trunc_pix = pixel_math.get_truncated_margin_pixels( - order=order, - pix=pix, - margin_order=margin_order + order=order, pix=pix, margin_order=margin_order ) data["is_trunc"] = np.isin(data["margin_pixel"], trunc_pix) @@ -124,14 +123,12 @@ def _margin_filter_polar( order, pix, margin_order, - margin_threshold + margin_threshold, ) data.loc[data["is_trunc"] == True, "margin_check"] = trunc_check no_trunc_check = pixel_math.check_margin_bounds( - other_data[ra_column].values, - other_data[dec_column].values, - bounding_polygons + other_data[ra_column].values, other_data[dec_column].values, bounding_polygons ) data.loc[data["is_trunc"] == False, "margin_check"] = no_trunc_check # pylint: enable=singleton-comparison diff --git a/src/hipscat_import/pipeline.py 
b/src/hipscat_import/pipeline.py index 9d6ae581..eb0f47c9 100644 --- a/src/hipscat_import/pipeline.py +++ b/src/hipscat_import/pipeline.py @@ -55,7 +55,7 @@ def pipeline_with_client(args: RuntimeArguments, client: Client): def _send_failure_email(args: RuntimeArguments, exception: Exception): if not args.completion_email_address: - return + raise exception message = EmailMessage() message["Subject"] = "hipscat-import failure." message["To"] = args.completion_email_address diff --git a/tests/hipscat_import/association/test_run_association.py b/tests/hipscat_import/association/test_run_association.py index c09a31f7..628b6c6b 100644 --- a/tests/hipscat_import/association/test_run_association.py +++ b/tests/hipscat_import/association/test_run_association.py @@ -6,8 +6,7 @@ import numpy.testing as npt import pandas as pd import pytest -from hipscat.catalog.association_catalog.association_catalog import \ - AssociationCatalog +from hipscat.catalog.association_catalog.association_catalog import AssociationCatalog import hipscat_import.association.run_association as runner from hipscat_import.association.arguments import AssociationArguments diff --git a/tests/hipscat_import/catalog/test_argument_validation.py b/tests/hipscat_import/catalog/test_argument_validation.py index f7e377f4..9954415b 100644 --- a/tests/hipscat_import/catalog/test_argument_validation.py +++ b/tests/hipscat_import/catalog/test_argument_validation.py @@ -3,8 +3,7 @@ import pytest -from hipscat_import.catalog.arguments import (ImportArguments, - check_healpix_order_range) +from hipscat_import.catalog.arguments import ImportArguments, check_healpix_order_range # pylint: disable=protected-access diff --git a/tests/hipscat_import/catalog/test_map_reduce.py b/tests/hipscat_import/catalog/test_map_reduce.py index 7589b2f0..356ac78c 100644 --- a/tests/hipscat_import/catalog/test_map_reduce.py +++ b/tests/hipscat_import/catalog/test_map_reduce.py @@ -194,9 +194,7 @@ def test_reduce_order0(parquet_shards_dir, assert_parquet_file_ids, tmp_path): assert_parquet_file_ids(output_file, "id", expected_ids) -def test_reduce_hipscat_index( - parquet_shards_dir, assert_parquet_file_ids, tmp_path -): +def test_reduce_hipscat_index(parquet_shards_dir, assert_parquet_file_ids, tmp_path): """Test reducing with or without a _hipscat_index field""" mr.reduce_pixel_shards( cache_path=parquet_shards_dir, diff --git a/tests/hipscat_import/margin_cache/test_margin_cache_map_reduce.py b/tests/hipscat_import/margin_cache/test_margin_cache_map_reduce.py index b58b7c29..403302ed 100644 --- a/tests/hipscat_import/margin_cache/test_margin_cache_map_reduce.py +++ b/tests/hipscat_import/margin_cache/test_margin_cache_map_reduce.py @@ -6,19 +6,17 @@ from hipscat_import.margin_cache import margin_cache_map_reduce -keep_cols = [ - "weird_ra", - "weird_dec" -] +keep_cols = ["weird_ra", "weird_dec"] drop_cols = [ - "partition_order", - "partition_pixel", - "margin_check", + "partition_order", + "partition_pixel", + "margin_check", "margin_pixel", - "is_trunc" + "is_trunc", ] + def validate_result_dataframe(df_path, expected_len): res_df = pd.read_parquet(df_path) @@ -32,10 +30,11 @@ def validate_result_dataframe(df_path, expected_len): for col in drop_cols: assert col not in cols + @pytest.mark.timeout(5) def test_to_pixel_shard_equator(tmp_path): - ras = np.arange(0.,360.) - dec = np.full(360, 0.) 
+ ras = np.arange(0.0, 360.0) + dec = np.full(360, 0.0) ppix = np.full(360, 21) porder = np.full(360, 1) norder = np.full(360, 1) @@ -44,13 +43,13 @@ def test_to_pixel_shard_equator(tmp_path): test_df = pd.DataFrame( data=zip(ras, dec, ppix, porder, norder, npix), columns=[ - "weird_ra", + "weird_ra", "weird_dec", "partition_pixel", "partition_order", "Norder", - "Npix" - ] + "Npix", + ], ) test_df["margin_pixel"] = hp.ang2pix( @@ -58,7 +57,7 @@ def test_to_pixel_shard_equator(tmp_path): test_df["weird_ra"].values, test_df["weird_dec"].values, lonlat=True, - nest=True + nest=True, ) margin_cache_map_reduce._to_pixel_shard( @@ -67,21 +66,21 @@ def test_to_pixel_shard_equator(tmp_path): output_path=tmp_path, margin_order=3, ra_column="weird_ra", - dec_column="weird_dec" + dec_column="weird_dec", ) path = file_io.append_paths_to_pointer( - tmp_path, - "Norder=1/Dir=0/Npix=21/Norder=1/Dir=0/Npix=0.parquet" + tmp_path, "Norder=1/Dir=0/Npix=21/Norder=1/Dir=0/Npix=0.parquet" ) assert file_io.does_file_or_directory_exist(path) validate_result_dataframe(path, 46) + @pytest.mark.timeout(5) def test_to_pixel_shard_polar(tmp_path): - ras = np.arange(0.,360.) + ras = np.arange(0.0, 360.0) dec = np.full(360, 89.9) ppix = np.full(360, 15) porder = np.full(360, 2) @@ -91,13 +90,13 @@ def test_to_pixel_shard_polar(tmp_path): test_df = pd.DataFrame( data=zip(ras, dec, ppix, porder, norder, npix), columns=[ - "weird_ra", + "weird_ra", "weird_dec", "partition_pixel", "partition_order", "Norder", - "Npix" - ] + "Npix", + ], ) test_df["margin_pixel"] = hp.ang2pix( @@ -105,7 +104,7 @@ def test_to_pixel_shard_polar(tmp_path): test_df["weird_ra"].values, test_df["weird_dec"].values, lonlat=True, - nest=True + nest=True, ) margin_cache_map_reduce._to_pixel_shard( @@ -114,12 +113,11 @@ def test_to_pixel_shard_polar(tmp_path): output_path=tmp_path, margin_order=3, ra_column="weird_ra", - dec_column="weird_dec" + dec_column="weird_dec", ) path = file_io.append_paths_to_pointer( - tmp_path, - "Norder=2/Dir=0/Npix=15/Norder=2/Dir=0/Npix=0.parquet" + tmp_path, "Norder=2/Dir=0/Npix=15/Norder=2/Dir=0/Npix=0.parquet" ) assert file_io.does_file_or_directory_exist(path) From 7a179c3a84e4cb47f90a3295d852eb2acaaec69a Mon Sep 17 00:00:00 2001 From: Melissa DeLucchi Date: Fri, 16 Jun 2023 09:58:29 -0700 Subject: [PATCH 10/12] Checkpoint some quality-of-life fixes --- src/hipscat_import/catalog/arguments.py | 4 ++-- src/hipscat_import/catalog/run_import.py | 3 +++ src/hipscat_import/pipeline.py | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/hipscat_import/catalog/arguments.py b/src/hipscat_import/catalog/arguments.py index 38b59c11..6d83c9f8 100644 --- a/src/hipscat_import/catalog/arguments.py +++ b/src/hipscat_import/catalog/arguments.py @@ -96,8 +96,8 @@ def _check_arguments(self): check_healpix_order_range( self.highest_healpix_order, "highest_healpix_order" ) - if not 100 <= self.pixel_threshold <= 10_000_000: - raise ValueError("pixel_threshold should be between 100 and 10,000,000") + if not 100 <= self.pixel_threshold <= 1_000_000_000: + raise ValueError("pixel_threshold should be between 100 and 1,000,000,000") self.mapping_healpix_order = self.highest_healpix_order if self.catalog_type not in ("source", "object"): diff --git a/src/hipscat_import/catalog/run_import.py b/src/hipscat_import/catalog/run_import.py index 8c5db1ad..7155c2e4 100644 --- a/src/hipscat_import/catalog/run_import.py +++ b/src/hipscat_import/catalog/run_import.py @@ -52,6 +52,7 @@ def _map_pixels(args, client): ): if 
future.status == "error": # pragma: no cover some_error = True + continue raw_histogram = np.add(raw_histogram, result) resume.write_mapping_start_key(args.tmp_path, future.key) resume.write_histogram(args.tmp_path, raw_histogram) @@ -100,6 +101,7 @@ def _split_pixels(args, alignment_future, client): ): if future.status == "error": # pragma: no cover some_error = True + continue resume.write_splitting_done_key(args.tmp_path, future.key) if some_error: # pragma: no cover raise RuntimeError("Some splitting stages failed. See logs for details.") @@ -145,6 +147,7 @@ def _reduce_pixels(args, destination_pixel_map, client): ): if future.status == "error": # pragma: no cover some_error = True + continue resume.write_reducing_key(args.tmp_path, future.key) if some_error: # pragma: no cover raise RuntimeError("Some reducing stages failed. See logs for details.") diff --git a/src/hipscat_import/pipeline.py b/src/hipscat_import/pipeline.py index d3695344..eb0f47c9 100644 --- a/src/hipscat_import/pipeline.py +++ b/src/hipscat_import/pipeline.py @@ -55,7 +55,7 @@ def pipeline_with_client(args: RuntimeArguments, client: Client): def _send_failure_email(args: RuntimeArguments, exception: Exception): if not args.completion_email_address: - pass + raise exception message = EmailMessage() message["Subject"] = "hipscat-import failure." message["To"] = args.completion_email_address @@ -66,7 +66,7 @@ def _send_failure_email(args: RuntimeArguments, exception: Exception): def _send_success_email(args): if not args.completion_email_address: - pass + return message = EmailMessage() message["Subject"] = "hipscat-import success." message["To"] = args.completion_email_address From 331698a4bb411288515f7eeb80dca97438d923d7 Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Wed, 21 Jun 2023 10:10:37 -0400 Subject: [PATCH 11/12] Remove redundant installs. --- .github/workflows/testing-and-coverage.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/testing-and-coverage.yml b/.github/workflows/testing-and-coverage.yml index 1676fea9..b497a967 100644 --- a/.github/workflows/testing-and-coverage.yml +++ b/.github/workflows/testing-and-coverage.yml @@ -27,7 +27,6 @@ jobs: run: | sudo apt-get update python -m pip install --upgrade pip - pip install -e . pip install -e .[dev] if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Run unit tests with pytest From 5c3655c4b1478c8e338765022221f51921c29966 Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Wed, 21 Jun 2023 19:09:37 -0400 Subject: [PATCH 12/12] Add unit test for example in documentation. 
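
The documented pattern under test: ingest an unrecognized file extension by
subclassing an existing reader and passing an instance as file_reader. A
minimal sketch of that pattern, assuming the data is really CSV under the
hood (paths here are illustrative only):

    from hipscat_import.catalog.arguments import ImportArguments
    from hipscat_import.catalog.file_readers import CsvReader

    class StarrReader(CsvReader):
        """Shallow subclass that reuses CSV parsing for .starr files."""

    args = ImportArguments(
        output_catalog_name="starr",
        input_path="tests/hipscat_import/data/test_formats",  # illustrative
        input_format="starr",
        file_reader=StarrReader(),
        output_path="/tmp/catalogs",  # illustrative
    )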
--- .../catalog/test_run_round_trip.py | 46 +++++- tests/hipscat_import/conftest.py | 5 + .../data/test_formats/catalog.starr | 132 ++++++++++++++++++ 3 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 tests/hipscat_import/data/test_formats/catalog.starr diff --git a/tests/hipscat_import/catalog/test_run_round_trip.py b/tests/hipscat_import/catalog/test_run_round_trip.py index d8c12fed..49484164 100644 --- a/tests/hipscat_import/catalog/test_run_round_trip.py +++ b/tests/hipscat_import/catalog/test_run_round_trip.py @@ -15,7 +15,7 @@ import hipscat_import.catalog.run_import as runner from hipscat_import.catalog.arguments import ImportArguments -from hipscat_import.catalog.file_readers import get_file_reader +from hipscat_import.catalog.file_readers import CsvReader, get_file_reader @pytest.mark.dask @@ -319,3 +319,47 @@ def test_import_constant_healpix_order( assert len(data_frame) == 14 ids = data_frame["id"] assert np.logical_and(ids >= 700, ids < 832).all() + +@pytest.mark.dask +def test_import_starr_file( + dask_client, + formats_dir, + assert_parquet_file_ids, + tmp_path, +): + """Test basic execution. + - tests that we can run pipeline with a totally unknown file type, so long + as a valid InputReader implementation is provided. + """ + + class StarrReader(CsvReader): + """Shallow subclass""" + + args = ImportArguments( + output_catalog_name="starr", + input_path=formats_dir, + input_format="starr", + file_reader=StarrReader(), + output_path=tmp_path, + dask_tmp=tmp_path, + highest_healpix_order=2, + pixel_threshold=3_000, + progress_bar=False, + ) + + runner.run(args, dask_client) + + # Check that the catalog metadata file exists + catalog = Catalog.read_from_hipscat(args.catalog_path) + assert catalog.on_disk + assert catalog.catalog_path == args.catalog_path + assert catalog.catalog_info.total_rows == 131 + assert len(catalog.get_pixels()) == 1 + + # Check that the catalog parquet file exists and contains correct object IDs + output_file = os.path.join( + args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet" + ) + + expected_ids = [*range(700, 831)] + assert_parquet_file_ids(output_file, "id", expected_ids) \ No newline at end of file diff --git a/tests/hipscat_import/conftest.py b/tests/hipscat_import/conftest.py index 2e64d854..fadeb841 100644 --- a/tests/hipscat_import/conftest.py +++ b/tests/hipscat_import/conftest.py @@ -91,6 +91,11 @@ def empty_data_dir(test_data_dir): return os.path.join(test_data_dir, "empty") +@pytest.fixture +def formats_dir(test_data_dir): + return os.path.join(test_data_dir, "test_formats") + + @pytest.fixture def formats_headers_csv(test_data_dir): return os.path.join(test_data_dir, "test_formats", "headers.csv") diff --git a/tests/hipscat_import/data/test_formats/catalog.starr b/tests/hipscat_import/data/test_formats/catalog.starr new file mode 100644 index 00000000..1bb66e95 --- /dev/null +++ b/tests/hipscat_import/data/test_formats/catalog.starr @@ -0,0 +1,132 @@ +id,ra,dec,ra_error,dec_error +700,282.5,-58.5,0,0 +701,299.5,-48.5,0,0 +702,310.5,-27.5,0,0 +703,286.5,-69.5,0,0 +704,326.5,-45.5,0,0 +705,335.5,-32.5,0,0 +706,297.5,-36.5,0,0 +707,308.5,-69.5,0,0 +708,307.5,-37.5,0,0 +709,294.5,-45.5,0,0 +710,341.5,-39.5,0,0 +711,305.5,-49.5,0,0 +712,288.5,-49.5,0,0 +713,298.5,-41.5,0,0 +714,303.5,-37.5,0,0 +715,280.5,-35.5,0,0 +716,305.5,-60.5,0,0 +717,303.5,-43.5,0,0 +718,292.5,-60.5,0,0 +719,344.5,-39.5,0,0 +720,344.5,-47.5,0,0 +721,314.5,-34.5,0,0 +722,350.5,-58.5,0,0 +723,315.5,-68.5,0,0 +724,323.5,-41.5,0,0 
+725,308.5,-41.5,0,0 +726,341.5,-37.5,0,0 +727,301.5,-44.5,0,0 +728,328.5,-47.5,0,0 +729,299.5,-59.5,0,0 +730,322.5,-61.5,0,0 +731,343.5,-52.5,0,0 +732,337.5,-39.5,0,0 +733,329.5,-65.5,0,0 +734,348.5,-66.5,0,0 +735,299.5,-65.5,0,0 +736,303.5,-52.5,0,0 +737,316.5,-33.5,0,0 +738,345.5,-64.5,0,0 +739,332.5,-57.5,0,0 +740,306.5,-33.5,0,0 +741,303.5,-38.5,0,0 +742,348.5,-45.5,0,0 +743,307.5,-25.5,0,0 +744,349.5,-39.5,0,0 +745,337.5,-38.5,0,0 +746,283.5,-31.5,0,0 +747,327.5,-61.5,0,0 +748,296.5,-63.5,0,0 +749,293.5,-55.5,0,0 +750,338.5,-67.5,0,0 +751,330.5,-44.5,0,0 +752,291.5,-34.5,0,0 +753,307.5,-45.5,0,0 +754,313.5,-30.5,0,0 +755,303.5,-38.5,0,0 +756,319.5,-35.5,0,0 +757,346.5,-34.5,0,0 +758,325.5,-53.5,0,0 +759,290.5,-48.5,0,0 +760,320.5,-53.5,0,0 +761,329.5,-29.5,0,0 +762,327.5,-51.5,0,0 +763,306.5,-38.5,0,0 +764,297.5,-45.5,0,0 +765,306.5,-35.5,0,0 +766,310.5,-63.5,0,0 +767,314.5,-29.5,0,0 +768,297.5,-60.5,0,0 +769,307.5,-42.5,0,0 +770,285.5,-29.5,0,0 +771,348.5,-67.5,0,0 +772,348.5,-64.5,0,0 +773,293.5,-50.5,0,0 +774,281.5,-54.5,0,0 +775,321.5,-54.5,0,0 +776,344.5,-63.5,0,0 +777,307.5,-39.5,0,0 +778,313.5,-36.5,0,0 +779,347.5,-29.5,0,0 +780,326.5,-52.5,0,0 +781,330.5,-46.5,0,0 +782,290.5,-39.5,0,0 +783,286.5,-42.5,0,0 +784,338.5,-40.5,0,0 +785,296.5,-44.5,0,0 +786,336.5,-33.5,0,0 +787,320.5,-47.5,0,0 +788,283.5,-61.5,0,0 +789,287.5,-45.5,0,0 +790,286.5,-35.5,0,0 +791,312.5,-28.5,0,0 +792,320.5,-69.5,0,0 +793,289.5,-58.5,0,0 +794,300.5,-66.5,0,0 +795,306.5,-58.5,0,0 +796,320.5,-33.5,0,0 +797,308.5,-62.5,0,0 +798,316.5,-36.5,0,0 +799,313.5,-31.5,0,0 +800,299.5,-37.5,0,0 +801,309.5,-50.5,0,0 +802,304.5,-49.5,0,0 +803,336.5,-25.5,0,0 +804,322.5,-66.5,0,0 +805,297.5,-52.5,0,0 +806,312.5,-29.5,0,0 +807,303.5,-60.5,0,0 +808,320.5,-40.5,0,0 +809,283.5,-34.5,0,0 +810,301.5,-59.5,0,0 +811,315.5,-68.5,0,0 +812,346.5,-60.5,0,0 +813,349.5,-37.5,0,0 +814,312.5,-33.5,0,0 +815,283.5,-68.5,0,0 +816,288.5,-69.5,0,0 +817,318.5,-48.5,0,0 +818,300.5,-55.5,0,0 +819,313.5,-35.5,0,0 +820,286.5,-46.5,0,0 +821,330.5,-52.5,0,0 +822,301.5,-54.5,0,0 +823,338.5,-45.5,0,0 +824,305.5,-28.5,0,0 +825,315.5,-30.5,0,0 +826,335.5,-69.5,0,0 +827,310.5,-40.5,0,0 +828,330.5,-26.5,0,0 +829,314.5,-35.5,0,0 +830,306.5,-50.5,0,0 \ No newline at end of file
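
After this series, all four pipelines (catalog import, association, index,
margin cache) are launched through the consolidated entry points in
hipscat_import.pipeline, rather than the removed hipscat-import console
script. A minimal sketch of the new invocation; the argument values below
are illustrative only:

    from hipscat_import.catalog.arguments import ImportArguments
    from hipscat_import.pipeline import pipeline

    args = ImportArguments(
        output_catalog_name="my_catalog",
        input_path="/data/input",    # illustrative
        input_format="csv",
        output_path="/data/output",  # illustrative
        completion_email_address="user@example.com",  # optional status email
    )

    # Creates a dask client from dask_tmp / dask_n_workers /
    # dask_threads_per_worker, dispatches on the argument type, and sends a
    # success or failure email if an address was provided.
    pipeline(args)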