diff --git a/.copier-answers.yml b/.copier-answers.yml index 44cede77..886b28f1 100644 --- a/.copier-answers.yml +++ b/.copier-answers.yml @@ -15,9 +15,9 @@ include_benchmarks: true include_docs: true include_notebooks: true mypy_type_checking: basic -package_name: hipscat_import +package_name: hats_import project_license: BSD -project_name: hipscat-import +project_name: hats-import project_organization: astronomy-commons python_versions: - '3.9' diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 6a7b51dd..8d01c723 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -26,7 +26,7 @@ If it fixes an open issue, please link to the issue here. If this PR closes an i ## Code Quality -- [ ] I have read the [Contribution Guide](https://hipscat-import.readthedocs.io/en/stable/guide/contributing.html) and [LINCC Frameworks Code of Conduct](https://lsstdiscoveryalliance.org/programs/lincc-frameworks/code-conduct/) +- [ ] I have read the [Contribution Guide](https://hats-import.readthedocs.io/en/stable/guide/contributing.html) and [LINCC Frameworks Code of Conduct](https://lsstdiscoveryalliance.org/programs/lincc-frameworks/code-conduct/) - [ ] My code follows the code style of this project - [ ] My code builds (or compiles) cleanly without any errors or warnings - [ ] My code contains relevant comments and necessary documentation diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 49231cf6..ca15234a 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -32,7 +32,7 @@ jobs: python -m pip install --upgrade pip pip install . - name: Create lock requirements file - run: pip list --format=freeze --exclude "hipscat-import" > requirements.txt + run: pip list --format=freeze --exclude "hats-import" > requirements.txt - name: Install dev dependencies run: pip install .[dev] - name: Run unit tests with pytest diff --git a/.github/workflows/testing-and-coverage.yml b/.github/workflows/testing-and-coverage.yml index bb6c1668..6a2a7c7a 100644 --- a/.github/workflows/testing-and-coverage.yml +++ b/.github/workflows/testing-and-coverage.yml @@ -31,7 +31,7 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Run unit tests with pytest run: | - python -m pytest tests --cov=hipscat_import --cov-report=xml + python -m pytest tests --cov=hats_import --cov-report=xml - name: Run dask-on-ray tests with pytest run: | python -m pytest tests --use_ray diff --git a/README.md b/README.md index a7f2805b..b4d56657 100644 --- a/README.md +++ b/README.md @@ -1,33 +1,33 @@ -# hipscat-import +# hats-import [![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/stable/) -[![PyPI](https://img.shields.io/pypi/v/hipscat-import?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/hipscat-import/) -[![Conda](https://img.shields.io/conda/vn/conda-forge/hipscat-import.svg?color=blue&logo=condaforge&logoColor=white)](https://anaconda.org/conda-forge/hipscat-import) +[![PyPI](https://img.shields.io/pypi/v/hats-import?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/hats-import/) +[![Conda](https://img.shields.io/conda/vn/conda-forge/hats-import.svg?color=blue&logo=condaforge&logoColor=white)](https://anaconda.org/conda-forge/hats-import) -[![GitHub Workflow 
Status](https://img.shields.io/github/actions/workflow/status/astronomy-commons/hipscat-import/smoke-test.yml)](https://github.com/astronomy-commons/hipscat-import/actions/workflows/smoke-test.yml) -[![codecov](https://codecov.io/gh/astronomy-commons/hipscat-import/branch/main/graph/badge.svg)](https://codecov.io/gh/astronomy-commons/hipscat-import) -[![Read the Docs](https://img.shields.io/readthedocs/hipscat-import)](https://hipscat-import.readthedocs.io/) +[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/astronomy-commons/hats-import/smoke-test.yml)](https://github.com/astronomy-commons/hats-import/actions/workflows/smoke-test.yml) +[![codecov](https://codecov.io/gh/astronomy-commons/hats-import/branch/main/graph/badge.svg)](https://codecov.io/gh/astronomy-commons/hats-import) +[![Read the Docs](https://img.shields.io/readthedocs/hats-import)](https://hats-import.readthedocs.io/) -## HiPSCat import - Utility for ingesting large survey data into HiPSCat structure. +## HATS import - Utility for ingesting large survey data into HATS structure. -Check out our [ReadTheDocs site](https://hipscat-import.readthedocs.io/en/stable/) +Check out our [ReadTheDocs site](https://hats-import.readthedocs.io/en/stable/) for more information on partitioning, installation, and contributing. See related projects: -* HiPSCat ([on GitHub](https://github.com/astronomy-commons/hipscat)) - ([on ReadTheDocs](https://hipscat.readthedocs.io/en/stable/)) +* HATS ([on GitHub](https://github.com/astronomy-commons/hats)) + ([on ReadTheDocs](https://hats.readthedocs.io/en/stable/)) * LSDB ([on GitHub](https://github.com/astronomy-commons/lsdb)) ([on ReadTheDocs](https://lsdb.readthedocs.io/en/stable/)) ## Contributing -[![GitHub issue custom search in repo](https://img.shields.io/github/issues-search/astronomy-commons/hipscat-import?color=purple&label=Good%20first%20issues&query=is%3Aopen%20label%3A%22good%20first%20issue%22)](https://github.com/astronomy-commons/hipscat-import/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) +[![GitHub issue custom search in repo](https://img.shields.io/github/issues-search/astronomy-commons/hats-import?color=purple&label=Good%20first%20issues&query=is%3Aopen%20label%3A%22good%20first%20issue%22)](https://github.com/astronomy-commons/hats-import/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) -See the [contribution guide](https://hipscat-import.readthedocs.io/en/stable/guide/contributing.html) +See the [contribution guide](https://hats-import.readthedocs.io/en/stable/guide/contributing.html) for complete installation instructions and contribution best practices. ## Acknowledgements diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index fbe36f21..1c4c537e 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -3,9 +3,9 @@ // you know what you are doing. "version": 1, // The name of the project being benchmarked. - "project": "hipscat-import", + "project": "hats-import", // The project's homepage. - "project_url": "https://github.com/astronomy-commons/hipscat-import", + "project_url": "https://github.com/astronomy-commons/hats-import", // The URL or local path of the source code repository for the // project being benchmarked. "repo": "..", @@ -32,7 +32,7 @@ // variable. "environment_type": "virtualenv", // the base URL to show a commit for the project. 
- "show_commit_url": "https://github.com/astronomy-commons/hipscat-import/commit/", + "show_commit_url": "https://github.com/astronomy-commons/hats-import/commit/", // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. "pythons": [ diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 061dc651..86f36e98 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -3,8 +3,8 @@ import numpy as np -from hipscat_import.catalog.resume_plan import ResumePlan -from hipscat_import.catalog.sparse_histogram import SparseHistogram +from hats_import.catalog.resume_plan import ResumePlan +from hats_import.catalog.sparse_histogram import SparseHistogram class BinningSuite: diff --git a/docs/catalogs/arguments.rst b/docs/catalogs/arguments.rst index dc3f1b6e..aac42c0c 100644 --- a/docs/catalogs/arguments.rst +++ b/docs/catalogs/arguments.rst @@ -9,7 +9,7 @@ A minimal arguments block will look something like: .. code-block:: python - from hipscat_import.catalog.arguments import ImportArguments + from hats_import.catalog.arguments import ImportArguments args = ImportArguments( sort_columns="ObjectID", @@ -25,8 +25,8 @@ A minimal arguments block will look something like: More details on each of these parameters is provided in sections below. For the curious, see the API documentation for -:py:class:`hipscat_import.catalog.arguments.ImportArguments`, and its superclass -:py:class:`hipscat_import.runtime_arguments.RuntimeArguments`. +:py:class:`hats_import.catalog.arguments.ImportArguments`, and its superclass +:py:class:`hats_import.runtime_arguments.RuntimeArguments`. Pipeline setup ------------------------------------------------------------------------------- @@ -52,7 +52,7 @@ to the pipeline, ignoring the above arguments. This would look like: .. code-block:: python from dask.distributed import Client - from hipscat_import.pipeline import pipeline_with_client + from hats_import.pipeline import pipeline_with_client args = ... # ImportArguments() with Client('scheduler:port') as client: @@ -63,7 +63,7 @@ potentially avoid some python threading issues with dask: .. code-block:: python - from hipscat_import.pipeline import pipeline + from hats_import.pipeline import pipeline def import_pipeline(): args = ... @@ -88,14 +88,14 @@ files are found, we will restore the pipeline's previous progress. If you want to start the pipeline from scratch you can simply set `resume=False`. Alternatively, go to the temp directory you've specified and remove any intermediate -files created by the previous runs of the ``hipscat-import`` pipeline. You should also +files created by the previous runs of the ``hats-import`` pipeline. You should also remove the output directory if it has any content. The resume argument performs these cleaning operations automatically for you. Reading input files ------------------------------------------------------------------------------- -Catalog import reads through a list of files and converts them into a hipscatted catalog. +Catalog import reads through a list of files and converts them into a hats-sharded catalog. Which files? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -134,7 +134,7 @@ to parse a whitespace separated file. Otherwise, you can use a short string to specify an existing file reader type e.g. ``file_reader="csv"``. 
You can find the full API documentation for -:py:class:`hipscat_import.catalog.file_readers.InputReader` +:py:class:`hats_import.catalog.file_readers.InputReader` .. code-block:: python @@ -206,18 +206,18 @@ Which fields? Specify the ``ra_column`` and ``dec_column`` for the dataset. -There are two fields that we require in order to make a valid hipscatted +There are two fields that we require in order to make a valid hats-sharded catalog, the right ascension and declination. At this time, this is the only supported system for celestial coordinates. -If you're importing data that has previously been hipscatted, you may use -``use_hipscat_index = True``. This will use that previously compused hipscat spatial +If you're importing data that has previously been hats-sharded, you may use +``use_healpix_29 = True``. This will use that previously computed hats spatial index as the position, instead of ra/dec. Healpix order and thresholds ------------------------------------------------------------------------------- -When creating a new catalog through the hipscat-import process, we try to +When creating a new catalog through the hats-import process, we try to create partitions with approximately the same number of rows per partition. This isn't perfect, because the sky is uneven, but we still try to create smaller-area pixels in more dense areas, and larger-area pixels in less dense @@ -322,19 +322,19 @@ How? You may want to tweak parameters of the final catalog output, and we have helper arguments for a few of those. -``add_hipscat_index`` - ``bool`` - whether or not to add the hipscat spatial index -as a column in the resulting catalog. The ``_hipscat_index`` field is designed to make many +``add_healpix_29`` - ``bool`` - whether or not to add the hats spatial index +as a column in the resulting catalog. The ``_healpix_29`` field is designed to make many dask operations more performant, but if you do not intend to publish your dataset and do not intend to use dask, then you can suppress generation of this column to save a little space in your final disk usage. -The ``_hipscat_index`` uses a high healpix order and a uniqueness counter to create +The ``_healpix_29`` uses a high healpix order to create values that can order all points in the sky, according to a nested healpix scheme. ``sort_columns`` - ``str`` - column for survey identifier, or other sortable column. If sorting by multiple columns, they should be comma-separated. -If ``add_hipscat_index=True``, this sorting will be used to resolve the -index counter within the same higher-order pixel space. +If ``add_healpix_29=True``, ``_healpix_29`` will be the primary sort key, but the +provided sorting will be used for any rows within the same higher-order pixel space. ``use_schema_file`` - ``str`` - path to a parquet file with schema metadata. This will be used for column metadata when writing the files, if specified. diff --git a/docs/catalogs/public/allwise.rst b/docs/catalogs/public/allwise.rst index edf9b4e2..0daa246d 100644 --- a/docs/catalogs/public/allwise.rst +++ b/docs/catalogs/public/allwise.rst @@ -32,9 +32,9 @@ Example import import pandas as pd - import hipscat_import.pipeline as runner - from hipscat_import.catalog.arguments import ImportArguments - from hipscat_import.catalog.file_readers import CsvReader + import hats_import.pipeline as runner + from hats_import.catalog.arguments import ImportArguments + from hats_import.catalog.file_readers import CsvReader # Load the column names and types from a side file. 
type_frame = pd.read_csv("allwise_types.csv") diff --git a/docs/catalogs/public/neowise.rst b/docs/catalogs/public/neowise.rst index 4a21fd8c..5f7657b3 100644 --- a/docs/catalogs/public/neowise.rst +++ b/docs/catalogs/public/neowise.rst @@ -32,9 +32,9 @@ Example import import pandas as pd - import hipscat_import.pipeline as runner - from hipscat_import.catalog.arguments import ImportArguments - from hipscat_import.catalog.file_readers import CsvReader + import hats_import.pipeline as runner + from hats_import.catalog.arguments import ImportArguments + from hats_import.catalog.file_readers import CsvReader # Load the column names and types from a side file. type_frame = pd.read_csv("neowise_types.csv") diff --git a/docs/catalogs/public/panstarrs.rst b/docs/catalogs/public/panstarrs.rst index c5141d8f..edcac8d6 100644 --- a/docs/catalogs/public/panstarrs.rst +++ b/docs/catalogs/public/panstarrs.rst @@ -30,9 +30,9 @@ Example import of objects (otmo) import pandas as pd - import hipscat_import.pipeline as runner - from hipscat_import.catalog.arguments import ImportArguments - from hipscat_import.catalog.file_readers import CsvReader + import hats_import.pipeline as runner + from hats_import.catalog.arguments import ImportArguments + from hats_import.catalog.file_readers import CsvReader # Load the column names and types from a side file. type_frame = pd.read_csv("ps1_otmo_types.csv") diff --git a/docs/catalogs/public/sdss.rst b/docs/catalogs/public/sdss.rst index 6ad74cc7..fb342644 100644 --- a/docs/catalogs/public/sdss.rst +++ b/docs/catalogs/public/sdss.rst @@ -64,8 +64,8 @@ Example import .. code-block:: python - from hipscat_import.catalog.arguments import ImportArguments - import hipscat_import.pipeline as runner + from hats_import.catalog.arguments import ImportArguments + import hats_import.pipeline as runner args = ImportArguments( output_artifact_name="sdss_dr16q", diff --git a/docs/catalogs/public/tic.rst b/docs/catalogs/public/tic.rst index 9376347e..1902cb19 100644 --- a/docs/catalogs/public/tic.rst +++ b/docs/catalogs/public/tic.rst @@ -30,9 +30,9 @@ Example import import pandas as pd - import hipscat_import.pipeline as runner - from hipscat_import.catalog.arguments import ImportArguments - from hipscat_import.catalog.file_readers import CsvReader + import hats_import.pipeline as runner + from hats_import.catalog.arguments import ImportArguments + from hats_import.catalog.file_readers import CsvReader type_frame = pd.read_csv("tic_types.csv") type_map = dict(zip(type_frame["name"], type_frame["type"])) diff --git a/docs/catalogs/public/zubercal.rst b/docs/catalogs/public/zubercal.rst index d2a9e1c9..97835dc7 100644 --- a/docs/catalogs/public/zubercal.rst +++ b/docs/catalogs/public/zubercal.rst @@ -32,9 +32,9 @@ Challenges with this data set .. 
code-block:: python - import hipscat_import.pipeline as runner - from hipscat_import.catalog.arguments import ImportArguments - from hipscat_import.catalog.file_readers import ParquetReader + import hats_import.pipeline as runner + from hats_import.catalog.arguments import ImportArguments + from hats_import.catalog.file_readers import ParquetReader import pyarrow.parquet as pq import pyarrow as pa import re diff --git a/docs/catalogs/temp_files.rst b/docs/catalogs/temp_files.rst index 451bc990..f0eb45f6 100644 --- a/docs/catalogs/temp_files.rst +++ b/docs/catalogs/temp_files.rst @@ -1,7 +1,7 @@ Temporary files and disk usage =============================================================================== -This page aims to characterize intermediate files created by the hipscat-import +This page aims to characterize intermediate files created by the hats-import catalog creation process. Most users are going to be ok with setting the ``tmp_dir`` and not thinking much more about it. @@ -90,7 +90,7 @@ Some more explanation: What's happening when ------------------------------------------------------------------------------- -The hipscat-import catalog creation process generates a lot of temporary files. Some find this +The hats-import catalog creation process generates a lot of temporary files. Some find this surprising, so we try to provide a narrative of what's happening and why. Planning stage @@ -196,10 +196,10 @@ final catalog can be very different from the on-disk size of the input files. In our internal testing, we converted a number of different kinds of catalogs, and share some of the results with you, to give some suggestion of the disk requirements -you may face when converting your own catalogs to hipscat format. +you may face when converting your own catalogs to hats format. ============= =============== =========== =============== ========================= -Catalog Input size (-h) Input size Hipscatted size Ratio +Catalog Input size (-h) Input size HATS size Ratio ============= =============== =========== =============== ========================= allwise 1.2T 1196115700 310184460 0.26 (a lot smaller) neowise 3.9T 4177447284 4263269112 1.02 (about the same) @@ -213,4 +213,4 @@ Notes: - allwise, neowise, and tic were all originally compressed CSV files. - sdss was originally a series of fits files - zubercal was originally 500k parquet files, and is reduced in the example to - around 70k hipscat parquet files. + around 70k hats parquet files. diff --git a/docs/conf.py b/docs/conf.py index a41343d2..eaf804a4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,10 +14,10 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = "hipscat-import" +project = "hats-import" copyright = "2023, LINCC Frameworks" author = "LINCC Frameworks" -release = version("hipscat-import") +release = version("hats-import") # for example take major/minor version = ".".join(release.split(".")[:2]) @@ -80,8 +80,8 @@ ## lets us suppress the copy button on select code blocks. 
copybutton_selector = "div:not(.no-copybutton) > div.highlight > pre" -# Cross-link hipscat documentation from the API reference: +# Cross-link hats documentation from the API reference: # https://docs.readthedocs.io/en/stable/guides/intersphinx.html intersphinx_mapping = { - "hipscat": ("http://hipscat.readthedocs.io/en/stable/", None), + "hats": ("http://hats.readthedocs.io/en/stable/", None), } diff --git a/docs/guide/contact.rst b/docs/guide/contact.rst index 48fa9647..5645b658 100644 --- a/docs/guide/contact.rst +++ b/docs/guide/contact.rst @@ -6,7 +6,7 @@ We at LINCC Frameworks pride ourselves on being a friendly bunch! If you're encountering issues, have some gnarly dataset, have ideas for making our products better, or pretty much anything else, reach out! -* Open an issue in our github repo for hipscat-import - * https://github.com/astronomy-commons/hipscat-import/issues/new +* Open an issue in our github repo for hats-import + * https://github.com/astronomy-commons/hats-import/issues/new * If you're on LSSTC slack, so are we! `#lincc-frameworks-qa `_ \ No newline at end of file diff --git a/docs/guide/contributing.rst b/docs/guide/contributing.rst index b2e1f545..7aa478a8 100644 --- a/docs/guide/contributing.rst +++ b/docs/guide/contributing.rst @@ -1,4 +1,4 @@ -Contributing to hipscat-import +Contributing to hats-import =============================================================================== Find (or make) a new GitHub issue diff --git a/docs/guide/dask_on_ray.rst b/docs/guide/dask_on_ray.rst index a80ade10..53d290d2 100644 --- a/docs/guide/dask_on_ray.rst +++ b/docs/guide/dask_on_ray.rst @@ -8,7 +8,7 @@ See more on Ray's site: https://docs.ray.io/en/latest/ray-more-libs/dask-on-ray.html -How to use in hipscat-import pipelines +How to use in hats-import pipelines ------------------------------------------------------------------------------- Install ray @@ -27,7 +27,7 @@ You should also disable ray when you're done, just to clean things up. from dask.distributed import Client from ray.util.dask import disable_dask_on_ray, enable_dask_on_ray - from hipscat_import.pipeline import pipeline_with_client + from hats_import.pipeline import pipeline_with_client with ray.init( num_cpus=args.dask_n_workers, diff --git a/docs/guide/index_table.rst b/docs/guide/index_table.rst index eb816bf2..fc7232b5 100644 --- a/docs/guide/index_table.rst +++ b/docs/guide/index_table.rst @@ -2,7 +2,7 @@ Index Table =============================================================================== This page discusses topics around setting up a pipeline to generate a secondary -index lookup for a field on an existing hipscat catalog on disk. +index lookup for a field on an existing hats catalog on disk. This is useful if you would like to have quick access to rows of your table using a survey-provided unique identifier that is NOT spatially correlated. To find @@ -15,7 +15,7 @@ and where to put the output files. A minimal arguments block will look something .. code-block:: python - from hipscat_import.index.arguments import IndexArguments + from hats_import.index.arguments import IndexArguments args = IndexArguments( input_catalog_path="./my_data/my_catalog", @@ -27,8 +27,8 @@ and where to put the output files. A minimal arguments block will look something More details on each of these parameters is provided in sections below. 
For the curious, see the API documentation for -:py:class:`hipscat_import.index.arguments.IndexArguments`, -and its superclass :py:class:`hipscat_import.runtime_arguments.RuntimeArguments`. +:py:class:`hats_import.index.arguments.IndexArguments`, +and its superclass :py:class:`hats_import.runtime_arguments.RuntimeArguments`. Dask setup ------------------------------------------------------------------------------- @@ -51,7 +51,7 @@ to the pipeline, ignoring the above arguments. This would look like: .. code-block:: python from dask.distributed import Client - from hipscat_import.pipeline import pipeline_with_client + from hats_import.pipeline import pipeline_with_client args = IndexArguments(...) with Client('scheduler:port') as client: @@ -62,7 +62,7 @@ potentially avoid some python threading issues with dask: .. code-block:: python - from hipscat_import.pipeline import pipeline + from hats_import.pipeline import pipeline def index_pipeline(): args = IndexArguments(...) @@ -75,7 +75,7 @@ Input Catalog ------------------------------------------------------------------------------- For this pipeline, you will need to have already transformed your catalog into -hipscat parquet format. Provide the path to the catalog data with the argument +hats parquet format. Provide the path to the catalog data with the argument ``input_catalog_path``. ``indexing_column`` is required, and is the column that you would like to create @@ -149,8 +149,8 @@ list along to your ``ImportArguments``! import numpy as np import os - from hipscat.io.parquet_metadata import write_parquet_metadata - from hipscat.io import file_io + from hats.io.parquet_metadata import write_parquet_metadata + from hats.io import file_io ## Specify the catalog and column you're making your index over. input_catalog_path="/data/input_catalog" @@ -249,10 +249,8 @@ arguments for a few of those. ``compute_partition_size`` - ``int`` - partition size used when computing the leaf parquet files. -``include_hipscat_index`` - ``bool`` - whether or not to include the 64-bit -hipscat spatial index in the index table. Defaults to ``True``. It can be -useful to keep this value if the ``_hipscat_index`` is your only unique -identifier, or you intend to re-partition your data. +``include_healpix_29`` - ``bool`` - whether or not to include the 64-bit +hats spatial index in the index table. Defaults to ``True``. ``include_order_pixel`` - ``bool`` - whether to include partitioning columns, ``Norder``, ``Dir``, and ``Npix``. You probably want to keep these! @@ -261,7 +259,7 @@ when trying to use the index table. ``drop_duplicates`` - ``bool`` - drop duplicate occurrences of all fields that are included in the index table. This is enabled by default, but can be -**very** slow. This has an interaction with the above ``include_hipscat_index`` +**very** slow. This has an interaction with the above ``include_healpix_29`` and ``include_order_pixel`` options above. We desribe some common patterns below: - I want to create an index over the target ID in my catalog. There are no @@ -270,8 +268,7 @@ and ``include_order_pixel`` options above. We desribe some common patterns below .. code-block:: python indexing_column="target_id", - # target_id is unique, and I don't need to keep extra data - include_hipscat_index=False, + include_healpix_29=False, # I want to know where my data is in the sky. include_order_pixel=True, # target_id is unique, and I don't need to do extra work to de-duplicate @@ -287,7 +284,7 @@ and ``include_order_pixel`` options above. 
We desribe some common patterns below indexing_column="target_id", # target_id is NOT unique drop_duplicates=True, - # target_id is NOT unique, but including the _hipscat_index will bloat results - include_hipscat_index=False, + # including the _healpix_29 will bloat results + include_healpix_29=False, # I want to know where my data is in the sky. include_order_pixel=True, diff --git a/docs/guide/margin_cache.rst b/docs/guide/margin_cache.rst index d481b519..613df6cb 100644 --- a/docs/guide/margin_cache.rst +++ b/docs/guide/margin_cache.rst @@ -6,14 +6,14 @@ For more discussion of the whys and hows of margin caches, please see for more information. This page discusses topics around setting up a pipeline to generate a margin -cache from an existing hipscat catalog on disk. +cache from an existing hats catalog on disk. At a minimum, you need arguments that include where to find the input files, and where to put the output files. A minimal arguments block will look something like: .. code-block:: python - from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments + from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments args = MarginCacheArguments( input_catalog_path="./my_data/my_catalog", @@ -26,8 +26,8 @@ and where to put the output files. A minimal arguments block will look something More details on each of these parameters is provided in sections below. For the curious, see the API documentation for -:py:class:`hipscat_import.margin_cache.margin_cache_arguments.MarginCacheArguments`, -and its superclass :py:class:`hipscat_import.runtime_arguments.RuntimeArguments`. +:py:class:`hats_import.margin_cache.margin_cache_arguments.MarginCacheArguments`, +and its superclass :py:class:`hats_import.runtime_arguments.RuntimeArguments`. Dask setup ------------------------------------------------------------------------------- @@ -50,7 +50,7 @@ to the pipeline, ignoring the above arguments. This would look like: .. code-block:: python from dask.distributed import Client - from hipscat_import.pipeline import pipeline_with_client + from hats_import.pipeline import pipeline_with_client args = MarginCacheArguments(...) with Client('scheduler:port') as client: @@ -61,7 +61,7 @@ potentially avoid some python threading issues with dask: .. code-block:: python - from hipscat_import.pipeline import pipeline + from hats_import.pipeline import pipeline def margin_pipeline(): args = MarginCacheArguments(...) @@ -74,10 +74,10 @@ Input Catalog ------------------------------------------------------------------------------- For this pipeline, you will need to have already transformed your catalog into -hipscat parquet format. Provide the path to the catalog data with the argument +hats parquet format. Provide the path to the catalog data with the argument ``input_catalog_path``. -The input hipscat catalog will provide its own right ascension and declination +The input hats catalog will provide its own right ascension and declination that will be used when computing margin populations. Margin calculation parameters diff --git a/docs/index.rst b/docs/index.rst index 57852de1..01d6bb7f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,25 +1,25 @@ -HiPSCat Import +HATS Import ======================================================================================== -Utility for ingesting large survey data into HiPSCat structure. +Utility for ingesting large survey data into HATS structure. 
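Pulling the index-table patterns above into one runnable sketch under the new names — the values (``target_id``, paths, artifact name) are illustrative, and ``pipeline(args)`` is the single-machine entry point referenced in the docs hunks above:

.. code-block:: python

    from hats_import.index.arguments import IndexArguments
    from hats_import.pipeline import pipeline

    args = IndexArguments(
        input_catalog_path="./my_data/my_catalog",
        indexing_column="target_id",
        # target_id is unique: skip the spatial index and de-duplication work.
        include_healpix_29=False,
        include_order_pixel=True,
        drop_duplicates=False,
        output_path="./output",
        output_artifact_name="my_catalog_target_id_index",
    )
    pipeline(args)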
Installation ------------------------------------------------------------------------------- We recommend installing in a virtual environment, like venv or conda. You may -need to install or upgrade versions of dependencies to work with hipscat-import. +need to install or upgrade versions of dependencies to work with hats-import. .. code-block:: console - pip install hipscat-import + pip install hats-import .. tip:: Installing on Mac - ``healpy`` is a very necessary dependency for hipscat libraries at this time, but + ``healpy`` is a very necessary dependency for hats libraries at this time, but native prebuilt binaries for healpy on Apple Silicon Macs `do not yet exist `_, - so it's recommended to install via conda before proceeding to hipscat-import. + so it's recommended to install via conda before proceeding to hats-import. .. code-block:: console @@ -29,7 +29,7 @@ need to install or upgrade versions of dependencies to work with hipscat-import. Setting up a pipeline ------------------------------------------------------------------------------- -For each type of dataset the hipscat-import tool can generate, there is an argument +For each type of dataset the hats-import tool can generate, there is an argument container class that you will need to instantiate and populate with relevant arguments. See dataset-specific notes on arguments: @@ -45,7 +45,7 @@ threading issues with dask: .. code-block:: python from dask.distributed import Client - from hipscat_import.pipeline import pipeline_with_client + from hats_import.pipeline import pipeline_with_client def main(): args = ... diff --git a/docs/notebooks/estimate_pixel_threshold.ipynb b/docs/notebooks/estimate_pixel_threshold.ipynb index 670192bc..0908268e 100755 --- a/docs/notebooks/estimate_pixel_threshold.ipynb +++ b/docs/notebooks/estimate_pixel_threshold.ipynb @@ -11,7 +11,7 @@ "\n", "**Background**\n", "\n", - "When creating a new catalog through the hipscat-import process, we try to create partitions with approximately the same number of rows per partition. This isn't perfect, because the sky is uneven, but we still try to create smaller-area pixels in more dense areas, and larger-area pixels in less dense areas. We use the argument `pixel_threshold` and will split a partition into smaller healpix pixels until the number of rows is smaller than `pixel_threshold`.\n", + "When creating a new catalog through the hats-import process, we try to create partitions with approximately the same number of rows per partition. This isn't perfect, because the sky is uneven, but we still try to create smaller-area pixels in more dense areas, and larger-area pixels in less dense areas. 
We use the argument `pixel_threshold` and will split a partition into smaller healpix pixels until the number of rows is smaller than `pixel_threshold`.\n", "\n", "We do this to increase parallelization of reads and downstream analysis: if the files are around the same size, and operations on each partition take around the same amount of time, we're not as likely to be waiting on a single process to complete for the whole pipeline to complete.\n", "\n", @@ -19,7 +19,7 @@ "\n", "**Objective**\n", "\n", - "In this notebook, we'll go over *one* strategy for estimating the `pixel_threshold` argument you can use when importing a new catalog into hipscat format.\n", + "In this notebook, we'll go over *one* strategy for estimating the `pixel_threshold` argument you can use when importing a new catalog into hats format.\n", "\n", "This is not guaranteed to give you optimal results, but it could give you some hints toward *better* results." ] @@ -60,10 +60,10 @@ "metadata": {}, "outputs": [], "source": [ - "from hipscat_import.catalog.file_readers import CsvReader\n", + "from hats_import.catalog.file_readers import CsvReader\n", "\n", "### Change this path!!!\n", - "input_file = \"../../tests/hipscat_import/data/small_sky/catalog.csv\"\n", + "input_file = \"../../tests/data/small_sky/catalog.csv\"\n", "\n", "file_reader = CsvReader(chunksize=5_000)\n", "\n", @@ -77,7 +77,7 @@ "source": [ "## Inspect parquet file and metadata\n", "\n", - "Now that we have parsed our survey data into parquet, we can check what the data will look like when it's imported into hipscat format.\n", + "Now that we have parsed our survey data into parquet, we can check what the data will look like when it's imported into hats format.\n", "\n", "If you're just here to get a naive estimate for your pixel threshold, we'll do that first, then take a look at some other parquet characteristics later for the curious." ] @@ -161,7 +161,7 @@ "\n", "Below, we inspect the row and column group metadata to show the compressed size of the fields on disk. The last column, `percent`, show the percent of total size taken up by the column.\n", "\n", - "You *can* use this to inform which columns you keep when importing a catalog into hipscat format. e.g. if some columns are less useful for your science, and take up a lot of space, maybe leave them out!" + "You *can* use this to inform which columns you keep when importing a catalog into hats format. e.g. if some columns are less useful for your science, and take up a lot of space, maybe leave them out!" 
] }, { @@ -192,7 +192,7 @@ ], "metadata": { "kernelspec": { - "display_name": "hipscatenv", + "display_name": "hatsenv", "language": "python", "name": "python3" }, diff --git a/docs/notebooks/unequal_schema.ipynb b/docs/notebooks/unequal_schema.ipynb index 3fc39e0a..664b448b 100644 --- a/docs/notebooks/unequal_schema.ipynb +++ b/docs/notebooks/unequal_schema.ipynb @@ -67,11 +67,11 @@ "import os\n", "from dask.distributed import Client\n", "\n", - "from hipscat_import.pipeline import pipeline_with_client\n", - "from hipscat_import.catalog.arguments import ImportArguments\n", - "from hipscat_import.catalog.file_readers import get_file_reader\n", + "from hats_import.pipeline import pipeline_with_client\n", + "from hats_import.catalog.arguments import ImportArguments\n", + "from hats_import.catalog.file_readers import get_file_reader\n", "\n", - "mixed_schema_csv_dir = \"../../tests/hipscat_import/data/mixed_schema\"\n", + "mixed_schema_csv_dir = \"../../tests/data/mixed_schema\"\n", "tmp_path = tempfile.TemporaryDirectory()\n", "\n", "args = ImportArguments(\n", @@ -110,7 +110,7 @@ "source": [ "import pyarrow.parquet as pq\n", "\n", - "mixed_schema_csv_parquet = \"../../tests/hipscat_import/data/mixed_schema/schema.parquet\"\n", + "mixed_schema_csv_parquet = \"../../tests/data/mixed_schema/schema.parquet\"\n", "\n", "parquet_file = pq.ParquetFile(mixed_schema_csv_parquet)\n", "print(parquet_file.schema)" @@ -294,7 +294,7 @@ ], "metadata": { "kernelspec": { - "display_name": "hipscatenv", + "display_name": "hatsenv", "language": "python", "name": "python3" }, diff --git a/docs/requirements.txt b/docs/requirements.txt index 11f126d9..65177807 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -9,4 +9,4 @@ sphinx sphinx-autoapi sphinx-copybutton sphinx-book-theme -git+https://github.com/astronomy-commons/hipscat.git@main \ No newline at end of file +git+https://github.com/astronomy-commons/hipscat.git@hats \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 9be58956..4c32bf7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "hipscat-import" +name = "hats-import" license = {file = "LICENSE"} readme = "README.md" authors = [ @@ -17,7 +17,7 @@ dynamic = ["version"] dependencies = [ "dask[complete]>=2024.3.0", # Includes dask expressions. 
"deprecated", - "hipscat >=0.3.8", + "hats >=0.3.8", "ipykernel", # Support for Jupyter notebooks "numpy", "pandas", @@ -53,10 +53,10 @@ requires = [ build-backend = "setuptools.build_meta" [tool.setuptools_scm] -write_to = "src/hipscat_import/_version.py" +write_to = "src/hats_import/_version.py" [tool.setuptools.package-data] -hipscat_import = ["py.typed"] +hats_import = ["py.typed"] [tool.pytest.ini_options] timeout = 1 @@ -69,8 +69,8 @@ testpaths = [ [tool.coverage.report] omit = [ - "src/hipscat_import/_version.py", # auto-generated - "src/hipscat_import/pipeline.py", # too annoying to test + "src/hats_import/_version.py", # auto-generated + "src/hats_import/pipeline.py", # too annoying to test ] [tool.black] @@ -129,6 +129,6 @@ ignore = [ [tool.coverage.run] omit = [ - "src/hipscat_import/_version.py", # auto-generated - "src/hipscat_import/pipeline.py", # too annoying to test + "src/hats_import/_version.py", # auto-generated + "src/hats_import/pipeline.py", # too annoying to test ] diff --git a/requirements.txt b/requirements.txt index 124b2043..8f505388 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -git+https://github.com/astronomy-commons/hipscat.git@main \ No newline at end of file +git+https://github.com/astronomy-commons/hipscat.git@hats \ No newline at end of file diff --git a/src/hipscat_import/__init__.py b/src/hats_import/__init__.py similarity index 64% rename from src/hipscat_import/__init__.py rename to src/hats_import/__init__.py index ccdbd851..e8990428 100644 --- a/src/hipscat_import/__init__.py +++ b/src/hats_import/__init__.py @@ -1,4 +1,4 @@ -"""All modules for hipscat-import package""" +"""All modules for hats-import package""" from ._version import __version__ from .runtime_arguments import RuntimeArguments diff --git a/src/hipscat_import/catalog/__init__.py b/src/hats_import/catalog/__init__.py similarity index 100% rename from src/hipscat_import/catalog/__init__.py rename to src/hats_import/catalog/__init__.py diff --git a/src/hipscat_import/catalog/arguments.py b/src/hats_import/catalog/arguments.py similarity index 89% rename from src/hipscat_import/catalog/arguments.py rename to src/hats_import/catalog/arguments.py index 89d0c144..a2b10b55 100644 --- a/src/hipscat_import/catalog/arguments.py +++ b/src/hats_import/catalog/arguments.py @@ -6,12 +6,12 @@ from pathlib import Path from typing import List -from hipscat.catalog.catalog import CatalogInfo -from hipscat.pixel_math import hipscat_id +from hats.catalog.catalog import CatalogInfo +from hats.pixel_math import hipscat_id from upath import UPath -from hipscat_import.catalog.file_readers import InputReader, get_file_reader -from hipscat_import.runtime_arguments import RuntimeArguments, find_input_paths +from hats_import.catalog.file_readers import InputReader, get_file_reader +from hats_import.runtime_arguments import RuntimeArguments, find_input_paths # pylint: disable=too-many-locals,too-many-arguments,too-many-instance-attributes,too-many-branches,too-few-public-methods @@ -36,14 +36,14 @@ class ImportArguments(RuntimeArguments): """column for right ascension""" dec_column: str = "dec" """column for declination""" - use_hipscat_index: bool = False - """use an existing hipscat spatial index as the position, instead of ra/dec""" + use_healpix_29: bool = False + """use an existing healpix-based hats spatial index as the position, instead of ra/dec""" sort_columns: str | None = None """column for survey identifier, or other sortable column. 
if sorting by multiple columns, - they should be comma-separated. if `add_hipscat_index=True`, this sorting will be used to + they should be comma-separated. if `add_healpix_29=True`, this sorting will be used to resolve the counter within the same higher-order pixel space""" - add_hipscat_index: bool = True - """add the hipscat spatial index field alongside the data""" + add_healpix_29: bool = True + """add the healpix-based hats spatial index field alongside the data""" use_schema_file: str | Path | UPath | None = None """path to a parquet file with schema metadata. this will be used for column metadata when writing the files, if specified""" @@ -123,10 +123,10 @@ def _check_arguments(self): if isinstance(self.file_reader, str): self.file_reader = get_file_reader(self.file_reader) - if self.use_hipscat_index: - self.add_hipscat_index = False + if self.use_healpix_29: + self.add_healpix_29 = False if self.sort_columns: - raise ValueError("When using _hipscat_index for position, no sort columns should be added") + raise ValueError("When using _healpix_29 for position, no sort columns should be added") # Basic checks complete - make more checks and create directories where necessary self.input_paths = find_input_paths(self.input_path, "**/*.*", self.input_file_list) @@ -156,7 +156,7 @@ def additional_runtime_provenance_info(self) -> dict: "input_file_list": self.input_file_list, "ra_column": self.ra_column, "dec_column": self.dec_column, - "use_hipscat_index": self.use_hipscat_index, + "use_healpix_29": self.use_healpix_29, "sort_columns": self.sort_columns, "constant_healpix_order": self.constant_healpix_order, "lowest_healpix_order": self.lowest_healpix_order, @@ -168,9 +168,7 @@ def additional_runtime_provenance_info(self) -> dict: } -def check_healpix_order_range( - order, field_name, lower_bound=0, upper_bound=hipscat_id.HIPSCAT_ID_HEALPIX_ORDER -): +def check_healpix_order_range(order, field_name, lower_bound=0, upper_bound=hipscat_id.SPATIAL_INDEX_ORDER): """Helper method to check if the ``order`` is within the range determined by the ``lower_bound`` and ``upper_bound``, inclusive. 
@@ -185,7 +183,7 @@ def check_healpix_order_range( """ if lower_bound < 0: raise ValueError("healpix orders must be positive") - if upper_bound > hipscat_id.HIPSCAT_ID_HEALPIX_ORDER: - raise ValueError(f"healpix order should be <= {hipscat_id.HIPSCAT_ID_HEALPIX_ORDER}") + if upper_bound > hipscat_id.SPATIAL_INDEX_ORDER: + raise ValueError(f"healpix order should be <= {hipscat_id.SPATIAL_INDEX_ORDER}") if not lower_bound <= order <= upper_bound: raise ValueError(f"{field_name} should be between {lower_bound} and {upper_bound}") diff --git a/src/hipscat_import/catalog/file_readers.py b/src/hats_import/catalog/file_readers.py similarity index 99% rename from src/hipscat_import/catalog/file_readers.py rename to src/hats_import/catalog/file_readers.py index 300717d9..a647e192 100644 --- a/src/hipscat_import/catalog/file_readers.py +++ b/src/hats_import/catalog/file_readers.py @@ -8,7 +8,7 @@ import pyarrow.parquet as pq from astropy.io import ascii as ascii_reader from astropy.table import Table -from hipscat.io import file_io +from hats.io import file_io from upath import UPath # pylint: disable=too-few-public-methods,too-many-arguments diff --git a/src/hipscat_import/catalog/map_reduce.py b/src/hats_import/catalog/map_reduce.py similarity index 86% rename from src/hipscat_import/catalog/map_reduce.py rename to src/hats_import/catalog/map_reduce.py index 799c3339..13bf7635 100644 --- a/src/hipscat_import/catalog/map_reduce.py +++ b/src/hats_import/catalog/map_reduce.py @@ -1,20 +1,20 @@ -"""Import a set of non-hipscat files using dask for parallelization""" +"""Import a set of non-hats files using dask for parallelization""" import pickle -import hipscat.pixel_math.healpix_shim as hp +import hats.pixel_math.healpix_shim as hp import numpy as np import pyarrow as pa import pyarrow.parquet as pq -from hipscat import pixel_math -from hipscat.io import file_io, paths -from hipscat.pixel_math.healpix_pixel import HealpixPixel -from hipscat.pixel_math.hipscat_id import HIPSCAT_ID_COLUMN, hipscat_id_to_healpix +from hats import pixel_math +from hats.io import file_io, paths +from hats.pixel_math.healpix_pixel import HealpixPixel +from hats.pixel_math.hipscat_id import SPATIAL_INDEX_COLUMN, hipscat_id_to_healpix from upath import UPath -from hipscat_import.catalog.resume_plan import ResumePlan -from hipscat_import.catalog.sparse_histogram import SparseHistogram -from hipscat_import.pipeline_resume_plan import get_pixel_cache_directory, print_task_failure +from hats_import.catalog.resume_plan import ResumePlan +from hats_import.catalog.sparse_histogram import SparseHistogram +from hats_import.pipeline_resume_plan import get_pixel_cache_directory, print_task_failure # pylint: disable=too-many-locals,too-many-arguments @@ -39,7 +39,7 @@ def _iterate_input_file( highest_order, ra_column, dec_column, - use_hipscat_index=False, + use_healpix_29=False, read_columns=None, ): """Helper function to handle input file reading and healpix pixel calculation""" @@ -49,11 +49,11 @@ def _iterate_input_file( raise NotImplementedError("No file reader implemented") for chunk_number, data in enumerate(file_reader.read(input_file, read_columns=read_columns)): - if use_hipscat_index: - if data.index.name == HIPSCAT_ID_COLUMN: + if use_healpix_29: + if data.index.name == SPATIAL_INDEX_COLUMN: mapped_pixels = hipscat_id_to_healpix(data.index, target_order=highest_order) else: - mapped_pixels = hipscat_id_to_healpix(data[HIPSCAT_ID_COLUMN], target_order=highest_order) + mapped_pixels = 
hipscat_id_to_healpix(data[SPATIAL_INDEX_COLUMN], target_order=highest_order) else: # Set up the pixel data mapped_pixels = hp.ang2pix( @@ -74,7 +74,7 @@ def map_to_pixels( highest_order, ra_column, dec_column, - use_hipscat_index=False, + use_healpix_29=False, ): """Map a file of input objects to their healpix pixels. @@ -99,8 +99,8 @@ def map_to_pixels( try: histo = SparseHistogram.make_empty(highest_order) - if use_hipscat_index: - read_columns = [HIPSCAT_ID_COLUMN] + if use_healpix_29: + read_columns = [SPATIAL_INDEX_COLUMN] else: read_columns = [ra_column, dec_column] @@ -110,7 +110,7 @@ def map_to_pixels( highest_order, ra_column, dec_column, - use_hipscat_index, + use_healpix_29, read_columns, ): mapped_pixel, count_at_pixel = np.unique(mapped_pixels, return_counts=True) @@ -136,13 +136,13 @@ def split_pixels( cache_shard_path: UPath, resume_path: UPath, alignment_file=None, - use_hipscat_index=False, + use_healpix_29=False, ): """Map a file of input objects to their healpix pixels and split into shards. Args: input_file (UPath): file to read for catalog data. - file_reader (hipscat_import.catalog.file_readers.InputReader): instance + file_reader (hats_import.catalog.file_readers.InputReader): instance of input reader that specifies arguments necessary for reading from the input file. splitting_key (str): unique counter for this input file, used when creating intermediate files @@ -160,7 +160,7 @@ def split_pixels( with open(alignment_file, "rb") as pickle_file: alignment = pickle.load(pickle_file) for chunk_number, data, mapped_pixels in _iterate_input_file( - input_file, pickled_reader_file, highest_order, ra_column, dec_column, use_hipscat_index + input_file, pickled_reader_file, highest_order, ra_column, dec_column, use_healpix_29 ): aligned_pixels = alignment[mapped_pixels] unique_pixels, unique_inverse = np.unique(aligned_pixels, return_inverse=True) @@ -199,8 +199,8 @@ def reduce_pixel_shards( ra_column, dec_column, sort_columns: str = "", - use_hipscat_index=False, - add_hipscat_index=True, + use_healpix_29=False, + add_healpix_29=True, delete_input_files=True, use_schema_file="", ): @@ -212,10 +212,10 @@ def reduce_pixel_shards( - ``Norder`` - the healpix order for the pixel - ``Dir`` - the directory part, corresponding to the pixel - ``Npix`` - the healpix pixel - - ``_hipscat_index`` - optional - a spatially-correlated + - ``_healpix_29`` - optional - a spatially-correlated 64-bit index field. - Notes on ``_hipscat_index``: + Notes on ``_healpix_29``: - if we generate the field, we will promote any previous *named* pandas index field(s) to a column with @@ -235,7 +235,7 @@ def reduce_pixel_shards( for the catalog's final pixel output_path (UPath): where to write the final catalog pixel data sort_columns (str): column for survey identifier, or other sortable column - add_hipscat_index (bool): should we add a _hipscat_index column to + add_healpix_29 (bool): should we add a _healpix_29 column to the resulting parquet file? delete_input_files (bool): should we delete the intermediate files used as input for this method. @@ -281,22 +281,22 @@ def reduce_pixel_shards( dataframe = merged_table.to_pandas() if sort_columns: dataframe = dataframe.sort_values(sort_columns.split(",")) - if add_hipscat_index: + if add_healpix_29: ## If we had a meaningful index before, preserve it as a column. 
if _has_named_index(dataframe): dataframe = dataframe.reset_index() - dataframe[HIPSCAT_ID_COLUMN] = pixel_math.compute_hipscat_id( + dataframe[SPATIAL_INDEX_COLUMN] = pixel_math.compute_hipscat_id( dataframe[ra_column].values, dataframe[dec_column].values, ) - dataframe = dataframe.set_index(HIPSCAT_ID_COLUMN).sort_index() + dataframe = dataframe.set_index(SPATIAL_INDEX_COLUMN).sort_index() - # Adjust the schema to make sure that the _hipscat_index will + # Adjust the schema to make sure that the _healpix_29 will # be saved as a uint64 - elif use_hipscat_index: - if dataframe.index.name != HIPSCAT_ID_COLUMN: - dataframe = dataframe.set_index(HIPSCAT_ID_COLUMN) + elif use_healpix_29: + if dataframe.index.name != SPATIAL_INDEX_COLUMN: + dataframe = dataframe.set_index(SPATIAL_INDEX_COLUMN) dataframe = dataframe.sort_index() dataframe["Norder"] = np.full(rows_written, fill_value=healpix_pixel.order, dtype=np.uint8) @@ -304,7 +304,7 @@ def reduce_pixel_shards( dataframe["Npix"] = np.full(rows_written, fill_value=healpix_pixel.pixel, dtype=np.uint64) if schema: - schema = _modify_arrow_schema(schema, add_hipscat_index) + schema = _modify_arrow_schema(schema, add_healpix_29) dataframe.to_parquet(destination_file.path, schema=schema, filesystem=destination_file.fs) else: dataframe.to_parquet(destination_file.path, filesystem=destination_file.fs) @@ -325,12 +325,12 @@ def reduce_pixel_shards( raise exception -def _modify_arrow_schema(schema, add_hipscat_index): - if add_hipscat_index: +def _modify_arrow_schema(schema, add_healpix_29): + if add_healpix_29: pandas_index_column = schema.get_field_index("__index_level_0__") if pandas_index_column != -1: schema = schema.remove(pandas_index_column) - schema = schema.insert(0, pa.field(HIPSCAT_ID_COLUMN, pa.uint64())) + schema = schema.insert(0, pa.field(SPATIAL_INDEX_COLUMN, pa.uint64())) schema = ( schema.append(pa.field("Norder", pa.uint8())) .append(pa.field("Dir", pa.uint64())) diff --git a/src/hipscat_import/catalog/resume_plan.py b/src/hats_import/catalog/resume_plan.py similarity index 97% rename from src/hipscat_import/catalog/resume_plan.py rename to src/hats_import/catalog/resume_plan.py index 03acb7c9..8b68c903 100644 --- a/src/hipscat_import/catalog/resume_plan.py +++ b/src/hats_import/catalog/resume_plan.py @@ -6,17 +6,17 @@ from dataclasses import dataclass, field from typing import List, Optional, Tuple -import hipscat.pixel_math.healpix_shim as hp +import hats.pixel_math.healpix_shim as hp import numpy as np -from hipscat import pixel_math -from hipscat.io import file_io -from hipscat.pixel_math import empty_histogram -from hipscat.pixel_math.healpix_pixel import HealpixPixel +from hats import pixel_math +from hats.io import file_io +from hats.pixel_math import empty_histogram +from hats.pixel_math.healpix_pixel import HealpixPixel from numpy import frombuffer from upath import UPath -from hipscat_import.catalog.sparse_histogram import SparseHistogram -from hipscat_import.pipeline_resume_plan import PipelineResumePlan +from hats_import.catalog.sparse_histogram import SparseHistogram +from hats_import.pipeline_resume_plan import PipelineResumePlan @dataclass diff --git a/src/hipscat_import/catalog/run_import.py b/src/hats_import/catalog/run_import.py similarity index 89% rename from src/hipscat_import/catalog/run_import.py rename to src/hats_import/catalog/run_import.py index 20ca721f..b52e3a25 100644 --- a/src/hipscat_import/catalog/run_import.py +++ b/src/hats_import/catalog/run_import.py @@ -1,4 +1,4 @@ -"""Import a set of non-hipscat 
files using dask for parallelization +"""Import a set of non-hats files using dask for parallelization Methods in this file set up a dask pipeline using futures. The actual logic of the map reduce is in the `map_reduce.py` file. @@ -7,14 +7,14 @@ import os import pickle -import hipscat.io.write_metadata as io -from hipscat.catalog import PartitionInfo -from hipscat.io import paths -from hipscat.io.parquet_metadata import write_parquet_metadata +import hats.io.write_metadata as io +from hats.catalog import PartitionInfo +from hats.io import paths +from hats.io.parquet_metadata import write_parquet_metadata -import hipscat_import.catalog.map_reduce as mr -from hipscat_import.catalog.arguments import ImportArguments -from hipscat_import.catalog.resume_plan import ResumePlan +import hats_import.catalog.map_reduce as mr +from hats_import.catalog.arguments import ImportArguments +from hats_import.catalog.resume_plan import ResumePlan def run(args, client): @@ -43,7 +43,7 @@ def run(args, client): highest_order=args.mapping_healpix_order, ra_column=args.ra_column, dec_column=args.dec_column, - use_hipscat_index=args.use_hipscat_index, + use_healpix_29=args.use_healpix_29, ) ) resume_plan.wait_for_mapping(futures) @@ -84,7 +84,7 @@ def run(args, client): cache_shard_path=args.tmp_path, resume_path=resume_plan.tmp_path, alignment_file=alignment_file, - use_hipscat_index=args.use_hipscat_index, + use_healpix_29=args.use_healpix_29, ) ) @@ -110,9 +110,9 @@ def run(args, client): ra_column=args.ra_column, dec_column=args.dec_column, sort_columns=args.sort_columns, - add_hipscat_index=args.add_hipscat_index, + add_healpix_29=args.add_healpix_29, use_schema_file=args.use_schema_file, - use_hipscat_index=args.use_hipscat_index, + use_healpix_29=args.use_healpix_29, delete_input_files=args.delete_intermediate_parquet_files, ) ) diff --git a/src/hipscat_import/catalog/sparse_histogram.py b/src/hats_import/catalog/sparse_histogram.py similarity index 98% rename from src/hipscat_import/catalog/sparse_histogram.py rename to src/hats_import/catalog/sparse_histogram.py index ac1549ae..0ed130bb 100644 --- a/src/hipscat_import/catalog/sparse_histogram.py +++ b/src/hats_import/catalog/sparse_histogram.py @@ -1,6 +1,6 @@ """Sparse 1-D histogram of healpix pixel counts.""" -import hipscat.pixel_math.healpix_shim as hp +import hats.pixel_math.healpix_shim as hp import numpy as np from scipy.sparse import csc_array, load_npz, save_npz, sparray diff --git a/src/hipscat_import/index/__init__.py b/src/hats_import/index/__init__.py similarity index 54% rename from src/hipscat_import/index/__init__.py rename to src/hats_import/index/__init__.py index 008c9952..f59b54e8 100644 --- a/src/hipscat_import/index/__init__.py +++ b/src/hats_import/index/__init__.py @@ -1,4 +1,4 @@ -"""Create performance index for a single column of an already-hipscatted catalog""" +"""Create performance index for a single column of an already-hats-sharded catalog""" from .arguments import IndexArguments from .map_reduce import create_index diff --git a/src/hipscat_import/index/arguments.py b/src/hats_import/index/arguments.py similarity index 85% rename from src/hipscat_import/index/arguments.py rename to src/hats_import/index/arguments.py index 1fb6fa6b..5f40f91e 100644 --- a/src/hipscat_import/index/arguments.py +++ b/src/hats_import/index/arguments.py @@ -6,12 +6,12 @@ from pathlib import Path from typing import List, Optional -from hipscat.catalog import Catalog -from hipscat.catalog.index.index_catalog_info import IndexCatalogInfo -from 
hipscat.io.validation import is_valid_catalog +from hats.catalog import Catalog +from hats.catalog.index.index_catalog_info import IndexCatalogInfo +from hats.io.validation import is_valid_catalog from upath import UPath -from hipscat_import.runtime_arguments import RuntimeArguments +from hats_import.runtime_arguments import RuntimeArguments @dataclass @@ -25,8 +25,8 @@ class IndexArguments(RuntimeArguments): extra_columns: List[str] = field(default_factory=list) ## Output - include_hipscat_index: bool = True - """Include the hipscat spatial partition index.""" + include_healpix_29: bool = True + """Include the healpix-based hats spatial index.""" include_order_pixel: bool = True """Include partitioning columns, Norder, Dir, and Npix. You probably want to keep these!""" include_radec: bool = False @@ -57,12 +57,12 @@ def _check_arguments(self): if not self.indexing_column: raise ValueError("indexing_column is required") - if not self.include_hipscat_index and not self.include_order_pixel: - raise ValueError("At least one of include_hipscat_index or include_order_pixel must be True") + if not self.include_healpix_29 and not self.include_order_pixel: + raise ValueError("At least one of include_healpix_29 or include_order_pixel must be True") if not is_valid_catalog(self.input_catalog_path): raise ValueError("input_catalog_path not a valid catalog") - self.input_catalog = Catalog.read_from_hipscat(catalog_path=self.input_catalog_path) + self.input_catalog = Catalog.read_hats(catalog_path=self.input_catalog_path) if self.include_radec: catalog_info = self.input_catalog.catalog_info self.extra_columns.extend([catalog_info.ra_column, catalog_info.dec_column]) @@ -99,7 +99,7 @@ def additional_runtime_provenance_info(self) -> dict: "input_catalog_path": self.input_catalog_path, "indexing_column": self.indexing_column, "extra_columns": self.extra_columns, - "include_hipscat_index": self.include_hipscat_index, + "include_healpix_29": self.include_healpix_29, "include_order_pixel": self.include_order_pixel, "include_radec": self.include_radec, } diff --git a/src/hipscat_import/index/map_reduce.py b/src/hats_import/index/map_reduce.py similarity index 84% rename from src/hipscat_import/index/map_reduce.py rename to src/hats_import/index/map_reduce.py index 8bba30ba..ed143162 100644 --- a/src/hipscat_import/index/map_reduce.py +++ b/src/hats_import/index/map_reduce.py @@ -1,12 +1,12 @@ -"""Create columnar index of hipscat table using dask for parallelization""" +"""Create columnar index of hats table using dask for parallelization""" import dask.dataframe as dd import numpy as np -from hipscat.io import file_io, paths -from hipscat.pixel_math.hipscat_id import HIPSCAT_ID_COLUMN +from hats.io import file_io, paths +from hats.pixel_math.hipscat_id import SPATIAL_INDEX_COLUMN -def read_leaf_file(input_file, include_columns, include_hipscat_index, drop_duplicates, schema): +def read_leaf_file(input_file, include_columns, include_healpix_29, drop_duplicates, schema): """Mapping function called once per input file. 
Reads the leaf parquet file, and returns the data with the appropriate columns and duplicates dropped.""" @@ -18,8 +18,8 @@ def read_leaf_file(input_file, include_columns, include_hipscat_index, drop_dupl ) data = data.reset_index() - if not include_hipscat_index: - data = data.drop(columns=[HIPSCAT_ID_COLUMN]) + if not include_healpix_29: + data = data.drop(columns=[SPATIAL_INDEX_COLUMN]) if drop_duplicates: data = data.drop_duplicates() @@ -44,7 +44,7 @@ def create_index(args, client): for pixel in args.input_catalog.get_healpix_pixels() ], include_columns=include_columns, - include_hipscat_index=args.include_hipscat_index, + include_healpix_29=args.include_healpix_29, drop_duplicates=args.drop_duplicates, schema=args.input_catalog.schema, ) diff --git a/src/hipscat_import/index/run_index.py b/src/hats_import/index/run_index.py similarity index 79% rename from src/hipscat_import/index/run_index.py rename to src/hats_import/index/run_index.py index fc324af2..4585dd0e 100644 --- a/src/hipscat_import/index/run_index.py +++ b/src/hats_import/index/run_index.py @@ -1,10 +1,10 @@ -"""Create columnar index of hipscat table using dask for parallelization""" +"""Create columnar index of hats table using dask for parallelization""" -from hipscat.io import file_io, parquet_metadata, write_metadata +from hats.io import file_io, parquet_metadata, write_metadata -import hipscat_import.index.map_reduce as mr -from hipscat_import.index.arguments import IndexArguments -from hipscat_import.pipeline_resume_plan import print_progress +import hats_import.index.map_reduce as mr +from hats_import.index.arguments import IndexArguments +from hats_import.pipeline_resume_plan import print_progress def run(args, client): diff --git a/src/hipscat_import/margin_cache/__init__.py b/src/hats_import/margin_cache/__init__.py similarity index 100% rename from src/hipscat_import/margin_cache/__init__.py rename to src/hats_import/margin_cache/__init__.py diff --git a/src/hipscat_import/margin_cache/margin_cache.py b/src/hats_import/margin_cache/margin_cache.py similarity index 91% rename from src/hipscat_import/margin_cache/margin_cache.py rename to src/hats_import/margin_cache/margin_cache.py index c9815139..4d3cb367 100644 --- a/src/hipscat_import/margin_cache/margin_cache.py +++ b/src/hats_import/margin_cache/margin_cache.py @@ -1,15 +1,15 @@ -from hipscat.catalog import PartitionInfo -from hipscat.io import file_io, parquet_metadata, paths, write_metadata +from hats.catalog import PartitionInfo +from hats.io import file_io, parquet_metadata, paths, write_metadata -import hipscat_import.margin_cache.margin_cache_map_reduce as mcmr -from hipscat_import.margin_cache.margin_cache_resume_plan import MarginCachePlan +import hats_import.margin_cache.margin_cache_map_reduce as mcmr +from hats_import.margin_cache.margin_cache_resume_plan import MarginCachePlan # pylint: disable=too-many-locals,too-many-arguments def generate_margin_cache(args, client): """Generate a margin cache for a given input catalog. - The input catalog must be in hipscat format. + The input catalog must be in hats format. Args: args (MarginCacheArguments): A valid `MarginCacheArguments` object.
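For context on the `include_hipscat_index` → `include_healpix_29` rename in the index arguments above, a minimal sketch of how the renamed options fit together; the catalog path, indexing column name ("id"), and output names are illustrative placeholders, not values taken from this patch:

from dask.distributed import Client

import hats_import.pipeline as runner
from hats_import.index.arguments import IndexArguments

# Build a columnar index over one column of an existing hats catalog.
args = IndexArguments(
    input_catalog_path="small_sky_object_catalog",  # placeholder: any valid hats catalog
    indexing_column="id",                           # assumed column name
    include_healpix_29=True,                        # formerly include_hipscat_index
    include_order_pixel=True,                       # keep the Norder/Dir/Npix columns
    output_path=".",
    output_artifact_name="small_sky_object_index",
)
with Client(n_workers=1, threads_per_worker=1) as client:
    runner.pipeline_with_client(args, client)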
diff --git a/src/hipscat_import/margin_cache/margin_cache_arguments.py b/src/hats_import/margin_cache/margin_cache_arguments.py similarity index 89% rename from src/hipscat_import/margin_cache/margin_cache_arguments.py rename to src/hats_import/margin_cache/margin_cache_arguments.py index e65f8542..d89a84b8 100644 --- a/src/hipscat_import/margin_cache/margin_cache_arguments.py +++ b/src/hats_import/margin_cache/margin_cache_arguments.py @@ -4,14 +4,14 @@ from pathlib import Path from typing import List -import hipscat.pixel_math.healpix_shim as hp -from hipscat.catalog import Catalog -from hipscat.catalog.margin_cache.margin_cache_catalog_info import MarginCacheCatalogInfo -from hipscat.io.validation import is_valid_catalog -from hipscat.pixel_math.healpix_pixel import HealpixPixel +import hats.pixel_math.healpix_shim as hp +from hats.catalog import Catalog +from hats.catalog.margin_cache.margin_cache_catalog_info import MarginCacheCatalogInfo +from hats.io.validation import is_valid_catalog +from hats.pixel_math.healpix_pixel import HealpixPixel from upath import UPath -from hipscat_import.runtime_arguments import RuntimeArguments +from hats_import.runtime_arguments import RuntimeArguments @dataclass @@ -39,7 +39,7 @@ class MarginCacheArguments(RuntimeArguments): if False, we will keep all done marker files at the end of the pipeline.""" input_catalog_path: str | Path | UPath | None = None - """the path to the hipscat-formatted input catalog.""" + """the path to the hats-formatted input catalog.""" debug_filter_pixel_list: List[HealpixPixel] = field(default_factory=list) """debug setting. if provided, we will first filter the catalog to the pixels provided. this can be useful for creating a margin over a subset of a catalog.""" @@ -54,7 +54,7 @@ def _check_arguments(self): if not is_valid_catalog(self.input_catalog_path): raise ValueError("input_catalog_path not a valid catalog") - self.catalog = Catalog.read_from_hipscat(self.input_catalog_path) + self.catalog = Catalog.read_hats(self.input_catalog_path) if len(self.debug_filter_pixel_list) > 0: self.catalog = self.catalog.filter_from_pixel_list(self.debug_filter_pixel_list) if len(self.catalog.get_healpix_pixels()) == 0: diff --git a/src/hipscat_import/margin_cache/margin_cache_map_reduce.py b/src/hats_import/margin_cache/margin_cache_map_reduce.py similarity index 94% rename from src/hipscat_import/margin_cache/margin_cache_map_reduce.py rename to src/hats_import/margin_cache/margin_cache_map_reduce.py index d63d705e..bda17b74 100644 --- a/src/hipscat_import/margin_cache/margin_cache_map_reduce.py +++ b/src/hats_import/margin_cache/margin_cache_map_reduce.py @@ -1,15 +1,15 @@ -import hipscat.pixel_math.healpix_shim as hp +import hats.pixel_math.healpix_shim as hp import numpy as np import pandas as pd import pyarrow as pa import pyarrow.dataset as ds -from hipscat import pixel_math -from hipscat.catalog.partition_info import PartitionInfo -from hipscat.io import file_io, paths -from hipscat.pixel_math.healpix_pixel import HealpixPixel +from hats import pixel_math +from hats.catalog.partition_info import PartitionInfo +from hats.io import file_io, paths +from hats.pixel_math.healpix_pixel import HealpixPixel -from hipscat_import.margin_cache.margin_cache_resume_plan import MarginCachePlan -from hipscat_import.pipeline_resume_plan import get_pixel_cache_directory, print_task_failure +from hats_import.margin_cache.margin_cache_resume_plan import MarginCachePlan +from hats_import.pipeline_resume_plan import get_pixel_cache_directory, 
print_task_failure # pylint: disable=too-many-arguments diff --git a/src/hipscat_import/margin_cache/margin_cache_resume_plan.py b/src/hats_import/margin_cache/margin_cache_resume_plan.py similarity index 96% rename from src/hipscat_import/margin_cache/margin_cache_resume_plan.py rename to src/hats_import/margin_cache/margin_cache_resume_plan.py index 000e1ae2..9cc75685 100644 --- a/src/hipscat_import/margin_cache/margin_cache_resume_plan.py +++ b/src/hats_import/margin_cache/margin_cache_resume_plan.py @@ -6,12 +6,12 @@ from typing import List import pandas as pd -from hipscat import pixel_math -from hipscat.io import file_io -from hipscat.pixel_math.healpix_pixel import HealpixPixel +from hats import pixel_math +from hats.io import file_io +from hats.pixel_math.healpix_pixel import HealpixPixel -from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments -from hipscat_import.pipeline_resume_plan import PipelineResumePlan +from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments +from hats_import.pipeline_resume_plan import PipelineResumePlan @dataclass diff --git a/src/hipscat_import/pipeline.py b/src/hats_import/pipeline.py similarity index 75% rename from src/hipscat_import/pipeline.py rename to src/hats_import/pipeline.py index dd4487d5..f696b4be 100644 --- a/src/hipscat_import/pipeline.py +++ b/src/hats_import/pipeline.py @@ -5,17 +5,17 @@ from dask.distributed import Client -import hipscat_import.catalog.run_import as catalog_runner -import hipscat_import.index.run_index as index_runner -import hipscat_import.margin_cache.margin_cache as margin_runner -import hipscat_import.soap.run_soap as soap_runner -import hipscat_import.verification.run_verification as verification_runner -from hipscat_import.catalog.arguments import ImportArguments -from hipscat_import.index.arguments import IndexArguments -from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments -from hipscat_import.runtime_arguments import RuntimeArguments -from hipscat_import.soap.arguments import SoapArguments -from hipscat_import.verification.arguments import VerificationArguments +import hats_import.catalog.run_import as catalog_runner +import hats_import.index.run_index as index_runner +import hats_import.margin_cache.margin_cache as margin_runner +import hats_import.soap.run_soap as soap_runner +import hats_import.verification.run_verification as verification_runner +from hats_import.catalog.arguments import ImportArguments +from hats_import.index.arguments import IndexArguments +from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments +from hats_import.runtime_arguments import RuntimeArguments +from hats_import.soap.arguments import SoapArguments +from hats_import.verification.arguments import VerificationArguments # pragma: no cover @@ -62,7 +62,7 @@ def pipeline_with_client(args: RuntimeArguments, client: Client): def _send_failure_email(args: RuntimeArguments, exception: Exception): message = EmailMessage() - message["Subject"] = "hipscat-import failure." + message["Subject"] = "hats-import failure." message["To"] = args.completion_email_address message.set_content( f"output_artifact_name: {args.output_artifact_name}" @@ -77,7 +77,7 @@ def _send_success_email(args): if not args.completion_email_address: return message = EmailMessage() - message["Subject"] = "hipscat-import success." + message["Subject"] = "hats-import success." 
message["To"] = args.completion_email_address message.set_content(f"output_artifact_name: {args.output_artifact_name}") diff --git a/src/hipscat_import/pipeline_resume_plan.py b/src/hats_import/pipeline_resume_plan.py similarity index 99% rename from src/hipscat_import/pipeline_resume_plan.py rename to src/hats_import/pipeline_resume_plan.py index 95a966af..9e49b15f 100644 --- a/src/hipscat_import/pipeline_resume_plan.py +++ b/src/hats_import/pipeline_resume_plan.py @@ -8,8 +8,8 @@ from dask.distributed import as_completed, get_worker from dask.distributed import print as dask_print -from hipscat.io import file_io -from hipscat.pixel_math.healpix_pixel import HealpixPixel +from hats.io import file_io +from hats.pixel_math.healpix_pixel import HealpixPixel from tqdm.auto import tqdm as auto_tqdm from tqdm.std import tqdm as std_tqdm from upath import UPath @@ -218,7 +218,7 @@ def get_pixel_cache_directory(cache_path, pixel: HealpixPixel): """Create a path for intermediate pixel data. You can use this over the paths.get_pixel_directory method, as it will include the pixel - number in the path. Further, it will just *look* different from a real hipscat + number in the path. Further, it will just *look* different from a real hats path, so it's clearer that it's a temporary directory:: {cache_path}/order_{order}/dir_{dir}/pixel_{pixel}/ diff --git a/src/hipscat_import/py.typed b/src/hats_import/py.typed similarity index 100% rename from src/hipscat_import/py.typed rename to src/hats_import/py.typed diff --git a/src/hipscat_import/runtime_arguments.py b/src/hats_import/runtime_arguments.py similarity index 98% rename from src/hipscat_import/runtime_arguments.py rename to src/hats_import/runtime_arguments.py index eae0ad89..fcab3277 100644 --- a/src/hipscat_import/runtime_arguments.py +++ b/src/hats_import/runtime_arguments.py @@ -7,7 +7,7 @@ from importlib.metadata import version from pathlib import Path -from hipscat.io import file_io +from hats.io import file_io from upath import UPath # pylint: disable=too-many-instance-attributes @@ -123,8 +123,8 @@ def provenance_info(self) -> dict: runtime_args.update(self.additional_runtime_provenance_info()) provenance_info = { - "tool_name": "hipscat_import", - "version": version("hipscat-import"), + "tool_name": "hats_import", + "version": version("hats-import"), "runtime_args": runtime_args, } diff --git a/src/hipscat_import/soap/__init__.py b/src/hats_import/soap/__init__.py similarity index 100% rename from src/hipscat_import/soap/__init__.py rename to src/hats_import/soap/__init__.py diff --git a/src/hipscat_import/soap/arguments.py b/src/hats_import/soap/arguments.py similarity index 87% rename from src/hipscat_import/soap/arguments.py rename to src/hats_import/soap/arguments.py index 2cfe5fe1..135f4807 100644 --- a/src/hipscat_import/soap/arguments.py +++ b/src/hats_import/soap/arguments.py @@ -3,13 +3,13 @@ from dataclasses import dataclass from pathlib import Path -from hipscat.catalog import Catalog -from hipscat.catalog.association_catalog.association_catalog import AssociationCatalogInfo -from hipscat.catalog.catalog_type import CatalogType -from hipscat.io.validation import is_valid_catalog +from hats.catalog import Catalog +from hats.catalog.association_catalog.association_catalog import AssociationCatalogInfo +from hats.catalog.catalog_type import CatalogType +from hats.io.validation import is_valid_catalog from upath import UPath -from hipscat_import.runtime_arguments import RuntimeArguments +from hats_import.runtime_arguments import 
RuntimeArguments @dataclass @@ -52,7 +52,7 @@ def _check_arguments(self): if not is_valid_catalog(self.object_catalog_dir): raise ValueError("object_catalog_dir not a valid catalog") - self.object_catalog = Catalog.read_from_hipscat(catalog_path=self.object_catalog_dir) + self.object_catalog = Catalog.read_hats(catalog_path=self.object_catalog_dir) if not self.source_catalog_dir: raise ValueError("source_catalog_dir is required") @@ -61,7 +61,7 @@ def _check_arguments(self): if not is_valid_catalog(self.source_catalog_dir): raise ValueError("source_catalog_dir not a valid catalog") - self.source_catalog = Catalog.read_from_hipscat(catalog_path=self.source_catalog_dir) + self.source_catalog = Catalog.read_hats(catalog_path=self.source_catalog_dir) if self.compute_partition_size < 100_000: raise ValueError("compute_partition_size must be at least 100_000") diff --git a/src/hipscat_import/soap/map_reduce.py b/src/hats_import/soap/map_reduce.py similarity index 93% rename from src/hipscat_import/soap/map_reduce.py rename to src/hats_import/soap/map_reduce.py index 009d921e..44d8c612 100644 --- a/src/hipscat_import/soap/map_reduce.py +++ b/src/hats_import/soap/map_reduce.py @@ -5,15 +5,15 @@ import numpy as np import pandas as pd import pyarrow.parquet as pq -from hipscat.catalog.association_catalog.partition_join_info import PartitionJoinInfo -from hipscat.io import file_io, paths -from hipscat.io.parquet_metadata import get_healpix_pixel_from_metadata -from hipscat.pixel_math.healpix_pixel import HealpixPixel -from hipscat.pixel_math.healpix_pixel_function import get_pixel_argsort +from hats.catalog.association_catalog.partition_join_info import PartitionJoinInfo +from hats.io import file_io, paths +from hats.io.parquet_metadata import get_healpix_pixel_from_metadata +from hats.pixel_math.healpix_pixel import HealpixPixel +from hats.pixel_math.healpix_pixel_function import get_pixel_argsort -from hipscat_import.pipeline_resume_plan import get_pixel_cache_directory, print_task_failure -from hipscat_import.soap.arguments import SoapArguments -from hipscat_import.soap.resume_plan import SoapPlan +from hats_import.pipeline_resume_plan import get_pixel_cache_directory, print_task_failure +from hats_import.soap.arguments import SoapArguments +from hats_import.soap.resume_plan import SoapPlan def _count_joins_for_object(source_data, source_pixel, object_pixel, soap_args): @@ -84,7 +84,7 @@ def count_joins(soap_args: SoapArguments, source_pixel: HealpixPixel, object_pix If any un-joined source pixels remain, stretch out to neighboring object pixels. Args: - soap_args(`hipscat_import.soap.SoapArguments`): set of arguments for pipeline execution + soap_args(`hats_import.soap.SoapArguments`): set of arguments for pipeline execution source_pixel(HealpixPixel): order and pixel for the source catalog single pixel. object_pixels(List[HealpixPixel]): set of tuples of order and pixel for the partitions of the object catalog to be joined. 
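Likewise for the SOAP runner renamed above, a rough sketch of driving it with the arguments that appear in this file; the paths are placeholders, and a real run may need join-column settings that these hunks do not show:

from dask.distributed import Client

import hats_import.pipeline as runner
from hats_import.soap.arguments import SoapArguments

# Count source-to-object joins between two existing hats catalogs.
args = SoapArguments(
    object_catalog_dir="small_sky_object_catalog",  # placeholder: a valid hats catalog
    source_catalog_dir="small_sky_source_catalog",  # placeholder: a valid hats catalog
    compute_partition_size=1_000_000,               # validated above to be at least 100_000
    output_path=".",
    output_artifact_name="small_sky_association",
)
with Client(n_workers=1, threads_per_worker=1) as client:
    runner.pipeline_with_client(args, client)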
diff --git a/src/hipscat_import/soap/resume_plan.py b/src/hats_import/soap/resume_plan.py similarity index 94% rename from src/hipscat_import/soap/resume_plan.py rename to src/hats_import/soap/resume_plan.py index a77eb25d..447694a3 100644 --- a/src/hipscat_import/soap/resume_plan.py +++ b/src/hats_import/soap/resume_plan.py @@ -5,15 +5,15 @@ from dataclasses import dataclass, field from typing import List, Optional, Tuple -import hipscat.pixel_math.healpix_shim as hp +import hats.pixel_math.healpix_shim as hp import numpy as np -from hipscat.catalog import Catalog -from hipscat.io import file_io -from hipscat.pixel_math.healpix_pixel import HealpixPixel -from hipscat.pixel_tree import PixelAlignment, align_trees +from hats.catalog import Catalog +from hats.io import file_io +from hats.pixel_math.healpix_pixel import HealpixPixel +from hats.pixel_tree import PixelAlignment, align_trees -from hipscat_import.pipeline_resume_plan import PipelineResumePlan -from hipscat_import.soap.arguments import SoapArguments +from hats_import.pipeline_resume_plan import PipelineResumePlan +from hats_import.soap.arguments import SoapArguments @dataclass @@ -59,12 +59,12 @@ def gather_plan(self, args): return step_progress.update(1) - self.object_catalog = Catalog.read_from_hipscat(args.object_catalog_dir) + self.object_catalog = Catalog.read_hats(args.object_catalog_dir) source_map_file = file_io.append_paths_to_pointer(self.tmp_path, self.SOURCE_MAP_FILE) if file_io.does_file_or_directory_exist(source_map_file): source_pixel_map = np.load(source_map_file, allow_pickle=True)["arr_0"].item() else: - source_catalog = Catalog.read_from_hipscat(args.source_catalog_dir) + source_catalog = Catalog.read_hats(args.source_catalog_dir) source_pixel_map = source_to_object_map(self.object_catalog, source_catalog) np.savez_compressed(source_map_file, source_pixel_map) self.count_keys = self.get_sources_to_count(source_pixel_map=source_pixel_map) diff --git a/src/hipscat_import/soap/run_soap.py b/src/hats_import/soap/run_soap.py similarity index 87% rename from src/hipscat_import/soap/run_soap.py rename to src/hats_import/soap/run_soap.py index d5f7a0cf..36aacff2 100644 --- a/src/hipscat_import/soap/run_soap.py +++ b/src/hats_import/soap/run_soap.py @@ -3,12 +3,12 @@ The actual logic of the map reduce is in the `map_reduce.py` file. 
""" -from hipscat.catalog.association_catalog.partition_join_info import PartitionJoinInfo -from hipscat.io import parquet_metadata, paths, write_metadata +from hats.catalog.association_catalog.partition_join_info import PartitionJoinInfo +from hats.io import parquet_metadata, paths, write_metadata -from hipscat_import.soap.arguments import SoapArguments -from hipscat_import.soap.map_reduce import combine_partial_results, count_joins, reduce_joins -from hipscat_import.soap.resume_plan import SoapPlan +from hats_import.soap.arguments import SoapArguments +from hats_import.soap.map_reduce import combine_partial_results, count_joins, reduce_joins +from hats_import.soap.resume_plan import SoapPlan def run(args, client): diff --git a/src/hipscat_import/verification/__init__.py b/src/hats_import/verification/__init__.py similarity index 100% rename from src/hipscat_import/verification/__init__.py rename to src/hats_import/verification/__init__.py diff --git a/src/hipscat_import/verification/arguments.py b/src/hats_import/verification/arguments.py similarity index 87% rename from src/hipscat_import/verification/arguments.py rename to src/hats_import/verification/arguments.py index 86c139b1..207793db 100644 --- a/src/hipscat_import/verification/arguments.py +++ b/src/hats_import/verification/arguments.py @@ -6,11 +6,11 @@ from pathlib import Path from typing import List, Optional -from hipscat.catalog import Catalog -from hipscat.io.validation import is_valid_catalog +from hats.catalog import Catalog +from hats.io.validation import is_valid_catalog from upath import UPath -from hipscat_import.runtime_arguments import RuntimeArguments +from hats_import.runtime_arguments import RuntimeArguments @dataclass @@ -39,7 +39,7 @@ def _check_arguments(self): if not self.input_catalog: if not is_valid_catalog(self.input_catalog_path): raise ValueError("input_catalog_path not a valid catalog") - self.input_catalog = Catalog.read_from_hipscat(catalog_path=self.input_catalog_path) + self.input_catalog = Catalog.read_hats(catalog_path=self.input_catalog_path) if not self.input_catalog_path: self.input_catalog_path = self.input_catalog.catalog_path diff --git a/src/hipscat_import/verification/run_verification.py b/src/hats_import/verification/run_verification.py similarity index 82% rename from src/hipscat_import/verification/run_verification.py rename to src/hats_import/verification/run_verification.py index 2b7d5954..ea623ddf 100644 --- a/src/hipscat_import/verification/run_verification.py +++ b/src/hats_import/verification/run_verification.py @@ -1,6 +1,6 @@ -"""Run pass/fail checks and generate verification report of existing hipscat table.""" +"""Run pass/fail checks and generate verification report of existing hats table.""" -from hipscat_import.verification.arguments import VerificationArguments +from hats_import.verification.arguments import VerificationArguments def run(args): diff --git a/tests/conftest.py b/tests/conftest.py index 7666200f..2d9f12ff 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -39,7 +39,7 @@ def dask_client(use_ray): def pytest_addoption(parser): """Add command line option to test dask unit tests on ray. 
- This must live in /tests/conftest.py (not /tests/hipscat-import/conftest.py)""" + This must live in /tests/conftest.py (not /tests/hats_import/conftest.py)""" parser.addoption( "--use_ray", action="store_true", diff --git a/tests/hipscat_import/data/blank/blank.csv b/tests/data/blank/blank.csv similarity index 100% rename from tests/hipscat_import/data/blank/blank.csv rename to tests/data/blank/blank.csv diff --git a/tests/data/generate_data.ipynb b/tests/data/generate_data.ipynb new file mode 100644 index 00000000..d3ba24c9 --- /dev/null +++ b/tests/data/generate_data.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Unit test data\n", + "\n", + "This directory contains very small toy data sets that are used\n", + "for unit tests.\n", + "\n", + "## Object catalog: small_sky\n", + "\n", + "This \"object catalog\" contains 131 randomly generated radec values. \n", + "\n", + "- All radec positions are in the Healpix pixel order 0, pixel 11.\n", + "- IDs are integers from 700-831.\n", + "\n", + "The following are imports and paths that are used throughout the notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import hats_import.pipeline as runner\n", + "from hats_import.catalog.arguments import ImportArguments\n", + "import tempfile\n", + "from pathlib import Path\n", + "from dask.distributed import Client\n", + "\n", + "tmp_path = tempfile.TemporaryDirectory()\n", + "tmp_dir = tmp_path.name\n", + "\n", + "hats_import_dir = \".\"\n", + "client = Client(n_workers=1, threads_per_worker=1, local_directory=tmp_dir)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### small_sky\n", + "\n", + "This \"object catalog\" contains 131 randomly generated radec values. \n", + "\n", + "- All radec positions are in the Healpix pixel order 0, pixel 11.\n", + "- IDs are integers from 700-831.\n", + "\n", + "This catalog was generated with the following snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with tempfile.TemporaryDirectory() as pipeline_tmp:\n", + "    args = ImportArguments(\n", + "        input_path=Path(hats_import_dir) / \"small_sky\",\n", + "        output_path=\".\",\n", + "        file_reader=\"csv\",\n", + "        highest_healpix_order=5,\n", + "        output_artifact_name=\"small_sky_object_catalog\",\n", + "        tmp_dir=pipeline_tmp,\n", + "    )\n", + "    runner.pipeline_with_client(args, client)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Source catalog: small_sky_source\n", + "\n", + "This \"source catalog\" contains 131 detections of each of the 131 objects\n", + "in the \"small_sky\" catalog. These have a random magnitude, MJD, and \n", + "band (selected from ugrizy). The full script that generated the values\n", + "can be found [here](https://github.com/delucchi-cmu/hipscripts/blob/main/twiddling/small_sky_source.py).\n", + "\n", + "The catalog was generated with the following snippet, using raw data \n", + "from the `hats-import` repository.\n", + "\n", + "NB: `pixel_threshold=3000` is set just to make sure that we're generating\n", + "a handful of files at various healpix orders."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with tempfile.TemporaryDirectory() as pipeline_tmp:\n", + " args = ImportArguments(\n", + " input_path=Path(hats_import_dir) / \"small_sky_source\",\n", + " output_path=\".\",\n", + " file_reader=\"csv\",\n", + " ra_column=\"source_ra\",\n", + " dec_column=\"source_dec\",\n", + " catalog_type=\"source\",\n", + " highest_healpix_order=5,\n", + " pixel_threshold=3000,\n", + " output_artifact_name=\"small_sky_source_catalog\",\n", + " tmp_dir=pipeline_tmp,\n", + " )\n", + " runner.pipeline_with_client(args, client)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.close()\n", + "tmp_path.cleanup()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/data/indexed_files/csv_list_double_1_of_2.txt b/tests/data/indexed_files/csv_list_double_1_of_2.txt new file mode 100644 index 00000000..a30f60be --- /dev/null +++ b/tests/data/indexed_files/csv_list_double_1_of_2.txt @@ -0,0 +1,3 @@ +tests/data/small_sky_parts/catalog_00_of_05.csv +tests/data/small_sky_parts/catalog_01_of_05.csv + diff --git a/tests/data/indexed_files/csv_list_double_2_of_2.txt b/tests/data/indexed_files/csv_list_double_2_of_2.txt new file mode 100644 index 00000000..bb12c6db --- /dev/null +++ b/tests/data/indexed_files/csv_list_double_2_of_2.txt @@ -0,0 +1,3 @@ +tests/data/small_sky_parts/catalog_02_of_05.csv +tests/data/small_sky_parts/catalog_03_of_05.csv +tests/data/small_sky_parts/catalog_04_of_05.csv \ No newline at end of file diff --git a/tests/data/indexed_files/csv_list_single.txt b/tests/data/indexed_files/csv_list_single.txt new file mode 100644 index 00000000..0d98af84 --- /dev/null +++ b/tests/data/indexed_files/csv_list_single.txt @@ -0,0 +1,6 @@ +tests/data/small_sky_parts/catalog_00_of_05.csv +tests/data/small_sky_parts/catalog_01_of_05.csv +tests/data/small_sky_parts/catalog_02_of_05.csv +tests/data/small_sky_parts/catalog_03_of_05.csv +tests/data/small_sky_parts/catalog_04_of_05.csv + diff --git a/tests/data/indexed_files/parquet_list_single.txt b/tests/data/indexed_files/parquet_list_single.txt new file mode 100644 index 00000000..77f8c852 --- /dev/null +++ b/tests/data/indexed_files/parquet_list_single.txt @@ -0,0 +1,5 @@ +tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_0_0.parquet +tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_1_0.parquet +tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_2_0.parquet +tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_3_0.parquet +tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_4_0.parquet diff --git a/tests/hipscat_import/data/margin_pairs/negative_pairs.csv b/tests/data/margin_pairs/negative_pairs.csv similarity index 100% rename from tests/hipscat_import/data/margin_pairs/negative_pairs.csv rename to tests/data/margin_pairs/negative_pairs.csv diff --git a/tests/hipscat_import/data/margin_pairs/small_sky_source_pairs.csv b/tests/data/margin_pairs/small_sky_source_pairs.csv similarity index 100% rename from tests/hipscat_import/data/margin_pairs/small_sky_source_pairs.csv 
rename to tests/data/margin_pairs/small_sky_source_pairs.csv diff --git a/tests/hipscat_import/data/mixed_schema/input_01.csv b/tests/data/mixed_schema/input_01.csv similarity index 100% rename from tests/hipscat_import/data/mixed_schema/input_01.csv rename to tests/data/mixed_schema/input_01.csv diff --git a/tests/hipscat_import/data/mixed_schema/input_02.csv b/tests/data/mixed_schema/input_02.csv similarity index 100% rename from tests/hipscat_import/data/mixed_schema/input_02.csv rename to tests/data/mixed_schema/input_02.csv diff --git a/tests/hipscat_import/data/mixed_schema/schema.parquet b/tests/data/mixed_schema/schema.parquet similarity index 100% rename from tests/hipscat_import/data/mixed_schema/schema.parquet rename to tests/data/mixed_schema/schema.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_0_0.parquet b/tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_0_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_0_0.parquet rename to tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_0_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_1_0.parquet b/tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_1_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_1_0.parquet rename to tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_1_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_2_0.parquet b/tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_2_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_2_0.parquet rename to tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_2_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_3_0.parquet b/tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_3_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_3_0.parquet rename to tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_3_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_4_0.parquet b/tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_4_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_4_0.parquet rename to tests/data/parquet_shards/order_0/dir_0/pixel_11/shard_4_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_44/shard_0_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_44/shard_0_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_44/shard_0_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_44/shard_0_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_44/shard_1_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_44/shard_1_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_44/shard_1_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_44/shard_1_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_44/shard_2_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_44/shard_2_0.parquet similarity index 100% rename from 
tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_44/shard_2_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_44/shard_2_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_44/shard_3_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_44/shard_3_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_44/shard_3_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_44/shard_3_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_44/shard_4_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_44/shard_4_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_44/shard_4_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_44/shard_4_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_45/shard_0_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_45/shard_0_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_45/shard_0_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_45/shard_0_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_45/shard_1_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_45/shard_1_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_45/shard_1_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_45/shard_1_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_45/shard_2_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_45/shard_2_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_45/shard_2_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_45/shard_2_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_45/shard_3_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_45/shard_3_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_45/shard_3_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_45/shard_3_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_45/shard_4_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_45/shard_4_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_45/shard_4_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_45/shard_4_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_46/shard_0_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_46/shard_0_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_46/shard_0_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_46/shard_0_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_46/shard_1_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_46/shard_1_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_46/shard_1_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_46/shard_1_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_46/shard_2_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_46/shard_2_0.parquet similarity index 
100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_46/shard_2_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_46/shard_2_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_46/shard_3_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_46/shard_3_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_46/shard_3_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_46/shard_3_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_46/shard_4_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_46/shard_4_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_46/shard_4_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_46/shard_4_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_47/shard_0_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_47/shard_0_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_47/shard_0_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_47/shard_0_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_47/shard_1_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_47/shard_1_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_47/shard_1_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_47/shard_1_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_47/shard_2_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_47/shard_2_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_47/shard_2_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_47/shard_2_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_47/shard_3_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_47/shard_3_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_47/shard_3_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_47/shard_3_0.parquet diff --git a/tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_47/shard_4_0.parquet b/tests/data/parquet_shards/order_1/dir_0/pixel_47/shard_4_0.parquet similarity index 100% rename from tests/hipscat_import/data/parquet_shards/order_1/dir_0/pixel_47/shard_4_0.parquet rename to tests/data/parquet_shards/order_1/dir_0/pixel_47/shard_4_0.parquet diff --git a/tests/hipscat_import/data/resume/Norder=0/Dir=0/Npix=11.parquet b/tests/data/resume/Norder=0/Dir=0/Npix=11.parquet similarity index 100% rename from tests/hipscat_import/data/resume/Norder=0/Dir=0/Npix=11.parquet rename to tests/data/resume/Norder=0/Dir=0/Npix=11.parquet diff --git a/tests/hipscat_import/data/resume/Norder=1/Dir=0/Npix=44.parquet b/tests/data/resume/Norder=1/Dir=0/Npix=44.parquet similarity index 100% rename from tests/hipscat_import/data/resume/Norder=1/Dir=0/Npix=44.parquet rename to tests/data/resume/Norder=1/Dir=0/Npix=44.parquet diff --git a/tests/hipscat_import/data/resume/Norder=1/Dir=0/Npix=45.parquet b/tests/data/resume/Norder=1/Dir=0/Npix=45.parquet similarity index 100% rename from tests/hipscat_import/data/resume/Norder=1/Dir=0/Npix=45.parquet rename to tests/data/resume/Norder=1/Dir=0/Npix=45.parquet diff --git 
a/tests/hipscat_import/data/resume/Norder=1/Dir=0/Npix=46.parquet b/tests/data/resume/Norder=1/Dir=0/Npix=46.parquet similarity index 100% rename from tests/hipscat_import/data/resume/Norder=1/Dir=0/Npix=46.parquet rename to tests/data/resume/Norder=1/Dir=0/Npix=46.parquet diff --git a/tests/hipscat_import/data/resume/Norder=1/Dir=0/Npix=47.parquet b/tests/data/resume/Norder=1/Dir=0/Npix=47.parquet similarity index 100% rename from tests/hipscat_import/data/resume/Norder=1/Dir=0/Npix=47.parquet rename to tests/data/resume/Norder=1/Dir=0/Npix=47.parquet diff --git a/tests/hipscat_import/data/resume/intermediate/mapping_histogram.binary b/tests/data/resume/intermediate/mapping_histogram.binary similarity index 100% rename from tests/hipscat_import/data/resume/intermediate/mapping_histogram.binary rename to tests/data/resume/intermediate/mapping_histogram.binary diff --git a/tests/hipscat_import/data/small_sky/catalog.csv b/tests/data/small_sky/catalog.csv similarity index 100% rename from tests/hipscat_import/data/small_sky/catalog.csv rename to tests/data/small_sky/catalog.csv diff --git a/tests/hipscat_import/data/small_sky_object_catalog/Norder=0/Dir=0/Npix=11.parquet b/tests/data/small_sky_object_catalog/Norder=0/Dir=0/Npix=11.parquet similarity index 52% rename from tests/hipscat_import/data/small_sky_object_catalog/Norder=0/Dir=0/Npix=11.parquet rename to tests/data/small_sky_object_catalog/Norder=0/Dir=0/Npix=11.parquet index e0cb8d94..229a20c8 100644 Binary files a/tests/hipscat_import/data/small_sky_object_catalog/Norder=0/Dir=0/Npix=11.parquet and b/tests/data/small_sky_object_catalog/Norder=0/Dir=0/Npix=11.parquet differ diff --git a/tests/data/small_sky_object_catalog/_common_metadata b/tests/data/small_sky_object_catalog/_common_metadata new file mode 100644 index 00000000..cc444c69 Binary files /dev/null and b/tests/data/small_sky_object_catalog/_common_metadata differ diff --git a/tests/data/small_sky_object_catalog/_metadata b/tests/data/small_sky_object_catalog/_metadata new file mode 100644 index 00000000..ffca8c5a Binary files /dev/null and b/tests/data/small_sky_object_catalog/_metadata differ diff --git a/tests/hipscat_import/data/small_sky_object_catalog/catalog_info.json b/tests/data/small_sky_object_catalog/catalog_info.json similarity index 100% rename from tests/hipscat_import/data/small_sky_object_catalog/catalog_info.json rename to tests/data/small_sky_object_catalog/catalog_info.json diff --git a/tests/data/small_sky_object_catalog/partition_info.csv b/tests/data/small_sky_object_catalog/partition_info.csv new file mode 100644 index 00000000..7c5eaac4 --- /dev/null +++ b/tests/data/small_sky_object_catalog/partition_info.csv @@ -0,0 +1,2 @@ +Norder,Npix,Dir +0,11,0 diff --git a/tests/data/small_sky_object_catalog/point_map.fits b/tests/data/small_sky_object_catalog/point_map.fits new file mode 100644 index 00000000..a57a1f76 Binary files /dev/null and b/tests/data/small_sky_object_catalog/point_map.fits differ diff --git a/tests/hipscat_import/data/small_sky_object_catalog/provenance_info.json b/tests/data/small_sky_object_catalog/provenance_info.json similarity index 52% rename from tests/hipscat_import/data/small_sky_object_catalog/provenance_info.json rename to tests/data/small_sky_object_catalog/provenance_info.json index 0c390d83..44c9152a 100644 --- a/tests/hipscat_import/data/small_sky_object_catalog/provenance_info.json +++ b/tests/data/small_sky_object_catalog/provenance_info.json @@ -5,46 +5,48 @@ "epoch": "J2000", "ra_column": "ra", "dec_column": 
"dec", - "version": "0.2.1", - "generation_date": "2024.01.09", + "version": "0.3.10.dev6+g5cb658f", + "generation_date": "2024.09.18", "tool_args": { - "tool_name": "hipscat_import", - "version": "0.2.1", + "tool_name": "hats_import", + "version": "0.3.6.dev16+g829fe47.d20240918", "runtime_args": { "catalog_name": "small_sky_object_catalog", - "output_path": "/home/delucchi/git/hipscat-import/tests/hipscat_import/data/", + "output_path": ".", "output_artifact_name": "small_sky_object_catalog", - "tmp_dir": "", - "overwrite": true, - "dask_tmp": "", + "tmp_dir": "/tmp/tmpyxy0lnjn", + "dask_tmp": null, "dask_n_workers": 1, "dask_threads_per_worker": 1, - "catalog_path": "/home/delucchi/git/hipscat-import/tests/hipscat_import/data/small_sky_object_catalog", - "tmp_path": "/home/delucchi/git/hipscat-import/tests/hipscat_import/data/small_sky_object_catalog/intermediate", + "catalog_path": "small_sky_object_catalog", + "tmp_path": "/tmp/tmpyxy0lnjn/small_sky_object_catalog/intermediate", "epoch": "J2000", "catalog_type": "object", - "input_path": "/home/delucchi/git/hipscat-import/tests/hipscat_import/data/small_sky", + "input_path": "small_sky", "input_paths": [ - "file:///home/delucchi/git/hipscat-import/tests/hipscat_import/data/small_sky/catalog.csv" + "small_sky/catalog.csv" ], "input_file_list": [], "ra_column": "ra", "dec_column": "dec", - "use_hipscat_index": false, + "use_healpix_29": false, "sort_columns": null, "constant_healpix_order": -1, - "highest_healpix_order": 7, + "lowest_healpix_order": 0, + "highest_healpix_order": 5, "pixel_threshold": 1000000, - "mapping_healpix_order": 7, + "mapping_healpix_order": 5, "debug_stats_only": false, "file_reader_info": { "input_reader_type": "CsvReader", "chunksize": 500000, "header": "infer", "schema_file": null, - "separator": ",", "column_names": null, - "type_map": {} + "type_map": null, + "parquet_kwargs": null, + "upath_kwargs": null, + "kwargs": {} } } } diff --git a/tests/hipscat_import/data/small_sky_parts/catalog_00_of_05.csv b/tests/data/small_sky_parts/catalog_00_of_05.csv similarity index 100% rename from tests/hipscat_import/data/small_sky_parts/catalog_00_of_05.csv rename to tests/data/small_sky_parts/catalog_00_of_05.csv diff --git a/tests/hipscat_import/data/small_sky_parts/catalog_01_of_05.csv b/tests/data/small_sky_parts/catalog_01_of_05.csv similarity index 100% rename from tests/hipscat_import/data/small_sky_parts/catalog_01_of_05.csv rename to tests/data/small_sky_parts/catalog_01_of_05.csv diff --git a/tests/hipscat_import/data/small_sky_parts/catalog_02_of_05.csv b/tests/data/small_sky_parts/catalog_02_of_05.csv similarity index 100% rename from tests/hipscat_import/data/small_sky_parts/catalog_02_of_05.csv rename to tests/data/small_sky_parts/catalog_02_of_05.csv diff --git a/tests/hipscat_import/data/small_sky_parts/catalog_03_of_05.csv b/tests/data/small_sky_parts/catalog_03_of_05.csv similarity index 100% rename from tests/hipscat_import/data/small_sky_parts/catalog_03_of_05.csv rename to tests/data/small_sky_parts/catalog_03_of_05.csv diff --git a/tests/hipscat_import/data/small_sky_parts/catalog_04_of_05.csv b/tests/data/small_sky_parts/catalog_04_of_05.csv similarity index 100% rename from tests/hipscat_import/data/small_sky_parts/catalog_04_of_05.csv rename to tests/data/small_sky_parts/catalog_04_of_05.csv diff --git a/tests/hipscat_import/data/small_sky_parts/catalog_10_of_05.csv b/tests/data/small_sky_parts/catalog_10_of_05.csv similarity index 100% rename from 
tests/hipscat_import/data/small_sky_parts/catalog_10_of_05.csv rename to tests/data/small_sky_parts/catalog_10_of_05.csv diff --git a/tests/hipscat_import/data/small_sky_source/small_sky_source.csv b/tests/data/small_sky_source/small_sky_source.csv similarity index 100% rename from tests/hipscat_import/data/small_sky_source/small_sky_source.csv rename to tests/data/small_sky_source/small_sky_source.csv diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=0/Dir=0/Npix=4.parquet b/tests/data/small_sky_source_catalog/Norder=0/Dir=0/Npix=4.parquet similarity index 51% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=0/Dir=0/Npix=4.parquet rename to tests/data/small_sky_source_catalog/Norder=0/Dir=0/Npix=4.parquet index 30b43a52..ac897bc3 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=0/Dir=0/Npix=4.parquet and b/tests/data/small_sky_source_catalog/Norder=0/Dir=0/Npix=4.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=1/Dir=0/Npix=47.parquet b/tests/data/small_sky_source_catalog/Norder=1/Dir=0/Npix=47.parquet similarity index 95% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=1/Dir=0/Npix=47.parquet rename to tests/data/small_sky_source_catalog/Norder=1/Dir=0/Npix=47.parquet index bb20f73d..81b1c57f 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=1/Dir=0/Npix=47.parquet and b/tests/data/small_sky_source_catalog/Norder=1/Dir=0/Npix=47.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=176.parquet b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=176.parquet similarity index 80% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=176.parquet rename to tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=176.parquet index 5186c58f..fa0f9aa3 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=176.parquet and b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=176.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=177.parquet b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=177.parquet similarity index 93% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=177.parquet rename to tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=177.parquet index 9ce5e005..73dae0cb 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=177.parquet and b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=177.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=178.parquet b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=178.parquet similarity index 94% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=178.parquet rename to tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=178.parquet index 133f899e..ff6c2576 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=178.parquet and b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=178.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=179.parquet b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=179.parquet similarity index 94% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=179.parquet rename to 
tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=179.parquet index 8454ed03..fdca8693 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=179.parquet and b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=179.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=180.parquet b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=180.parquet similarity index 87% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=180.parquet rename to tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=180.parquet index 83b2806b..cc0cd514 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=180.parquet and b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=180.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=181.parquet b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=181.parquet similarity index 89% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=181.parquet rename to tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=181.parquet index 3af86fb2..eade2729 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=181.parquet and b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=181.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=182.parquet b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=182.parquet similarity index 92% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=182.parquet rename to tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=182.parquet index 66606f31..05b2fe1a 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=182.parquet and b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=182.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=183.parquet b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=183.parquet similarity index 91% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=183.parquet rename to tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=183.parquet index 33b3d8e8..179ce9b4 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=183.parquet and b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=183.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=184.parquet b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=184.parquet similarity index 93% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=184.parquet rename to tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=184.parquet index 0382d917..9af897c8 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=184.parquet and b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=184.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=185.parquet b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=185.parquet similarity index 96% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=185.parquet rename to tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=185.parquet index c3e7edce..83cb6428 100644 Binary files 
a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=185.parquet and b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=185.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=186.parquet b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=186.parquet similarity index 82% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=186.parquet rename to tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=186.parquet index a53f3490..93f2ee68 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=186.parquet and b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=186.parquet differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=187.parquet b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=187.parquet similarity index 87% rename from tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=187.parquet rename to tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=187.parquet index 9eacc4ec..3fdb219f 100644 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=187.parquet and b/tests/data/small_sky_source_catalog/Norder=2/Dir=0/Npix=187.parquet differ diff --git a/tests/data/small_sky_source_catalog/_common_metadata b/tests/data/small_sky_source_catalog/_common_metadata new file mode 100644 index 00000000..1cf5b8d9 Binary files /dev/null and b/tests/data/small_sky_source_catalog/_common_metadata differ diff --git a/tests/data/small_sky_source_catalog/_metadata b/tests/data/small_sky_source_catalog/_metadata new file mode 100644 index 00000000..d142edfc Binary files /dev/null and b/tests/data/small_sky_source_catalog/_metadata differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/catalog_info.json b/tests/data/small_sky_source_catalog/catalog_info.json similarity index 100% rename from tests/hipscat_import/data/small_sky_source_catalog/catalog_info.json rename to tests/data/small_sky_source_catalog/catalog_info.json diff --git a/tests/hipscat_import/data/small_sky_source_catalog/partition_info.csv b/tests/data/small_sky_source_catalog/partition_info.csv similarity index 100% rename from tests/hipscat_import/data/small_sky_source_catalog/partition_info.csv rename to tests/data/small_sky_source_catalog/partition_info.csv diff --git a/tests/data/small_sky_source_catalog/point_map.fits b/tests/data/small_sky_source_catalog/point_map.fits new file mode 100644 index 00000000..dc28a293 Binary files /dev/null and b/tests/data/small_sky_source_catalog/point_map.fits differ diff --git a/tests/data/small_sky_source_catalog/provenance_info.json b/tests/data/small_sky_source_catalog/provenance_info.json new file mode 100644 index 00000000..45ae7214 --- /dev/null +++ b/tests/data/small_sky_source_catalog/provenance_info.json @@ -0,0 +1,53 @@ +{ + "catalog_name": "small_sky_source_catalog", + "catalog_type": "source", + "total_rows": 17161, + "epoch": "J2000", + "ra_column": "source_ra", + "dec_column": "source_dec", + "version": "0.3.10.dev6+g5cb658f", + "generation_date": "2024.09.18", + "tool_args": { + "tool_name": "hats_import", + "version": "0.3.6.dev16+g829fe47.d20240918", + "runtime_args": { + "catalog_name": "small_sky_source_catalog", + "output_path": ".", + "output_artifact_name": "small_sky_source_catalog", + "tmp_dir": "/tmp/tmponzx3o05", + "dask_tmp": null, + "dask_n_workers": 1, + "dask_threads_per_worker": 1, + "catalog_path": 
"small_sky_source_catalog", + "tmp_path": "/tmp/tmponzx3o05/small_sky_source_catalog/intermediate", + "epoch": "J2000", + "catalog_type": "source", + "input_path": "small_sky_source", + "input_paths": [ + "small_sky_source/small_sky_source.csv" + ], + "input_file_list": [], + "ra_column": "source_ra", + "dec_column": "source_dec", + "use_healpix_29": false, + "sort_columns": null, + "constant_healpix_order": -1, + "lowest_healpix_order": 0, + "highest_healpix_order": 5, + "pixel_threshold": 3000, + "mapping_healpix_order": 5, + "debug_stats_only": false, + "file_reader_info": { + "input_reader_type": "CsvReader", + "chunksize": 500000, + "header": "infer", + "schema_file": null, + "column_names": null, + "type_map": null, + "parquet_kwargs": null, + "upath_kwargs": null, + "kwargs": {} + } + } + } +} diff --git a/tests/hipscat_import/data/soap_intermediate/0_4.csv b/tests/data/soap_intermediate/0_4.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/0_4.csv rename to tests/data/soap_intermediate/0_4.csv diff --git a/tests/hipscat_import/data/soap_intermediate/1_47.csv b/tests/data/soap_intermediate/1_47.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/1_47.csv rename to tests/data/soap_intermediate/1_47.csv diff --git a/tests/hipscat_import/data/soap_intermediate/2_176.csv b/tests/data/soap_intermediate/2_176.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/2_176.csv rename to tests/data/soap_intermediate/2_176.csv diff --git a/tests/hipscat_import/data/soap_intermediate/2_177.csv b/tests/data/soap_intermediate/2_177.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/2_177.csv rename to tests/data/soap_intermediate/2_177.csv diff --git a/tests/hipscat_import/data/soap_intermediate/2_178.csv b/tests/data/soap_intermediate/2_178.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/2_178.csv rename to tests/data/soap_intermediate/2_178.csv diff --git a/tests/hipscat_import/data/soap_intermediate/2_179.csv b/tests/data/soap_intermediate/2_179.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/2_179.csv rename to tests/data/soap_intermediate/2_179.csv diff --git a/tests/hipscat_import/data/soap_intermediate/2_180.csv b/tests/data/soap_intermediate/2_180.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/2_180.csv rename to tests/data/soap_intermediate/2_180.csv diff --git a/tests/hipscat_import/data/soap_intermediate/2_181.csv b/tests/data/soap_intermediate/2_181.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/2_181.csv rename to tests/data/soap_intermediate/2_181.csv diff --git a/tests/hipscat_import/data/soap_intermediate/2_182.csv b/tests/data/soap_intermediate/2_182.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/2_182.csv rename to tests/data/soap_intermediate/2_182.csv diff --git a/tests/hipscat_import/data/soap_intermediate/2_183.csv b/tests/data/soap_intermediate/2_183.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/2_183.csv rename to tests/data/soap_intermediate/2_183.csv diff --git a/tests/hipscat_import/data/soap_intermediate/2_184.csv b/tests/data/soap_intermediate/2_184.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/2_184.csv rename to tests/data/soap_intermediate/2_184.csv diff --git 
a/tests/hipscat_import/data/soap_intermediate/2_185.csv b/tests/data/soap_intermediate/2_185.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/2_185.csv rename to tests/data/soap_intermediate/2_185.csv diff --git a/tests/hipscat_import/data/soap_intermediate/2_186.csv b/tests/data/soap_intermediate/2_186.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/2_186.csv rename to tests/data/soap_intermediate/2_186.csv diff --git a/tests/hipscat_import/data/soap_intermediate/2_187.csv b/tests/data/soap_intermediate/2_187.csv similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/2_187.csv rename to tests/data/soap_intermediate/2_187.csv diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_0_4.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_0_4.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_0_4.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_0_4.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_1_47.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_1_47.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_1_47.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_1_47.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_176.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_176.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_176.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_176.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_177.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_177.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_177.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_177.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_178.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_178.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_178.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_178.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_179.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_179.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_179.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_179.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_180.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_180.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_180.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_180.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_181.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_181.parquet 
similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_181.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_181.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_182.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_182.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_182.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_182.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_183.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_183.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_183.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_183.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_184.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_184.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_184.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_184.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_185.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_185.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_185.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_185.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_186.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_186.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_186.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_186.parquet diff --git a/tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_187.parquet b/tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_187.parquet similarity index 100% rename from tests/hipscat_import/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_187.parquet rename to tests/data/soap_intermediate/order_0/dir_0/pixel_11/source_2_187.parquet diff --git a/tests/hipscat_import/data/test_formats/catalog.csv.gz b/tests/data/test_formats/catalog.csv.gz similarity index 100% rename from tests/hipscat_import/data/test_formats/catalog.csv.gz rename to tests/data/test_formats/catalog.csv.gz diff --git a/tests/hipscat_import/data/test_formats/catalog.starr b/tests/data/test_formats/catalog.starr similarity index 100% rename from tests/hipscat_import/data/test_formats/catalog.starr rename to tests/data/test_formats/catalog.starr diff --git a/tests/hipscat_import/data/test_formats/catalog.zip b/tests/data/test_formats/catalog.zip similarity index 100% rename from tests/hipscat_import/data/test_formats/catalog.zip rename to tests/data/test_formats/catalog.zip diff --git a/tests/hipscat_import/data/test_formats/gaia_epoch.ecsv b/tests/data/test_formats/gaia_epoch.ecsv similarity index 100% rename from tests/hipscat_import/data/test_formats/gaia_epoch.ecsv rename to tests/data/test_formats/gaia_epoch.ecsv diff --git a/tests/hipscat_import/data/test_formats/gaia_minimum.csv 
b/tests/data/test_formats/gaia_minimum.csv similarity index 100% rename from tests/hipscat_import/data/test_formats/gaia_minimum.csv rename to tests/data/test_formats/gaia_minimum.csv diff --git a/tests/hipscat_import/data/test_formats/gaia_minimum_schema.parquet b/tests/data/test_formats/gaia_minimum_schema.parquet similarity index 100% rename from tests/hipscat_import/data/test_formats/gaia_minimum_schema.parquet rename to tests/data/test_formats/gaia_minimum_schema.parquet diff --git a/tests/hipscat_import/data/test_formats/headers.csv b/tests/data/test_formats/headers.csv similarity index 100% rename from tests/hipscat_import/data/test_formats/headers.csv rename to tests/data/test_formats/headers.csv diff --git a/tests/data/test_formats/healpix_29_index.parquet b/tests/data/test_formats/healpix_29_index.parquet new file mode 100644 index 00000000..365eb900 Binary files /dev/null and b/tests/data/test_formats/healpix_29_index.parquet differ diff --git a/tests/hipscat_import/data/test_formats/hipscat_index.csv b/tests/data/test_formats/hipscat_index.csv similarity index 99% rename from tests/hipscat_import/data/test_formats/hipscat_index.csv rename to tests/data/test_formats/hipscat_index.csv index 0b4f8a78..b64f2fe6 100644 --- a/tests/hipscat_import/data/test_formats/hipscat_index.csv +++ b/tests/data/test_formats/hipscat_index.csv @@ -1,4 +1,4 @@ -id,_hipscat_index,magnitude,nobs +id,_healpix_29,magnitude,nobs 707,12749688880727326720,22.13496609,264 792,12751184493818150912,6.487240283,395 811,12753202806647685120,23.7801059,268 diff --git a/tests/hipscat_import/data/test_formats/macauff_metadata.yaml b/tests/data/test_formats/macauff_metadata.yaml similarity index 100% rename from tests/hipscat_import/data/test_formats/macauff_metadata.yaml rename to tests/data/test_formats/macauff_metadata.yaml diff --git a/tests/hipscat_import/data/test_formats/pandasindex.parquet b/tests/data/test_formats/pandasindex.parquet similarity index 100% rename from tests/hipscat_import/data/test_formats/pandasindex.parquet rename to tests/data/test_formats/pandasindex.parquet diff --git a/tests/hipscat_import/data/test_formats/pipe_delimited.csv b/tests/data/test_formats/pipe_delimited.csv similarity index 100% rename from tests/hipscat_import/data/test_formats/pipe_delimited.csv rename to tests/data/test_formats/pipe_delimited.csv diff --git a/tests/hipscat_import/data/test_formats/small_sky.fits b/tests/data/test_formats/small_sky.fits similarity index 100% rename from tests/hipscat_import/data/test_formats/small_sky.fits rename to tests/data/test_formats/small_sky.fits diff --git a/tests/hipscat_import/catalog/test_argument_validation.py b/tests/hats_import/catalog/test_argument_validation.py similarity index 96% rename from tests/hipscat_import/catalog/test_argument_validation.py rename to tests/hats_import/catalog/test_argument_validation.py index 2f98f431..3612e69e 100644 --- a/tests/hipscat_import/catalog/test_argument_validation.py +++ b/tests/hats_import/catalog/test_argument_validation.py @@ -1,10 +1,10 @@ """Tests of argument validation""" import pytest -from hipscat.io import write_metadata +from hats.io import write_metadata -from hipscat_import.catalog.arguments import ImportArguments, check_healpix_order_range -from hipscat_import.catalog.file_readers import CsvReader +from hats_import.catalog.arguments import ImportArguments, check_healpix_order_range +from hats_import.catalog.file_readers import CsvReader # pylint: disable=protected-access @@ -185,14 +185,14 @@ def 
test_catalog_type(blank_data_dir, tmp_path): ) -def test_use_hipscat_index(blank_data_dir, tmp_path): +def test_use_healpix_29(blank_data_dir, tmp_path): with pytest.raises(ValueError, match="no sort columns should be added"): ImportArguments( output_artifact_name="catalog", input_path=blank_data_dir, file_reader="csv", output_path=tmp_path, - use_hipscat_index=True, + use_healpix_29=True, sort_columns="foo", ) ImportArguments( @@ -200,7 +200,7 @@ def test_use_hipscat_index(blank_data_dir, tmp_path): input_path=blank_data_dir, file_reader="csv", output_path=tmp_path, - use_hipscat_index=True, + use_healpix_29=True, sort_columns="", # empty string is ok ) diff --git a/tests/hipscat_import/catalog/test_file_readers.py b/tests/hats_import/catalog/test_file_readers.py similarity index 98% rename from tests/hipscat_import/catalog/test_file_readers.py rename to tests/hats_import/catalog/test_file_readers.py index 3bab85df..7da36ba3 100644 --- a/tests/hipscat_import/catalog/test_file_readers.py +++ b/tests/hats_import/catalog/test_file_readers.py @@ -1,14 +1,14 @@ """Test dataframe-generating file readers""" -import hipscat.io.write_metadata as io +import hats.io.write_metadata as io import numpy as np import pandas as pd import pyarrow as pa import pyarrow.parquet as pq import pytest -from hipscat.catalog.catalog import CatalogInfo +from hats.catalog.catalog import CatalogInfo -from hipscat_import.catalog.file_readers import ( +from hats_import.catalog.file_readers import ( CsvReader, FitsReader, IndexedCsvReader, diff --git a/tests/hipscat_import/catalog/test_map_reduce.py b/tests/hats_import/catalog/test_map_reduce.py similarity index 95% rename from tests/hipscat_import/catalog/test_map_reduce.py rename to tests/hats_import/catalog/test_map_reduce.py index 0cc67580..6d72f8b6 100644 --- a/tests/hipscat_import/catalog/test_map_reduce.py +++ b/tests/hats_import/catalog/test_map_reduce.py @@ -4,18 +4,18 @@ import pickle from io import StringIO -import hipscat.pixel_math as hist -import hipscat.pixel_math.healpix_shim as hp +import hats.pixel_math as hist +import hats.pixel_math.healpix_shim as hp import numpy as np import numpy.testing as npt import pandas as pd import pyarrow as pa import pytest -import hipscat_import.catalog.map_reduce as mr -from hipscat_import.catalog.file_readers import get_file_reader -from hipscat_import.catalog.resume_plan import ResumePlan -from hipscat_import.catalog.sparse_histogram import SparseHistogram +import hats_import.catalog.map_reduce as mr +from hats_import.catalog.file_readers import get_file_reader +from hats_import.catalog.resume_plan import ResumePlan +from hats_import.catalog.sparse_histogram import SparseHistogram def pickle_file_reader(tmp_path, file_reader) -> str: @@ -148,7 +148,7 @@ def test_map_headers(tmp_path, formats_headers_csv): assert (result == expected).all() -def test_map_with_hipscat_index(tmp_path, formats_dir, small_sky_single_file): +def test_map_with_healpix_29(tmp_path, formats_dir, small_sky_single_file): (tmp_path / "histograms").mkdir(parents=True) input_file = formats_dir / "hipscat_index.csv" mr.map_to_pixels( @@ -157,7 +157,7 @@ def test_map_with_hipscat_index(tmp_path, formats_dir, small_sky_single_file): highest_order=0, ra_column="NOPE", dec_column="NOPE", - use_hipscat_index=True, # radec don't matter. just use existing index + use_healpix_29=True, # radec don't matter. 
just use existing index resume_path=tmp_path, mapping_key="map_0", ) @@ -175,7 +175,7 @@ def test_map_with_hipscat_index(tmp_path, formats_dir, small_sky_single_file): highest_order=0, ra_column="NOPE", dec_column="NOPE", - use_hipscat_index=True, # no pre-existing index! expect failure. + use_healpix_29=True, # no pre-existing index! expect failure. resume_path=tmp_path, mapping_key="map_0", ) @@ -318,7 +318,7 @@ def test_reduce_order0(parquet_shards_dir, assert_parquet_file_ids, tmp_path): destination_pixel_number=11, destination_pixel_size=131, output_path=tmp_path, - add_hipscat_index=True, + add_healpix_29=True, ra_column="ra", dec_column="dec", sort_columns="id", @@ -331,8 +331,8 @@ def test_reduce_order0(parquet_shards_dir, assert_parquet_file_ids, tmp_path): assert_parquet_file_ids(output_file, "id", expected_ids) -def test_reduce_hipscat_index(parquet_shards_dir, assert_parquet_file_ids, tmp_path): - """Test reducing with or without a _hipscat_index field""" +def test_reduce_healpix_29(parquet_shards_dir, assert_parquet_file_ids, tmp_path): + """Test reducing with or without a _healpix_29 field""" (tmp_path / "reducing").mkdir(parents=True) mr.reduce_pixel_shards( cache_shard_path=parquet_shards_dir, @@ -353,7 +353,7 @@ def test_reduce_hipscat_index(parquet_shards_dir, assert_parquet_file_ids, tmp_p expected_ids = [*range(700, 831)] assert_parquet_file_ids(output_file, "id", expected_ids) data_frame = pd.read_parquet(output_file, engine="pyarrow") - assert data_frame.index.name == "_hipscat_index" + assert data_frame.index.name == "_healpix_29" npt.assert_array_equal( data_frame.columns, ["id", "ra", "dec", "ra_error", "dec_error", "Norder", "Dir", "Npix"], @@ -367,7 +367,7 @@ def test_reduce_hipscat_index(parquet_shards_dir, assert_parquet_file_ids, tmp_p destination_pixel_number=11, destination_pixel_size=131, output_path=tmp_path, - add_hipscat_index=False, ## different from above + add_healpix_29=False, ## different from above ra_column="ra", dec_column="dec", sort_columns="id", @@ -444,7 +444,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path): lonlat=True, nest=True, ) - ## Use this to prune generated columns like Norder, Npix, and _hipscat_index + ## Use this to prune generated columns like Norder, Npix, and _healpix_29 comparison_columns = ["source_id", "object_id", "time", "ra", "dec"] ######################## Sort option 1: by source_id @@ -535,7 +535,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path): ra_column="ra", dec_column="dec", sort_columns="object_id,time", - add_hipscat_index=False, + add_healpix_29=False, delete_input_files=False, ) diff --git a/tests/hipscat_import/catalog/test_resume_plan.py b/tests/hats_import/catalog/test_resume_plan.py similarity index 98% rename from tests/hipscat_import/catalog/test_resume_plan.py rename to tests/hats_import/catalog/test_resume_plan.py index 705e6079..73fb1f11 100644 --- a/tests/hipscat_import/catalog/test_resume_plan.py +++ b/tests/hats_import/catalog/test_resume_plan.py @@ -4,8 +4,8 @@ import numpy.testing as npt import pytest -from hipscat_import.catalog.resume_plan import ResumePlan -from hipscat_import.catalog.sparse_histogram import SparseHistogram +from hats_import.catalog.resume_plan import ResumePlan +from hats_import.catalog.sparse_histogram import SparseHistogram def test_done_checks(tmp_path): diff --git a/tests/hipscat_import/catalog/test_run_import.py b/tests/hats_import/catalog/test_run_import.py similarity index 94% rename from 
tests/hipscat_import/catalog/test_run_import.py rename to tests/hats_import/catalog/test_run_import.py index 96612615..9bd5b77f 100644 --- a/tests/hipscat_import/catalog/test_run_import.py +++ b/tests/hats_import/catalog/test_run_import.py @@ -9,13 +9,13 @@ import pyarrow as pa import pyarrow.parquet as pq import pytest -from hipscat.catalog.catalog import Catalog +from hats.catalog.catalog import Catalog -import hipscat_import.catalog.run_import as runner -from hipscat_import.catalog.arguments import ImportArguments -from hipscat_import.catalog.file_readers import CsvReader -from hipscat_import.catalog.resume_plan import ResumePlan -from hipscat_import.catalog.sparse_histogram import SparseHistogram +import hats_import.catalog.run_import as runner +from hats_import.catalog.arguments import ImportArguments +from hats_import.catalog.file_readers import CsvReader +from hats_import.catalog.resume_plan import ResumePlan +from hats_import.catalog.sparse_histogram import SparseHistogram def test_empty_args(): @@ -79,7 +79,7 @@ def test_resume_dask_runner( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert catalog.catalog_info.ra_column == "ra" @@ -111,7 +111,7 @@ def test_resume_dask_runner( runner.run(args, dask_client) - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert catalog.catalog_info.ra_column == "ra" @@ -177,7 +177,7 @@ def test_resume_dask_runner_diff_pixel_order( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert catalog.catalog_info.ra_column == "ra" @@ -262,7 +262,7 @@ def test_dask_runner( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert catalog.catalog_info.ra_column == "ra" @@ -287,7 +287,7 @@ def test_dask_runner( pa.field("Norder", pa.uint8()), pa.field("Dir", pa.uint64()), pa.field("Npix", pa.uint64()), - pa.field("_hipscat_index", pa.uint64()), + pa.field("_healpix_29", pa.uint64()), ] ) schema = pq.read_metadata(output_file).schema.to_arrow_schema() @@ -337,7 +337,7 @@ def test_dask_runner_stats_only(dask_client, small_sky_parts_dir, tmp_path): assert not os.path.exists(output_file) - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert catalog.catalog_info.ra_column == "ra" diff --git a/tests/hipscat_import/catalog/test_run_round_trip.py b/tests/hats_import/catalog/test_run_round_trip.py similarity index 93% rename from tests/hipscat_import/catalog/test_run_round_trip.py rename to tests/hats_import/catalog/test_run_round_trip.py index b488f320..db0e507d 100644 --- a/tests/hipscat_import/catalog/test_run_round_trip.py +++ b/tests/hats_import/catalog/test_run_round_trip.py @@ -15,12 +15,12 @@ import pyarrow.dataset as pds import pyarrow.parquet as pq import pytest -from hipscat.catalog.catalog 
import Catalog -from hipscat.pixel_math.hipscat_id import hipscat_id_to_healpix +from hats.catalog.catalog import Catalog +from hats.pixel_math.hipscat_id import hipscat_id_to_healpix -import hipscat_import.catalog.run_import as runner -from hipscat_import.catalog.arguments import ImportArguments -from hipscat_import.catalog.file_readers import CsvReader, get_file_reader +import hats_import.catalog.run_import as runner +from hats_import.catalog.arguments import ImportArguments +from hats_import.catalog.file_readers import CsvReader, get_file_reader @pytest.mark.dask @@ -51,7 +51,7 @@ def test_import_source_table( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert catalog.catalog_info.ra_column == "source_ra" @@ -111,7 +111,7 @@ def test_import_mixed_schema_csv( pa.field("Norder", pa.uint8()), pa.field("Dir", pa.uint64()), pa.field("Npix", pa.uint64()), - pa.field("_hipscat_index", pa.uint64()), + pa.field("_healpix_29", pa.uint64()), ] ) schema = pq.read_metadata(output_file).schema.to_arrow_schema() @@ -159,7 +159,7 @@ def test_import_preserve_index( input_file_list=[formats_pandasindex], file_reader="parquet", sort_columns="obs_id", - add_hipscat_index=False, + add_healpix_29=False, output_path=tmp_path, dask_tmp=tmp_path, highest_healpix_order=1, @@ -185,7 +185,7 @@ def test_import_preserve_index( input_file_list=[formats_pandasindex], file_reader="parquet", sort_columns="obs_id", - add_hipscat_index=True, + add_healpix_29=True, output_path=tmp_path, dask_tmp=tmp_path, highest_healpix_order=1, @@ -198,7 +198,7 @@ def test_import_preserve_index( output_file = os.path.join(args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet") data_frame = pd.read_parquet(output_file, engine="pyarrow") - assert data_frame.index.name == "_hipscat_index" + assert data_frame.index.name == "_healpix_29" npt.assert_array_equal( data_frame.columns, ["obs_id", "obj_id", "band", "ra", "dec", "mag", "Norder", "Dir", "Npix"], @@ -229,7 +229,7 @@ def test_import_constant_healpix_order( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path # Check that the partition info file exists - all pixels at order 2! @@ -270,7 +270,7 @@ def test_import_keep_intermediate_files( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path @@ -424,7 +424,7 @@ def test_import_lowest_healpix_order( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path # Check that the partition info file exists - all pixels at order 2! 
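The hunks in this file repeat the PR's central reader rename: every `Catalog.read_from_hipscat(...)` call becomes `Catalog.read_hats(...)`. As a minimal, hedged sketch of the new call (the catalog path is hypothetical; the import and method names are taken verbatim from the hunks above):

```python
# Minimal sketch of the reader rename applied throughout these tests.
# "small_sky_object_catalog" is a hypothetical on-disk catalog path.
from hats.catalog.catalog import Catalog

# Old (hipscat): catalog = Catalog.read_from_hipscat("small_sky_object_catalog")
# New (hats):
catalog = Catalog.read_hats("small_sky_object_catalog")

# The surrounding assertions are unchanged by the rename.
assert catalog.on_disk
```

The tests only swap the call site; the returned catalog object is asserted against exactly as before.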
@@ -475,7 +475,7 @@ def test_import_starr_file( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert catalog.catalog_info.total_rows == 131 @@ -489,32 +489,32 @@ def test_import_starr_file( @pytest.mark.dask -def test_import_hipscat_index( +def test_import_healpix_29( dask_client, formats_dir, assert_parquet_file_ids, tmp_path, ): - """Test basic execution, using a previously-computed _hipscat_index column for spatial partitioning.""" + """Test basic execution, using a previously-computed _healpix_29 column for spatial partitioning.""" ## First, let's just check the assumptions we have about our input file: - ## - should have _hipscat_index as the indexed column + ## - should have _healpix_29 as the indexed column ## - should NOT have any columns like "ra" or "dec" - input_file = formats_dir / "hipscat_index.parquet" + input_file = formats_dir / "healpix_29_index.parquet" expected_ids = [*range(700, 831)] assert_parquet_file_ids(input_file, "id", expected_ids) data_frame = pd.read_parquet(input_file, engine="pyarrow") - assert data_frame.index.name == "_hipscat_index" + assert data_frame.index.name == "_healpix_29" npt.assert_array_equal(data_frame.columns, ["id"]) args = ImportArguments( - output_artifact_name="using_hipscat_index", + output_artifact_name="using_healpix_29", input_file_list=[input_file], file_reader="parquet", output_path=tmp_path, dask_tmp=tmp_path, - use_hipscat_index=True, + use_healpix_29=True, highest_healpix_order=2, pixel_threshold=3_000, progress_bar=False, @@ -523,7 +523,7 @@ def test_import_hipscat_index( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert catalog.catalog_info.total_rows == 131 @@ -535,7 +535,7 @@ def test_import_hipscat_index( expected_ids = [*range(700, 831)] assert_parquet_file_ids(output_file, "id", expected_ids) data_frame = pd.read_parquet(output_file, engine="pyarrow") - assert data_frame.index.name == "_hipscat_index" + assert data_frame.index.name == "_healpix_29" npt.assert_array_equal( data_frame.columns, ["id", "Norder", "Dir", "Npix"], @@ -543,21 +543,21 @@ def test_import_hipscat_index( @pytest.mark.dask -def test_import_hipscat_index_no_pandas( +def test_import_healpix_29_no_pandas( dask_client, formats_dir, assert_parquet_file_ids, tmp_path, ): - """Test basic execution, using a previously-computed _hipscat_index column for spatial partitioning.""" + """Test basic execution, using a previously-computed _healpix_29 column for spatial partitioning.""" input_file = formats_dir / "hipscat_index.csv" args = ImportArguments( - output_artifact_name="using_hipscat_index", + output_artifact_name="using_healpix_29", input_file_list=[input_file], file_reader="csv", output_path=tmp_path, dask_tmp=tmp_path, - use_hipscat_index=True, + use_healpix_29=True, highest_healpix_order=2, pixel_threshold=3_000, progress_bar=False, @@ -566,7 +566,7 @@ def test_import_hipscat_index_no_pandas( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path 
assert catalog.catalog_info.total_rows == 131 @@ -578,7 +578,7 @@ def test_import_hipscat_index_no_pandas( expected_ids = [*range(700, 831)] assert_parquet_file_ids(output_file, "id", expected_ids) data_frame = pd.read_parquet(output_file, engine="pyarrow") - assert data_frame.index.name == "_hipscat_index" + assert data_frame.index.name == "_healpix_29" npt.assert_array_equal( data_frame.columns, ["id", "magnitude", "nobs", "Norder", "Dir", "Npix"], @@ -616,7 +616,7 @@ def test_import_gaia_minimum( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert catalog.catalog_info.total_rows == 5 @@ -627,7 +627,7 @@ def test_import_gaia_minimum( data_frame = pd.read_parquet(output_file) # Make sure that the hipscat index values match the pixel for the partition (0,5) - assert data_frame.index.name == "_hipscat_index" + assert data_frame.index.name == "_healpix_29" hipscat_index_pixels = hipscat_id_to_healpix(data_frame.index.values, 0) npt.assert_array_equal(hipscat_index_pixels, [5, 5, 5]) @@ -663,7 +663,7 @@ def test_gaia_ecsv( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert catalog.catalog_info.total_rows == 3 @@ -729,7 +729,7 @@ def test_gaia_ecsv( pa.field("Norder", pa.uint8()), pa.field("Dir", pa.uint64()), pa.field("Npix", pa.uint64()), - pa.field("_hipscat_index", pa.uint64()), + pa.field("_healpix_29", pa.uint64()), ] ) @@ -782,7 +782,7 @@ def test_import_indexed_csv( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert len(catalog.get_healpix_pixels()) == 1 diff --git a/tests/hipscat_import/catalog/test_sparse_histogram.py b/tests/hats_import/catalog/test_sparse_histogram.py similarity index 97% rename from tests/hipscat_import/catalog/test_sparse_histogram.py rename to tests/hats_import/catalog/test_sparse_histogram.py index 57ce78f2..50b9e68f 100644 --- a/tests/hipscat_import/catalog/test_sparse_histogram.py +++ b/tests/hats_import/catalog/test_sparse_histogram.py @@ -5,7 +5,7 @@ import pytest from scipy.sparse import csr_array -from hipscat_import.catalog.sparse_histogram import SparseHistogram +from hats_import.catalog.sparse_histogram import SparseHistogram def test_read_write_round_trip(tmp_path): diff --git a/tests/hipscat_import/conftest.py b/tests/hats_import/conftest.py similarity index 98% rename from tests/hipscat_import/conftest.py rename to tests/hats_import/conftest.py index 1cd8cbf2..7ef5947e 100644 --- a/tests/hipscat_import/conftest.py +++ b/tests/hats_import/conftest.py @@ -8,7 +8,7 @@ import numpy.testing as npt import pandas as pd import pytest -from hipscat import pixel_math +from hats import pixel_math # pylint: disable=missing-function-docstring, redefined-outer-name @@ -52,7 +52,7 @@ def test_long_running(): @pytest.fixture def test_data_dir(): - return Path(TEST_DIR) / "data" + return Path(TEST_DIR).parent / "data" @pytest.fixture @@ -176,7 +176,7 @@ def basic_data_shard_df(): test_df = pd.DataFrame( data=zip(hipscat_indexes, ras, dec, norder, 
npix), columns=[ - "_hipscat_index", + "_healpix_29", "weird_ra", "weird_dec", "Norder", @@ -197,7 +197,7 @@ def polar_data_shard_df(): test_df = pd.DataFrame( data=zip(hipscat_indexes, ras, dec, norder, npix), columns=[ - "_hipscat_index", + "_healpix_29", "weird_ra", "weird_dec", "Norder", diff --git a/tests/hipscat_import/index/test_index_argument.py b/tests/hats_import/index/test_index_argument.py similarity index 94% rename from tests/hipscat_import/index/test_index_argument.py rename to tests/hats_import/index/test_index_argument.py index 2d97432e..706c2075 100644 --- a/tests/hipscat_import/index/test_index_argument.py +++ b/tests/hats_import/index/test_index_argument.py @@ -4,7 +4,7 @@ import pytest -from hipscat_import.index.arguments import IndexArguments +from hats_import.index.arguments import IndexArguments def test_none(): @@ -83,7 +83,7 @@ def test_column_inclusion_args(tmp_path, small_sky_object_catalog): indexing_column="id", output_path=tmp_path, output_artifact_name="small_sky_object_index", - include_hipscat_index=False, + include_healpix_29=False, include_order_pixel=False, ) _ = IndexArguments( @@ -91,7 +91,7 @@ def test_column_inclusion_args(tmp_path, small_sky_object_catalog): indexing_column="id", output_path=tmp_path, output_artifact_name="small_sky_object_index", - include_hipscat_index=True, + include_healpix_29=True, include_order_pixel=True, ) @@ -100,7 +100,7 @@ def test_column_inclusion_args(tmp_path, small_sky_object_catalog): indexing_column="id", output_path=tmp_path, output_artifact_name="small_sky_object_index", - include_hipscat_index=True, + include_healpix_29=True, include_order_pixel=False, ) _ = IndexArguments( @@ -108,7 +108,7 @@ def test_column_inclusion_args(tmp_path, small_sky_object_catalog): indexing_column="id", output_path=tmp_path, output_artifact_name="small_sky_object_index", - include_hipscat_index=False, + include_healpix_29=False, include_order_pixel=True, ) @@ -119,9 +119,9 @@ def test_extra_columns(tmp_path, small_sky_object_catalog): indexing_column="id", output_path=tmp_path, output_artifact_name="small_sky_object_index", - extra_columns=["_hipscat_index"], + extra_columns=["_healpix_29"], ) - assert args.extra_columns == ["_hipscat_index"] + assert args.extra_columns == ["_healpix_29"] args = IndexArguments( input_catalog_path=small_sky_object_catalog, @@ -161,7 +161,7 @@ def test_to_catalog_info(small_sky_object_catalog, tmp_path): indexing_column="id", output_path=tmp_path, output_artifact_name="small_sky_object_index", - include_hipscat_index=True, + include_healpix_29=True, include_order_pixel=True, ) catalog_info = args.to_catalog_info(total_rows=10) @@ -176,7 +176,7 @@ def test_provenance_info(small_sky_object_catalog, tmp_path): indexing_column="id", output_path=tmp_path, output_artifact_name="small_sky_object_index", - include_hipscat_index=True, + include_healpix_29=True, include_order_pixel=True, ) diff --git a/tests/hipscat_import/index/test_index_map_reduce.py b/tests/hats_import/index/test_index_map_reduce.py similarity index 91% rename from tests/hipscat_import/index/test_index_map_reduce.py rename to tests/hats_import/index/test_index_map_reduce.py index 025d0f02..26ad233e 100644 --- a/tests/hipscat_import/index/test_index_map_reduce.py +++ b/tests/hats_import/index/test_index_map_reduce.py @@ -5,8 +5,8 @@ import pandas as pd import pytest -import hipscat_import.index.map_reduce as mr -from hipscat_import.index.arguments import IndexArguments +import hats_import.index.map_reduce as mr +from 
hats_import.index.arguments import IndexArguments @pytest.mark.dask @@ -34,19 +34,19 @@ def test_create_index( data_frame = pd.read_parquet(output_file, engine="pyarrow") npt.assert_array_equal( data_frame.columns, - ["_hipscat_index", "Norder", "Dir", "Npix"], + ["_healpix_29", "Norder", "Dir", "Npix"], ) assert data_frame.index.name == "id" assert (data_frame["Norder"] == 0).all() @pytest.mark.dask -def test_create_index_no_hipscat_index(small_sky_object_catalog, tmp_path, dask_client): - """Create an index for simple object catalog, without the _hipscat_index field.""" +def test_create_index_no_healpix_29(small_sky_object_catalog, tmp_path, dask_client): + """Create an index for simple object catalog, without the _healpix_29 field.""" args = IndexArguments( input_catalog_path=small_sky_object_catalog, indexing_column="id", - include_hipscat_index=False, + include_healpix_29=False, output_path=tmp_path, output_artifact_name="small_sky_object_index", progress_bar=False, @@ -77,7 +77,7 @@ def test_create_index_no_order_pixel(small_sky_object_catalog, tmp_path, dask_cl output_file = tmp_path / "small_sky_object_index" / "index" / "part.0.parquet" data_frame = pd.read_parquet(output_file, engine="pyarrow") - npt.assert_array_equal(data_frame.columns, ["_hipscat_index"]) + npt.assert_array_equal(data_frame.columns, ["_healpix_29"]) assert data_frame.index.name == "id" @@ -101,7 +101,7 @@ def test_create_index_source(small_sky_source_catalog, assert_parquet_file_index data_frame = pd.read_parquet(output_file, engine="pyarrow") npt.assert_array_equal( data_frame.columns, - ["_hipscat_index", "Norder", "Dir", "Npix"], + ["_healpix_29", "Norder", "Dir", "Npix"], ) assert data_frame.index.name == "source_id" assert len(data_frame) == 17161 @@ -140,7 +140,7 @@ def test_create_index_with_divisions( data_frame = pd.read_parquet(output_file, engine="pyarrow") npt.assert_array_equal( data_frame.columns, - ["_hipscat_index", "Norder", "Dir", "Npix"], + ["_healpix_29", "Norder", "Dir", "Npix"], ) assert data_frame.index.name == "source_id" assert len(data_frame) == 17161 @@ -173,7 +173,7 @@ def test_create_index_source_by_object( data_frame = pd.read_parquet(output_file, engine="pyarrow") npt.assert_array_equal( data_frame.columns, - ["_hipscat_index", "Norder", "Dir", "Npix"], + ["_healpix_29", "Norder", "Dir", "Npix"], ) assert data_frame.index.name == "object_id" assert len(data_frame) == 17161 @@ -205,7 +205,7 @@ def test_create_index_extra_columns( data_frame = pd.read_parquet(output_file, engine="pyarrow") npt.assert_array_equal( data_frame.columns, - ["_hipscat_index", "source_ra", "Norder", "Dir", "Npix"], + ["_healpix_29", "source_ra", "Norder", "Dir", "Npix"], ) assert data_frame.index.name == "object_id" assert len(data_frame) == 17161 diff --git a/tests/hipscat_import/index/test_run_index.py b/tests/hats_import/index/test_run_index.py similarity index 91% rename from tests/hipscat_import/index/test_run_index.py rename to tests/hats_import/index/test_run_index.py index a465e557..cc36ac68 100644 --- a/tests/hipscat_import/index/test_run_index.py +++ b/tests/hats_import/index/test_run_index.py @@ -6,10 +6,10 @@ import pyarrow as pa import pyarrow.parquet as pq import pytest -from hipscat.catalog.dataset.dataset import Dataset +from hats.catalog.dataset.dataset import Dataset -import hipscat_import.index.run_index as runner -from hipscat_import.index.arguments import IndexArguments +import hats_import.index.run_index as runner +from hats_import.index.arguments import IndexArguments def 
test_empty_args(): @@ -43,13 +43,13 @@ def test_run_index( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Dataset.read_from_hipscat(args.catalog_path) + catalog = Dataset.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path basic_index_parquet_schema = pa.schema( [ - pa.field("_hipscat_index", pa.uint64()), + pa.field("_healpix_29", pa.uint64()), pa.field("Norder", pa.uint8()), pa.field("Dir", pa.uint64()), pa.field("Npix", pa.uint64()), @@ -86,13 +86,13 @@ def test_run_index_on_source( runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Dataset.read_from_hipscat(args.catalog_path) + catalog = Dataset.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path basic_index_parquet_schema = pa.schema( [ - pa.field("_hipscat_index", pa.uint64()), + pa.field("_healpix_29", pa.uint64()), pa.field("Norder", pa.uint8()), pa.field("Dir", pa.uint64()), pa.field("Npix", pa.uint64()), @@ -125,13 +125,13 @@ def test_run_index_on_source_object_id( indexing_column="object_id", output_path=tmp_path, output_artifact_name="small_sky_source_object_id_index", - include_hipscat_index=False, + include_healpix_29=False, progress_bar=False, ) runner.run(args, dask_client) # Check that the catalog metadata file exists - catalog = Dataset.read_from_hipscat(args.catalog_path) + catalog = Dataset.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path diff --git a/tests/hipscat_import/margin_cache/test_arguments_margin_cache.py b/tests/hats_import/margin_cache/test_arguments_margin_cache.py similarity index 96% rename from tests/hipscat_import/margin_cache/test_arguments_margin_cache.py rename to tests/hats_import/margin_cache/test_arguments_margin_cache.py index 9729a195..5fbbbf1e 100644 --- a/tests/hipscat_import/margin_cache/test_arguments_margin_cache.py +++ b/tests/hats_import/margin_cache/test_arguments_margin_cache.py @@ -1,10 +1,10 @@ """Tests of margin cache generation arguments""" import pytest -from hipscat.io import write_metadata -from hipscat.pixel_math.healpix_pixel import HealpixPixel +from hats.io import write_metadata +from hats.pixel_math.healpix_pixel import HealpixPixel -from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments +from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments def test_empty_required(tmp_path): diff --git a/tests/hipscat_import/margin_cache/test_margin_cache.py b/tests/hats_import/margin_cache/test_margin_cache.py similarity index 85% rename from tests/hipscat_import/margin_cache/test_margin_cache.py rename to tests/hats_import/margin_cache/test_margin_cache.py index 3b79e25b..9543ac46 100644 --- a/tests/hipscat_import/margin_cache/test_margin_cache.py +++ b/tests/hats_import/margin_cache/test_margin_cache.py @@ -4,13 +4,13 @@ import numpy.testing as npt import pandas as pd import pytest -from hipscat.catalog import PartitionInfo -from hipscat.catalog.healpix_dataset.healpix_dataset import HealpixDataset -from hipscat.io import paths -from hipscat.pixel_math.healpix_pixel import HealpixPixel +from hats.catalog import PartitionInfo +from hats.catalog.healpix_dataset.healpix_dataset import HealpixDataset +from hats.io import paths +from hats.pixel_math.healpix_pixel import HealpixPixel -import hipscat_import.margin_cache.margin_cache as mc -from hipscat_import.margin_cache.margin_cache_arguments import 
MarginCacheArguments +import hats_import.margin_cache.margin_cache as mc +from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments @pytest.mark.dask(timeout=150) @@ -66,9 +66,9 @@ def test_margin_cache_gen(small_sky_source_catalog, tmp_path, dask_client): "margin_Npix", ], ) - assert data.index.name == "_hipscat_index" + assert data.index.name == "_healpix_29" - catalog = HealpixDataset.read_from_hipscat(args.catalog_path) + catalog = HealpixDataset.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path diff --git a/tests/hipscat_import/margin_cache/test_margin_cache_map_reduce.py b/tests/hats_import/margin_cache/test_margin_cache_map_reduce.py similarity index 96% rename from tests/hipscat_import/margin_cache/test_margin_cache_map_reduce.py rename to tests/hats_import/margin_cache/test_margin_cache_map_reduce.py index 7e029423..b84c6131 100644 --- a/tests/hipscat_import/margin_cache/test_margin_cache_map_reduce.py +++ b/tests/hats_import/margin_cache/test_margin_cache_map_reduce.py @@ -1,15 +1,15 @@ import os -import hipscat.pixel_math.healpix_shim as hp +import hats.pixel_math.healpix_shim as hp import numpy as np import pandas as pd import pytest -from hipscat import pixel_math -from hipscat.io import paths -from hipscat.pixel_math.healpix_pixel import HealpixPixel +from hats import pixel_math +from hats.io import paths +from hats.pixel_math.healpix_pixel import HealpixPixel -from hipscat_import.margin_cache import margin_cache_map_reduce -from hipscat_import.pipeline_resume_plan import get_pixel_cache_directory +from hats_import.margin_cache import margin_cache_map_reduce +from hats_import.pipeline_resume_plan import get_pixel_cache_directory keep_cols = ["weird_ra", "weird_dec"] @@ -179,7 +179,7 @@ def test_reduce_margin_shards(tmp_path): test_df = pd.DataFrame( data=zip(hipscat_indexes, ras, dec, norder, ndir, npix, margin_order, margin_dir, margin_pixels), columns=[ - "_hipscat_index", + "_healpix_29", "weird_ra", "weird_dec", "Norder", diff --git a/tests/hipscat_import/margin_cache/test_margin_cache_resume_plan.py b/tests/hats_import/margin_cache/test_margin_cache_resume_plan.py similarity index 93% rename from tests/hipscat_import/margin_cache/test_margin_cache_resume_plan.py rename to tests/hats_import/margin_cache/test_margin_cache_resume_plan.py index 34087ada..f9b3a3a2 100644 --- a/tests/hipscat_import/margin_cache/test_margin_cache_resume_plan.py +++ b/tests/hats_import/margin_cache/test_margin_cache_resume_plan.py @@ -1,10 +1,10 @@ import numpy as np import numpy.testing as npt import pytest -from hipscat.catalog import Catalog +from hats.catalog import Catalog -from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments -from hipscat_import.margin_cache.margin_cache_resume_plan import ( +from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments +from hats_import.margin_cache.margin_cache_resume_plan import ( MarginCachePlan, _find_partition_margin_pixel_pairs, ) @@ -101,7 +101,7 @@ def test_some_reducing_task_failures(small_sky_margin_args, dask_client): def test_partition_margin_pixel_pairs(small_sky_source_catalog): """Ensure partition_margin_pixel_pairs can generate main partition pixels.""" - source_catalog = Catalog.read_from_hipscat(small_sky_source_catalog) + source_catalog = Catalog.read_hats(small_sky_source_catalog) margin_pairs = _find_partition_margin_pixel_pairs(source_catalog.get_healpix_pixels(), 3) expected = np.array([0, 2, 8, 10, 32, 34, 40, 
42, 192, 192]) @@ -112,7 +112,7 @@ def test_partition_margin_pixel_pairs(small_sky_source_catalog): def test_partition_margin_pixel_pairs_negative(small_sky_source_catalog): """Ensure partition_margin_pixel_pairs can generate negative tree pixels.""" - source_catalog = Catalog.read_from_hipscat(small_sky_source_catalog) + source_catalog = Catalog.read_hats(small_sky_source_catalog) partition_stats = source_catalog.get_healpix_pixels() negative_pixels = source_catalog.generate_negative_tree_pixels() diff --git a/tests/hipscat_import/margin_cache/test_margin_round_trip.py b/tests/hats_import/margin_cache/test_margin_round_trip.py similarity index 81% rename from tests/hipscat_import/margin_cache/test_margin_round_trip.py rename to tests/hats_import/margin_cache/test_margin_round_trip.py index b557cb77..7fe87a0a 100644 --- a/tests/hipscat_import/margin_cache/test_margin_round_trip.py +++ b/tests/hats_import/margin_cache/test_margin_round_trip.py @@ -5,16 +5,16 @@ import pandas as pd import pytest -from hipscat.catalog.catalog import Catalog -from hipscat.catalog.healpix_dataset.healpix_dataset import HealpixDataset -from hipscat.io import paths -from hipscat.pixel_math.healpix_pixel import HealpixPixel +from hats.catalog.catalog import Catalog +from hats.catalog.healpix_dataset.healpix_dataset import HealpixDataset +from hats.io import paths +from hats.pixel_math.healpix_pixel import HealpixPixel -import hipscat_import.catalog.run_import as runner -import hipscat_import.margin_cache.margin_cache as mc -from hipscat_import.catalog.arguments import ImportArguments -from hipscat_import.catalog.file_readers import CsvReader, get_file_reader -from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments +import hats_import.catalog.run_import as runner +import hats_import.margin_cache.margin_cache as mc +from hats_import.catalog.arguments import ImportArguments +from hats_import.catalog.file_readers import CsvReader, get_file_reader +from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments @pytest.mark.dask(timeout=180) @@ -49,7 +49,7 @@ def test_margin_import_gaia_minimum( runner.run(args, dask_client) # Check that the catalog metadata file exists - Catalog.read_from_hipscat(args.catalog_path) + Catalog.read_hats(args.catalog_path) args = MarginCacheArguments( margin_threshold=180.0, @@ -61,7 +61,7 @@ def test_margin_import_gaia_minimum( ) mc.generate_margin_cache(args, dask_client) - catalog = HealpixDataset.read_from_hipscat(args.catalog_path) + catalog = HealpixDataset.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert len(catalog.get_healpix_pixels()) == 1 @@ -102,7 +102,7 @@ def test_margin_import_mixed_schema_csv( progress_bar=False, ) runner.run(args, dask_client) - catalog = Catalog.read_from_hipscat(args.catalog_path) + catalog = Catalog.read_hats(args.catalog_path) assert len(catalog.get_healpix_pixels()) == 8 args = MarginCacheArguments( @@ -115,7 +115,7 @@ def test_margin_import_mixed_schema_csv( ) mc.generate_margin_cache(args, dask_client) - catalog = HealpixDataset.read_from_hipscat(args.catalog_path) + catalog = HealpixDataset.read_hats(args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == args.catalog_path assert len(catalog.get_healpix_pixels()) == 5 diff --git a/tests/hipscat_import/soap/conftest.py b/tests/hats_import/soap/conftest.py similarity index 94% rename from tests/hipscat_import/soap/conftest.py rename to tests/hats_import/soap/conftest.py index 
72161ca8..93400d92 100644 --- a/tests/hipscat_import/soap/conftest.py +++ b/tests/hats_import/soap/conftest.py @@ -1,7 +1,7 @@ import pytest -from hipscat.pixel_math.healpix_pixel import HealpixPixel +from hats.pixel_math.healpix_pixel import HealpixPixel -from hipscat_import.soap.arguments import SoapArguments +from hats_import.soap.arguments import SoapArguments @pytest.fixture diff --git a/tests/hipscat_import/soap/test_run_soap.py b/tests/hats_import/soap/test_run_soap.py similarity index 93% rename from tests/hipscat_import/soap/test_run_soap.py rename to tests/hats_import/soap/test_run_soap.py index a3fd5333..323a0289 100644 --- a/tests/hipscat_import/soap/test_run_soap.py +++ b/tests/hats_import/soap/test_run_soap.py @@ -5,10 +5,10 @@ import pyarrow as pa import pyarrow.parquet as pq import pytest -from hipscat.catalog.association_catalog.association_catalog import AssociationCatalog +from hats.catalog.association_catalog.association_catalog import AssociationCatalog -import hipscat_import.soap.run_soap as runner -from hipscat_import.soap.arguments import SoapArguments +import hats_import.soap.run_soap as runner +from hats_import.soap.arguments import SoapArguments def test_empty_args(): @@ -30,7 +30,7 @@ def test_object_to_source(dask_client, small_sky_soap_args): runner.run(small_sky_soap_args, dask_client) ## Check that the association data can be parsed as a valid association catalog. - catalog = AssociationCatalog.read_from_hipscat(small_sky_soap_args.catalog_path) + catalog = AssociationCatalog.read_hats(small_sky_soap_args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == small_sky_soap_args.catalog_path assert len(catalog.get_join_pixels()) == 14 @@ -54,7 +54,7 @@ def test_object_to_self(dask_client, tmp_path, small_sky_object_catalog): runner.run(small_sky_soap_args, dask_client) ## Check that the association data can be parsed as a valid association catalog. - catalog = AssociationCatalog.read_from_hipscat(small_sky_soap_args.catalog_path) + catalog = AssociationCatalog.read_hats(small_sky_soap_args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == small_sky_soap_args.catalog_path assert len(catalog.get_join_pixels()) == 1 @@ -81,7 +81,7 @@ def test_object_to_source_with_leaves( runner.run(small_sky_soap_args, dask_client) ## Check that the association data can be parsed as a valid association catalog. - catalog = AssociationCatalog.read_from_hipscat(small_sky_soap_args.catalog_path) + catalog = AssociationCatalog.read_hats(small_sky_soap_args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == small_sky_soap_args.catalog_path assert len(catalog.get_join_pixels()) == 14 @@ -148,7 +148,7 @@ def test_object_to_source_with_leaves_drop_duplicates( runner.run(small_sky_soap_args, dask_client) ## Check that the association data can be parsed as a valid association catalog. 
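The SOAP hunks here and below apply the same reader rename to association catalogs. A hedged sketch of validating SOAP output under the new name (the output path is hypothetical; the import and accessors appear verbatim in this diff):

```python
# Sketch: read SOAP output as an association catalog with the renamed reader.
# "small_sky_association_catalog" is a hypothetical output path.
from hats.catalog.association_catalog.association_catalog import AssociationCatalog

catalog = AssociationCatalog.read_hats("small_sky_association_catalog")
assert catalog.on_disk
# The tests above expect 14 join pixels for the small-sky object/source pair.
print(len(catalog.get_join_pixels()))
```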
- catalog = AssociationCatalog.read_from_hipscat(small_sky_soap_args.catalog_path) + catalog = AssociationCatalog.read_hats(small_sky_soap_args.catalog_path) assert catalog.on_disk assert catalog.catalog_path == small_sky_soap_args.catalog_path assert len(catalog.get_join_pixels()) == 14 diff --git a/tests/hipscat_import/soap/test_soap_arguments.py b/tests/hats_import/soap/test_soap_arguments.py similarity index 98% rename from tests/hipscat_import/soap/test_soap_arguments.py rename to tests/hats_import/soap/test_soap_arguments.py index d40f5ed3..b398d8ab 100644 --- a/tests/hipscat_import/soap/test_soap_arguments.py +++ b/tests/hats_import/soap/test_soap_arguments.py @@ -1,6 +1,6 @@ import pytest -from hipscat_import.soap.arguments import SoapArguments +from hats_import.soap.arguments import SoapArguments def test_none(): diff --git a/tests/hipscat_import/soap/test_soap_map_reduce.py b/tests/hats_import/soap/test_soap_map_reduce.py similarity index 96% rename from tests/hipscat_import/soap/test_soap_map_reduce.py rename to tests/hats_import/soap/test_soap_map_reduce.py index c605eb29..366788b4 100644 --- a/tests/hipscat_import/soap/test_soap_map_reduce.py +++ b/tests/hats_import/soap/test_soap_map_reduce.py @@ -8,10 +8,10 @@ import pandas as pd import pyarrow.parquet as pq import pytest -from hipscat.pixel_math.healpix_pixel import HealpixPixel +from hats.pixel_math.healpix_pixel import HealpixPixel -from hipscat_import.soap.arguments import SoapArguments -from hipscat_import.soap.map_reduce import combine_partial_results, count_joins, reduce_joins +from hats_import.soap.arguments import SoapArguments +from hats_import.soap.map_reduce import combine_partial_results, count_joins, reduce_joins def test_count_joins(small_sky_soap_args, tmp_path, small_sky_soap_maps): diff --git a/tests/hipscat_import/soap/test_soap_resume_plan.py b/tests/hats_import/soap/test_soap_resume_plan.py similarity index 92% rename from tests/hipscat_import/soap/test_soap_resume_plan.py rename to tests/hats_import/soap/test_soap_resume_plan.py index 29e56e58..5986694d 100644 --- a/tests/hipscat_import/soap/test_soap_resume_plan.py +++ b/tests/hats_import/soap/test_soap_resume_plan.py @@ -4,17 +4,17 @@ from pathlib import Path import pytest -from hipscat.catalog import Catalog -from hipscat.catalog.catalog_info import CatalogInfo -from hipscat.pixel_math.healpix_pixel import HealpixPixel +from hats.catalog import Catalog +from hats.catalog.catalog_info import CatalogInfo +from hats.pixel_math.healpix_pixel import HealpixPixel -from hipscat_import.soap.resume_plan import SoapPlan, source_to_object_map +from hats_import.soap.resume_plan import SoapPlan, source_to_object_map def test_source_to_object_map(small_sky_object_catalog, small_sky_source_catalog, small_sky_soap_maps): """Test creating plan map for object and source catalogs.""" - object_catalog = Catalog.read_from_hipscat(small_sky_object_catalog) - source_catalog = Catalog.read_from_hipscat(small_sky_source_catalog) + object_catalog = Catalog.read_hats(small_sky_object_catalog) + source_catalog = Catalog.read_hats(small_sky_source_catalog) source_to_object = source_to_object_map(object_catalog, source_catalog) assert source_to_object == small_sky_soap_maps @@ -41,8 +41,8 @@ def test_object_to_source_map(small_sky_object_catalog, small_sky_source_catalog ] } ## Oh, we're so silly! 
- object_catalog = Catalog.read_from_hipscat(small_sky_source_catalog) - source_catalog = Catalog.read_from_hipscat(small_sky_object_catalog) + object_catalog = Catalog.read_hats(small_sky_source_catalog) + source_catalog = Catalog.read_hats(small_sky_object_catalog) source_to_object = source_to_object_map(object_catalog, source_catalog) assert source_to_object == expected diff --git a/tests/hats_import/test_packaging.py b/tests/hats_import/test_packaging.py new file mode 100644 index 00000000..c19c92f7 --- /dev/null +++ b/tests/hats_import/test_packaging.py @@ -0,0 +1,6 @@ +import hats_import + + +def test_hats_import_version(): + """Check to see that we can get the hats-import version""" + assert hats_import.__version__ is not None diff --git a/tests/hipscat_import/test_pipeline_resume_plan.py b/tests/hats_import/test_pipeline_resume_plan.py similarity index 98% rename from tests/hipscat_import/test_pipeline_resume_plan.py rename to tests/hats_import/test_pipeline_resume_plan.py index 7334d6a2..b1bfeac7 100644 --- a/tests/hipscat_import/test_pipeline_resume_plan.py +++ b/tests/hats_import/test_pipeline_resume_plan.py @@ -5,7 +5,7 @@ import numpy.testing as npt import pytest -from hipscat_import.pipeline_resume_plan import PipelineResumePlan, get_formatted_stage_name +from hats_import.pipeline_resume_plan import PipelineResumePlan, get_formatted_stage_name def test_done_key(tmp_path): diff --git a/tests/hipscat_import/test_runtime_arguments.py b/tests/hats_import/test_runtime_arguments.py similarity index 98% rename from tests/hipscat_import/test_runtime_arguments.py rename to tests/hats_import/test_runtime_arguments.py index cea801cc..523ca933 100644 --- a/tests/hipscat_import/test_runtime_arguments.py +++ b/tests/hats_import/test_runtime_arguments.py @@ -2,7 +2,7 @@ import pytest -from hipscat_import.runtime_arguments import RuntimeArguments +from hats_import.runtime_arguments import RuntimeArguments # pylint: disable=protected-access diff --git a/tests/hipscat_import/verification/test_run_verification.py b/tests/hats_import/verification/test_run_verification.py similarity index 85% rename from tests/hipscat_import/verification/test_run_verification.py rename to tests/hats_import/verification/test_run_verification.py index c672af7f..33be14f5 100644 --- a/tests/hipscat_import/verification/test_run_verification.py +++ b/tests/hats_import/verification/test_run_verification.py @@ -1,7 +1,7 @@ import pytest -import hipscat_import.verification.run_verification as runner -from hipscat_import.verification.arguments import VerificationArguments +import hats_import.verification.run_verification as runner +from hats_import.verification.arguments import VerificationArguments def test_bad_args(): diff --git a/tests/hipscat_import/verification/test_verification_arguments.py b/tests/hats_import/verification/test_verification_arguments.py similarity index 93% rename from tests/hipscat_import/verification/test_verification_arguments.py rename to tests/hats_import/verification/test_verification_arguments.py index 8ebd6c81..2b41e853 100644 --- a/tests/hipscat_import/verification/test_verification_arguments.py +++ b/tests/hats_import/verification/test_verification_arguments.py @@ -1,9 +1,9 @@ """Tests of argument validation""" import pytest -from hipscat.catalog import Catalog +from hats.catalog import Catalog -from hipscat_import.verification.arguments import VerificationArguments +from hats_import.verification.arguments import VerificationArguments def test_none(): @@ -55,7 +55,7 @@ def 
test_good_paths(tmp_path, small_sky_object_catalog): def test_catalog_object(tmp_path, small_sky_object_catalog): """Required arguments are provided, and paths are found.""" - small_sky_catalog_object = Catalog.read_from_hipscat(catalog_path=small_sky_object_catalog) + small_sky_catalog_object = Catalog.read_hats(catalog_path=small_sky_object_catalog) tmp_path_str = str(tmp_path) args = VerificationArguments( input_catalog=small_sky_catalog_object, diff --git a/tests/hipscat_import/data/indexed_files/csv_list_double_1_of_2.txt b/tests/hipscat_import/data/indexed_files/csv_list_double_1_of_2.txt deleted file mode 100644 index 8e9c9d54..00000000 --- a/tests/hipscat_import/data/indexed_files/csv_list_double_1_of_2.txt +++ /dev/null @@ -1,3 +0,0 @@ -tests/hipscat_import/data/small_sky_parts/catalog_00_of_05.csv -tests/hipscat_import/data/small_sky_parts/catalog_01_of_05.csv - diff --git a/tests/hipscat_import/data/indexed_files/csv_list_double_2_of_2.txt b/tests/hipscat_import/data/indexed_files/csv_list_double_2_of_2.txt deleted file mode 100644 index 352c08ea..00000000 --- a/tests/hipscat_import/data/indexed_files/csv_list_double_2_of_2.txt +++ /dev/null @@ -1,3 +0,0 @@ -tests/hipscat_import/data/small_sky_parts/catalog_02_of_05.csv -tests/hipscat_import/data/small_sky_parts/catalog_03_of_05.csv -tests/hipscat_import/data/small_sky_parts/catalog_04_of_05.csv \ No newline at end of file diff --git a/tests/hipscat_import/data/indexed_files/csv_list_single.txt b/tests/hipscat_import/data/indexed_files/csv_list_single.txt deleted file mode 100644 index 04817f83..00000000 --- a/tests/hipscat_import/data/indexed_files/csv_list_single.txt +++ /dev/null @@ -1,6 +0,0 @@ -tests/hipscat_import/data/small_sky_parts/catalog_00_of_05.csv -tests/hipscat_import/data/small_sky_parts/catalog_01_of_05.csv -tests/hipscat_import/data/small_sky_parts/catalog_02_of_05.csv -tests/hipscat_import/data/small_sky_parts/catalog_03_of_05.csv -tests/hipscat_import/data/small_sky_parts/catalog_04_of_05.csv - diff --git a/tests/hipscat_import/data/indexed_files/parquet_list_single.txt b/tests/hipscat_import/data/indexed_files/parquet_list_single.txt deleted file mode 100644 index 63e5b84f..00000000 --- a/tests/hipscat_import/data/indexed_files/parquet_list_single.txt +++ /dev/null @@ -1,5 +0,0 @@ -tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_0_0.parquet -tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_1_0.parquet -tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_2_0.parquet -tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_3_0.parquet -tests/hipscat_import/data/parquet_shards/order_0/dir_0/pixel_11/shard_4_0.parquet diff --git a/tests/hipscat_import/data/small_sky_object_catalog/_common_metadata b/tests/hipscat_import/data/small_sky_object_catalog/_common_metadata deleted file mode 100644 index 4cf7a744..00000000 Binary files a/tests/hipscat_import/data/small_sky_object_catalog/_common_metadata and /dev/null differ diff --git a/tests/hipscat_import/data/small_sky_object_catalog/_metadata b/tests/hipscat_import/data/small_sky_object_catalog/_metadata deleted file mode 100644 index 26df207b..00000000 Binary files a/tests/hipscat_import/data/small_sky_object_catalog/_metadata and /dev/null differ diff --git a/tests/hipscat_import/data/small_sky_object_catalog/partition_info.csv b/tests/hipscat_import/data/small_sky_object_catalog/partition_info.csv deleted file mode 100644 index ed015721..00000000 --- 
a/tests/hipscat_import/data/small_sky_object_catalog/partition_info.csv +++ /dev/null @@ -1,2 +0,0 @@ -Norder,Dir,Npix,num_rows -0,0,11,131 diff --git a/tests/hipscat_import/data/small_sky_object_catalog/point_map.fits b/tests/hipscat_import/data/small_sky_object_catalog/point_map.fits deleted file mode 100644 index 1971966f..00000000 Binary files a/tests/hipscat_import/data/small_sky_object_catalog/point_map.fits and /dev/null differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/_common_metadata b/tests/hipscat_import/data/small_sky_source_catalog/_common_metadata deleted file mode 100644 index 3f78df69..00000000 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/_common_metadata and /dev/null differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/_metadata b/tests/hipscat_import/data/small_sky_source_catalog/_metadata deleted file mode 100644 index 9cc2015e..00000000 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/_metadata and /dev/null differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/point_map.fits b/tests/hipscat_import/data/small_sky_source_catalog/point_map.fits deleted file mode 100644 index e0ac82b9..00000000 Binary files a/tests/hipscat_import/data/small_sky_source_catalog/point_map.fits and /dev/null differ diff --git a/tests/hipscat_import/data/small_sky_source_catalog/provenance_info.json b/tests/hipscat_import/data/small_sky_source_catalog/provenance_info.json deleted file mode 100644 index bdc01d54..00000000 --- a/tests/hipscat_import/data/small_sky_source_catalog/provenance_info.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "catalog_name": "small_sky_source_catalog", - "catalog_type": "source", - "total_rows": 17161, - "epoch": "J2000", - "ra_column": "source_ra", - "dec_column": "source_dec", - "version": "0.2.6.dev9+ga051d36", - "generation_date": "2024.02.21", - "tool_args": { - "tool_name": "hipscat_import", - "version": "0.2.4.dev9+g5808d3f", - "runtime_args": { - "catalog_name": "small_sky_source_catalog", - "output_path": "/home/delucchi/git/hipscat-import/tests/hipscat_import/data", - "output_artifact_name": "small_sky_source_catalog", - "tmp_dir": "", - "overwrite": true, - "dask_tmp": "/tmp/user/11115/pytest-of-delucchi/pytest-184/test_import_source_table0", - "dask_n_workers": 1, - "dask_threads_per_worker": 1, - "catalog_path": "/home/delucchi/git/hipscat-import/tests/hipscat_import/data/small_sky_source_catalog", - "tmp_path": "/tmp/user/11115/pytest-of-delucchi/pytest-184/test_import_source_table0/small_sky_source_catalog/intermediate", - "epoch": "J2000", - "catalog_type": "source", - "input_path": "/home/delucchi/git/hipscat-import/tests/hipscat_import/data/small_sky_source", - "input_paths": [ - "file:///home/delucchi/git/hipscat-import/tests/hipscat_import/data/small_sky_source/small_sky_source.csv" - ], - "input_file_list": [], - "ra_column": "source_ra", - "dec_column": "source_dec", - "use_hipscat_index": false, - "sort_columns": "source_id", - "constant_healpix_order": -1, - "highest_healpix_order": 2, - "pixel_threshold": 3000, - "mapping_healpix_order": 2, - "debug_stats_only": false, - "file_reader_info": { - "input_reader_type": "CsvReader", - "chunksize": 500000, - "header": "infer", - "schema_file": null, - "separator": ",", - "column_names": null, - "type_map": {} - } - } - } -} diff --git a/tests/hipscat_import/data/test_formats/hipscat_index.parquet b/tests/hipscat_import/data/test_formats/hipscat_index.parquet deleted file mode 100644 index 44bdf663..00000000 
Binary files a/tests/hipscat_import/data/test_formats/hipscat_index.parquet and /dev/null differ diff --git a/tests/hipscat_import/test_packaging.py b/tests/hipscat_import/test_packaging.py deleted file mode 100644 index 877c0ddf..00000000 --- a/tests/hipscat_import/test_packaging.py +++ /dev/null @@ -1,6 +0,0 @@ -import hipscat_import - - -def test_hipscat_import_version(): - """Check to see that we can get the hipscat-import version""" - assert hipscat_import.__version__ is not None