Skip to content

Commit

Permalink
Clean up test data generation (#16)
Browse files Browse the repository at this point in the history
* Clean up test data generation

* Almanac data generation.

* Linting
  • Loading branch information
delucchi-cmu committed Mar 19, 2024
1 parent 2a35ae8 commit 7f5e6ca
Show file tree
Hide file tree
Showing 28 changed files with 451 additions and 202 deletions.
86 changes: 0 additions & 86 deletions copy_data_to_fs.py

This file was deleted.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ dependencies = [
dev = [
"asv==0.6.1", # Used to compute performance benchmarks
"black", # Used for static linting of files
"jupyter", # clear notebook result cells
"pre-commit", # Used to run checks before finalizing a git commit
"pylint", # Used for static linting of files
"pytest",
Expand Down
30 changes: 29 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

import pytest

# Names of the test data directories.
DATA_DIR_NAME = "data"
ALMANAC_DIR_NAME = "almanac"
SMALL_SKY_DIR_NAME = "small_sky"
SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1"


# Directory that contains this conftest module.
TEST_DIR = os.path.dirname(__file__)
# NOTE(review): repeats the SMALL_SKY_DIR_NAME assignment above with the same
# value — possibly residue from the diff view; confirm before removing.
SMALL_SKY_DIR_NAME = "small_sky"
Expand Down Expand Up @@ -49,3 +52,28 @@ def local_data_dir():
@pytest.fixture
def small_sky_dir_local(local_data_dir):
    """Return the ``small_sky`` directory path under the local data directory."""
    small_sky_path = os.path.join(local_data_dir, SMALL_SKY_DIR_NAME)
    return small_sky_path


@pytest.fixture
def tmp_dir_cloud(example_cloud_path):
    """Return the ``tmp`` directory path under the example cloud path."""
    tmp_location = os.path.join(example_cloud_path, "tmp")
    return tmp_location


@pytest.fixture
def test_data_dir_cloud(example_cloud_path):
    """Return the test data directory path under the example cloud path.

    The directory name comes from the module-level ``DATA_DIR_NAME`` constant
    rather than a repeated string literal, matching how the sibling fixtures
    build their paths from the ``*_DIR_NAME`` constants.
    """
    return os.path.join(example_cloud_path, DATA_DIR_NAME)


@pytest.fixture
def almanac_dir_cloud(test_data_dir_cloud):
    """Return the almanac directory path under the cloud test data directory."""
    almanac_path = os.path.join(test_data_dir_cloud, ALMANAC_DIR_NAME)
    return almanac_path


@pytest.fixture
def small_sky_dir_cloud(test_data_dir_cloud):
    """Return the ``small_sky`` directory path under the cloud test data directory."""
    small_sky_path = os.path.join(test_data_dir_cloud, SMALL_SKY_DIR_NAME)
    return small_sky_path


@pytest.fixture
def small_sky_order1_dir_cloud(test_data_dir_cloud):
    """Return the ``small_sky_order1`` directory path under the cloud test data directory."""
    order1_path = os.path.join(test_data_dir_cloud, SMALL_SKY_ORDER1_DIR_NAME)
    return order1_path
170 changes: 170 additions & 0 deletions tests/data/generate_cloud_data.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# CLOUD unit test data\n",
"\n",
"There are two types of data used in unit tests in this repo: local and cloud. This notebook concerns itself only with the CLOUD versions of the test data, and exists so that you can re-generate them.\n",
"\n",
"This also works to initialize data in a new cloud provider, instead of simply copying an existing data set.\n",
"\n",
"## Object catalog: small sky\n",
"\n",
"This is the same \"object catalog\" with 131 randomly generated radec values inside the order0-pixel11 healpix pixel that is used in hipscat and LSDB unit test suites."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import hipscat_import.pipeline as runner\n",
"from hipscat_import.catalog.arguments import ImportArguments\n",
"from hipscat_import.index.arguments import IndexArguments\n",
"from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments\n",
"import tempfile\n",
"from pathlib import Path\n",
"import os\n",
"\n",
"tmp_path = tempfile.TemporaryDirectory()\n",
"tmp_dir = tmp_path.name\n",
"\n",
"storage_options = {\n",
" \"account_key\": os.environ.get(\"ABFS_LINCCDATA_ACCOUNT_KEY\"),\n",
" \"account_name\": os.environ.get(\"ABFS_LINCCDATA_ACCOUNT_NAME\"),\n",
"}\n",
"storage_options"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### small_sky\n",
"\n",
"This catalog was generated with the following snippet:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"args = ImportArguments(\n",
" input_path=\"small_sky_parts\",\n",
" highest_healpix_order=1,\n",
" file_reader=\"csv\",\n",
" output_path=\"abfs://hipscat/pytests/data\",\n",
" output_artifact_name=\"small_sky\",\n",
" output_storage_options=storage_options,\n",
" overwrite=True,\n",
" tmp_dir=tmp_dir,\n",
" dask_tmp=tmp_dir,\n",
")\n",
"runner.pipeline(args)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### small_sky_order1\n",
"\n",
"This catalog has the same data points as other small sky catalogs, but is forced to spread these data points over partitions at order 1, instead of order 0.\n",
"\n",
"This means there are 4 leaf partition files, instead of just 1, and so it can be useful for confirming reads/writes over multiple leaf partition files.\n",
"\n",
"NB: Setting `constant_healpix_order` forces the import pipeline to create leaf partitions at order 1.\n",
"\n",
"This catalog was generated with the following snippet:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"args = ImportArguments(\n",
" input_path=\"small_sky_parts\",\n",
" file_reader=\"csv\",\n",
" constant_healpix_order=1,\n",
" output_path=\"abfs://hipscat/pytests/data\",\n",
" output_storage_options=storage_options,\n",
" output_artifact_name=\"small_sky_order1\",\n",
" tmp_dir=tmp_dir,\n",
" dask_tmp=tmp_dir,\n",
" overwrite=True,\n",
")\n",
"runner.pipeline(args)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Almanac info\n",
"\n",
"For the above catalogs, create almanac data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from hipscat.inspection.almanac import Almanac\n",
"from hipscat.inspection.almanac_info import AlmanacInfo\n",
"\n",
"almanac_info = AlmanacInfo.from_catalog_dir(\n",
" \"abfs://hipscat/pytests/data/small_sky\", storage_options=storage_options\n",
")\n",
"almanac_info.write_to_file(\n",
" directory=\"abfs://hipscat/pytests/data/almanac\", default_dir=False, storage_options=storage_options\n",
")\n",
"\n",
"almanac_info = AlmanacInfo.from_catalog_dir(\n",
" \"abfs://hipscat/pytests/data/small_sky_order1\", storage_options=storage_options\n",
")\n",
"almanac_info.write_to_file(\n",
" directory=\"abfs://hipscat/pytests/data/almanac\", default_dir=False, storage_options=storage_options\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tmp_path.cleanup()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "hipscatenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 7f5e6ca

Please sign in to comment.