diff --git a/copy_data_to_fs.py b/copy_data_to_fs.py deleted file mode 100644 index a1c3202..0000000 --- a/copy_data_to_fs.py +++ /dev/null @@ -1,86 +0,0 @@ -import os - -from hipscat.io.file_io.file_io import get_fs - - -def copy_tree_fs_to_fs( - fs1_source: str, - fs2_destination: str, - storage_options1: dict = None, - storage_options2: dict = None, - verbose=False, -): - """Recursive Copies directory from one filesystem to the other. - - Args: - fs1_source: location of source directory to copy - fs2_destination: location of destination directory to for fs1 to be written two - storage_options1: dictionary that contains abstract filesystem1 credentials - storage_options2: dictionary that contains abstract filesystem2 credentials - """ - - source_fs, source_fp = get_fs(fs1_source, storage_options=storage_options1) - destination_fs, desintation_fp = get_fs(fs2_destination, storage_options=storage_options2) - copy_dir(source_fs, source_fp, destination_fs, desintation_fp, verbose=verbose) - - -def copy_dir( - source_fs, - source_fp, - destination_fs, - desintation_fp, - verbose=False, - chunksize=1024 * 1024, -): - """Recursive method to copy directories and their contents. 
- - Args: - fs1: fsspec.filesystem for the source directory contents - fs1_pointer: source directory to copy content files - fs2: fsspec.filesytem for destination directory - fs2_pointer: destination directory for copied contents - """ - destination_folder = os.path.join(desintation_fp, source_fp.split("/")[-1]) - if destination_folder[-1] != "/": - destination_folder += "/" - if not destination_fs.exists(destination_folder): - if verbose: - print(f"Creating destination folder: {destination_folder}") - destination_fs.makedirs(destination_folder, exist_ok=True) - - dir_contents = source_fs.listdir(source_fp) - files = [x for x in source_fs.listdir(source_fp) if x["type"] == "file"] - - for _file in files: - destination_fname = os.path.join(destination_folder, _file["name"].split("/")[-1]) - if verbose: - print(f'Copying file {_file["name"]} to {destination_fname}') - with source_fs.open(_file["name"], "rb") as source_file: - with destination_fs.open(destination_fname, "wb") as destination_file: - while True: - chunk = source_file.read(chunksize) - if not chunk: - break - destination_file.write(chunk) - - dirs = [x for x in dir_contents if x["type"] == "directory"] - for _dir in dirs: - copy_dir( - source_fs, - _dir["name"], - destination_fs, - destination_folder, - chunksize=chunksize, - verbose=verbose, - ) - - -if __name__ == "__main__": - source_pw = f"{os.getcwd()}/../tests/data" - target_pw = "abfs://hipscat/pytests/lsdb" - - target_so = { - "account_key": os.environ.get("ABFS_LINCCDATA_ACCOUNT_KEY"), - "account_name": os.environ.get("ABFS_LINCCDATA_ACCOUNT_NAME"), - } - copy_tree_fs_to_fs(source_pw, target_pw, {}, target_so, verbose=True) diff --git a/pyproject.toml b/pyproject.toml index 065a452..81ed41f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ dev = [ "asv==0.6.1", # Used to compute performance benchmarks "black", # Used for static linting of files + "jupyter", # clear notebook result cells "pre-commit", # Used to 
run checks before finalizing a git commit "pylint", # Used for static linting of files "pytest", diff --git a/tests/conftest.py b/tests/conftest.py index 8c09079..266b07e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,7 +2,10 @@ import pytest -DATA_DIR_NAME = "data" +ALMANAC_DIR_NAME = "almanac" +SMALL_SKY_DIR_NAME = "small_sky" +SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1" + TEST_DIR = os.path.dirname(__file__) SMALL_SKY_DIR_NAME = "small_sky" @@ -49,3 +52,28 @@ def local_data_dir(): @pytest.fixture def small_sky_dir_local(local_data_dir): return os.path.join(local_data_dir, SMALL_SKY_DIR_NAME) + + +@pytest.fixture +def tmp_dir_cloud(example_cloud_path): + return os.path.join(example_cloud_path, "tmp") + + +@pytest.fixture +def test_data_dir_cloud(example_cloud_path): + return os.path.join(example_cloud_path, "data") + + +@pytest.fixture +def almanac_dir_cloud(test_data_dir_cloud): + return os.path.join(test_data_dir_cloud, ALMANAC_DIR_NAME) + + +@pytest.fixture +def small_sky_dir_cloud(test_data_dir_cloud): + return os.path.join(test_data_dir_cloud, SMALL_SKY_DIR_NAME) + + +@pytest.fixture +def small_sky_order1_dir_cloud(test_data_dir_cloud): + return os.path.join(test_data_dir_cloud, SMALL_SKY_ORDER1_DIR_NAME) diff --git a/tests/data/generate_cloud_data.ipynb b/tests/data/generate_cloud_data.ipynb new file mode 100644 index 0000000..c59b0c6 --- /dev/null +++ b/tests/data/generate_cloud_data.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CLOUD unit test data\n", + "\n", + "There are two types of data used in unit tests in this repo: local and cloud. 
This notebook concerns itself only with the CLOUD versions of test data, so you can re-generate it.\n", + "\n", + "This also works to initialize data in a new cloud provider, instead of simply copying an existing data set.\n", + "\n", + "## Object catalog: small sky\n", + "\n", + "This is the same \"object catalog\" with 131 randomly generated radec values inside the order0-pixel11 healpix pixel that is used in hipscat and LSDB unit test suites." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import hipscat_import.pipeline as runner\n", + "from hipscat_import.catalog.arguments import ImportArguments\n", + "from hipscat_import.index.arguments import IndexArguments\n", + "from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments\n", + "import tempfile\n", + "from pathlib import Path\n", + "import os\n", + "\n", + "tmp_path = tempfile.TemporaryDirectory()\n", + "tmp_dir = tmp_path.name\n", + "\n", + "storage_options = {\n", + " \"account_key\": os.environ.get(\"ABFS_LINCCDATA_ACCOUNT_KEY\"),\n", + " \"account_name\": os.environ.get(\"ABFS_LINCCDATA_ACCOUNT_NAME\"),\n", + "}\n", + "storage_options" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### small_sky\n", + "\n", + "This catalog was generated with the following snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "args = ImportArguments(\n", + " input_path=\"small_sky_parts\",\n", + " highest_healpix_order=1,\n", + " file_reader=\"csv\",\n", + " output_path=\"abfs://hipscat/pytests/data\",\n", + " output_artifact_name=\"small_sky\",\n", + " output_storage_options=storage_options,\n", + " overwrite=True,\n", + " tmp_dir=tmp_dir,\n", + " dask_tmp=tmp_dir,\n", + ")\n", + "runner.pipeline(args)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### small_sky_order1\n", + "\n", + "This catalog has the 
same data points as other small sky catalogs, but is forced to spread these data points across partitions at order 1, instead of order 0.\n", + "\n", + "This means there are 4 leaf partition files, instead of just 1, so it can be useful for confirming reads/writes over multiple leaf partition files.\n", + "\n", + "NB: Setting `constant_healpix_order` coerces the import pipeline to create leaf partitions at order 1.\n", + "\n", + "This catalog was generated with the following snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "args = ImportArguments(\n", + " input_path=\"small_sky_parts\",\n", + " file_reader=\"csv\",\n", + " constant_healpix_order=1,\n", + " output_path=\"abfs://hipscat/pytests/data\",\n", + " output_storage_options=storage_options,\n", + " output_artifact_name=\"small_sky_order1\",\n", + " tmp_dir=tmp_dir,\n", + " dask_tmp=tmp_dir,\n", + " overwrite=True,\n", + ")\n", + "runner.pipeline(args)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Almanac info\n", + "\n", + "For the above catalogs, create almanac data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from hipscat.inspection.almanac import Almanac\n", + "from hipscat.inspection.almanac_info import AlmanacInfo\n", + "\n", + "almanac_info = AlmanacInfo.from_catalog_dir(\n", + " \"abfs://hipscat/pytests/data/small_sky\", storage_options=storage_options\n", + ")\n", + "almanac_info.write_to_file(\n", + " directory=\"abfs://hipscat/pytests/data/almanac\", default_dir=False, storage_options=storage_options\n", + ")\n", + "\n", + "almanac_info = AlmanacInfo.from_catalog_dir(\n", + " \"abfs://hipscat/pytests/data/small_sky_order1\", storage_options=storage_options\n", + ")\n", + "almanac_info.write_to_file(\n", + " directory=\"abfs://hipscat/pytests/data/almanac\", default_dir=False, storage_options=storage_options\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tmp_path.cleanup()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "hipscatenv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/data/generate_local_data.ipynb b/tests/data/generate_local_data.ipynb new file mode 100644 index 0000000..d260fb0 --- /dev/null +++ b/tests/data/generate_local_data.ipynb @@ -0,0 +1,114 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LOCAL unit test data\n", + "\n", + "There are two types of data used in unit tests in this repo: local and cloud. 
This notebook concerns itself only with the local versions of test data, so you can re-generate it.\n", + "\n", + "## Object catalog: small sky\n", + "\n", + "This is the same \"object catalog\" with 131 randomly generated radec values inside the order0-pixel11 healpix pixel that is used in hipscat and LSDB unit test suites." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import hipscat_import.pipeline as runner\n", + "from hipscat_import.catalog.arguments import ImportArguments\n", + "from hipscat_import.index.arguments import IndexArguments\n", + "from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments\n", + "import tempfile\n", + "from pathlib import Path\n", + "\n", + "tmp_path = tempfile.TemporaryDirectory()\n", + "tmp_dir = tmp_path.name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### small_sky\n", + "\n", + "This catalog was generated with the following snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "args = ImportArguments(\n", + " input_path=\"small_sky_parts\",\n", + " output_path=\".\",\n", + " file_reader=\"csv\",\n", + " output_artifact_name=\"small_sky\",\n", + " overwrite=True,\n", + " tmp_dir=tmp_dir,\n", + ")\n", + "runner.pipeline(args)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### small_sky_order1\n", + "\n", + "This catalog has the same data points as other small sky catalogs, but is forced to spread these data points across partitions at order 1, instead of order 0.\n", + "\n", + "This means there are 4 leaf partition files, instead of just 1, so it can be useful for confirming reads/writes over multiple leaf partition files.\n", + "\n", + "NB: Setting `constant_healpix_order` coerces the import pipeline to create leaf partitions at order 1.\n", + "\n", + "This catalog was generated with the following 
snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "args = ImportArguments(\n", + " input_path=\"small_sky_parts\",\n", + " output_path=\".\",\n", + " file_reader=\"csv\",\n", + " output_artifact_name=\"small_sky_order1\",\n", + " constant_healpix_order=1,\n", + " overwrite=True,\n", + " tmp_dir=tmp_dir,\n", + ")\n", + "runner.pipeline(args)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "hipscatenv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/data/small_sky/Norder=0/Dir=0/Npix=11.parquet b/tests/data/small_sky/Norder=0/Dir=0/Npix=11.parquet index 3f46bcd..e0cb8d9 100644 Binary files a/tests/data/small_sky/Norder=0/Dir=0/Npix=11.parquet and b/tests/data/small_sky/Norder=0/Dir=0/Npix=11.parquet differ diff --git a/tests/data/small_sky/_common_metadata b/tests/data/small_sky/_common_metadata new file mode 100644 index 0000000..4cf7a74 Binary files /dev/null and b/tests/data/small_sky/_common_metadata differ diff --git a/tests/data/small_sky/_metadata b/tests/data/small_sky/_metadata new file mode 100644 index 0000000..26df207 Binary files /dev/null and b/tests/data/small_sky/_metadata differ diff --git a/tests/data/small_sky/catalog_info.json b/tests/data/small_sky/catalog_info.json index ebfe52e..b3fd9a2 100644 --- a/tests/data/small_sky/catalog_info.json +++ b/tests/data/small_sky/catalog_info.json @@ -1,12 +1,8 @@ { "catalog_name": "small_sky", - "catalog_type": "source", - "version": "0.0.1", - "generation_date": "2022.12.20", + "catalog_type": "object", + "total_rows": 131, "epoch": "J2000", - "ra_kw": "ra", - "dec_kw": "dec", - "id_kw": "id", - 
"total_objects": 131, - "pixel_threshold": 1000000 -} \ No newline at end of file + "ra_column": "ra", + "dec_column": "dec" +} diff --git a/tests/data/small_sky/partition_info.csv b/tests/data/small_sky/partition_info.csv index ed01572..7c5eaac 100644 --- a/tests/data/small_sky/partition_info.csv +++ b/tests/data/small_sky/partition_info.csv @@ -1,2 +1,2 @@ -Norder,Dir,Npix,num_rows -0,0,11,131 +Norder,Npix,Dir +0,11,0 diff --git a/tests/data/small_sky/point_map.fits b/tests/data/small_sky/point_map.fits index 1a5b0a6..1971966 100644 Binary files a/tests/data/small_sky/point_map.fits and b/tests/data/small_sky/point_map.fits differ diff --git a/tests/data/small_sky/provenance_info.json b/tests/data/small_sky/provenance_info.json new file mode 100644 index 0000000..5cd5602 --- /dev/null +++ b/tests/data/small_sky/provenance_info.json @@ -0,0 +1,55 @@ +{ + "catalog_name": "small_sky", + "catalog_type": "object", + "total_rows": 131, + "epoch": "J2000", + "ra_column": "ra", + "dec_column": "dec", + "version": "0.2.9.dev2+g014342d", + "generation_date": "2024.03.18", + "tool_args": { + "tool_name": "hipscat_import", + "version": "0.2.6.dev6+gf95440a", + "runtime_args": { + "catalog_name": "small_sky", + "output_path": ".", + "output_artifact_name": "small_sky", + "tmp_dir": "/tmp/user/11115/tmpw6p6hl9p", + "overwrite": true, + "dask_tmp": "", + "dask_n_workers": 1, + "dask_threads_per_worker": 1, + "catalog_path": "./small_sky", + "tmp_path": "/tmp/user/11115/tmpw6p6hl9p/small_sky/intermediate", + "epoch": "J2000", + "catalog_type": "object", + "input_path": "small_sky_parts", + "input_paths": [ + "file:///home/delucchi/git/hipscat-cloudtests/tests/data/small_sky_parts/catalog_00_of_05.csv", + "file:///home/delucchi/git/hipscat-cloudtests/tests/data/small_sky_parts/catalog_01_of_05.csv", + "file:///home/delucchi/git/hipscat-cloudtests/tests/data/small_sky_parts/catalog_02_of_05.csv", + 
"file:///home/delucchi/git/hipscat-cloudtests/tests/data/small_sky_parts/catalog_03_of_05.csv", + "file:///home/delucchi/git/hipscat-cloudtests/tests/data/small_sky_parts/catalog_04_of_05.csv" + ], + "input_file_list": [], + "ra_column": "ra", + "dec_column": "dec", + "use_hipscat_index": false, + "sort_columns": null, + "constant_healpix_order": -1, + "lowest_healpix_order": 0, + "highest_healpix_order": 7, + "pixel_threshold": 1000000, + "mapping_healpix_order": 7, + "debug_stats_only": false, + "file_reader_info": { + "input_reader_type": "CsvReader", + "chunksize": 500000, + "schema_file": null, + "column_names": null, + "parquet_kwargs": null, + "kwargs": {} + } + } + } +} diff --git a/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=44.parquet b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=44.parquet index 30ef36c..fb1e07a 100644 Binary files a/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=44.parquet and b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=44.parquet differ diff --git a/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=45.parquet b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=45.parquet index b0af099..0f69158 100644 Binary files a/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=45.parquet and b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=45.parquet differ diff --git a/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=46.parquet b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=46.parquet index 7d4ad21..fcef836 100644 Binary files a/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=46.parquet and b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=46.parquet differ diff --git a/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=47.parquet b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=47.parquet index 01f723d..527d15d 100644 Binary files a/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=47.parquet and b/tests/data/small_sky_order1/Norder=1/Dir=0/Npix=47.parquet differ diff --git a/tests/data/small_sky_order1/_common_metadata 
b/tests/data/small_sky_order1/_common_metadata new file mode 100644 index 0000000..4cf7a74 Binary files /dev/null and b/tests/data/small_sky_order1/_common_metadata differ diff --git a/tests/data/small_sky_order1/_metadata b/tests/data/small_sky_order1/_metadata new file mode 100644 index 0000000..3ec7ff0 Binary files /dev/null and b/tests/data/small_sky_order1/_metadata differ diff --git a/tests/data/small_sky_order1/catalog_info.json b/tests/data/small_sky_order1/catalog_info.json index 614fa55..a61f882 100644 --- a/tests/data/small_sky_order1/catalog_info.json +++ b/tests/data/small_sky_order1/catalog_info.json @@ -1,12 +1,8 @@ { "catalog_name": "small_sky_order1", - "catalog_type": "source", - "version": "0.0.0", - "generation_date": "2022.12.21", + "catalog_type": "object", + "total_rows": 131, "epoch": "J2000", - "ra_kw": "ra", - "dec_kw": "dec", - "id_kw": "id", - "total_objects": 131, - "pixel_threshold": 50 -} \ No newline at end of file + "ra_column": "ra", + "dec_column": "dec" +} diff --git a/tests/data/small_sky_order1/partition_info.csv b/tests/data/small_sky_order1/partition_info.csv index d15927f..f8dac6d 100644 --- a/tests/data/small_sky_order1/partition_info.csv +++ b/tests/data/small_sky_order1/partition_info.csv @@ -1,5 +1,5 @@ -Norder,Dir,Npix,num_rows -1,0,44,42 -1,0,45,29 -1,0,46,42 -1,0,47,18 +Norder,Npix,Dir +1,44,0 +1,45,0 +1,46,0 +1,47,0 diff --git a/tests/data/small_sky_order1/point_map.fits b/tests/data/small_sky_order1/point_map.fits index 1a5b0a6..e7287c9 100644 Binary files a/tests/data/small_sky_order1/point_map.fits and b/tests/data/small_sky_order1/point_map.fits differ diff --git a/tests/data/small_sky_order1/provenance_info.json b/tests/data/small_sky_order1/provenance_info.json new file mode 100644 index 0000000..c4b7dc3 --- /dev/null +++ b/tests/data/small_sky_order1/provenance_info.json @@ -0,0 +1,55 @@ +{ + "catalog_name": "small_sky_order1", + "catalog_type": "object", + "total_rows": 131, + "epoch": "J2000", + "ra_column": 
"ra", + "dec_column": "dec", + "version": "0.2.9.dev2+g014342d", + "generation_date": "2024.03.18", + "tool_args": { + "tool_name": "hipscat_import", + "version": "0.2.6.dev6+gf95440a", + "runtime_args": { + "catalog_name": "small_sky_order1", + "output_path": ".", + "output_artifact_name": "small_sky_order1", + "tmp_dir": "/tmp/user/11115/tmpw6p6hl9p", + "overwrite": true, + "dask_tmp": "", + "dask_n_workers": 1, + "dask_threads_per_worker": 1, + "catalog_path": "./small_sky_order1", + "tmp_path": "/tmp/user/11115/tmpw6p6hl9p/small_sky_order1/intermediate", + "epoch": "J2000", + "catalog_type": "object", + "input_path": "small_sky_parts", + "input_paths": [ + "file:///home/delucchi/git/hipscat-cloudtests/tests/data/small_sky_parts/catalog_00_of_05.csv", + "file:///home/delucchi/git/hipscat-cloudtests/tests/data/small_sky_parts/catalog_01_of_05.csv", + "file:///home/delucchi/git/hipscat-cloudtests/tests/data/small_sky_parts/catalog_02_of_05.csv", + "file:///home/delucchi/git/hipscat-cloudtests/tests/data/small_sky_parts/catalog_03_of_05.csv", + "file:///home/delucchi/git/hipscat-cloudtests/tests/data/small_sky_parts/catalog_04_of_05.csv" + ], + "input_file_list": [], + "ra_column": "ra", + "dec_column": "dec", + "use_hipscat_index": false, + "sort_columns": null, + "constant_healpix_order": 1, + "lowest_healpix_order": 0, + "highest_healpix_order": 7, + "pixel_threshold": 1000000, + "mapping_healpix_order": 1, + "debug_stats_only": false, + "file_reader_info": { + "input_reader_type": "CsvReader", + "chunksize": 500000, + "schema_file": null, + "column_names": null, + "parquet_kwargs": null, + "kwargs": {} + } + } + } +} diff --git a/tests/hipscat/conftest.py b/tests/hipscat/conftest.py index 3a81295..bae789b 100644 --- a/tests/hipscat/conftest.py +++ b/tests/hipscat/conftest.py @@ -3,44 +3,9 @@ import pytest -ALMANAC_DIR_NAME = "almanac" -SMALL_SKY_DIR_NAME = "small_sky" -SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1" -SMALL_SKY_TO_SMALL_SKY_ORDER1_DIR_NAME = 
"small_sky_to_small_sky_order1" - # pylint: disable=missing-function-docstring, redefined-outer-name -@pytest.fixture -def tmp_dir_cloud(example_cloud_path): - return os.path.join(example_cloud_path, "hipscat", "tmp") - - -@pytest.fixture -def test_data_dir_cloud(example_cloud_path): - return os.path.join(example_cloud_path, "hipscat", "data") - - -@pytest.fixture -def almanac_dir_cloud(test_data_dir_cloud): - return os.path.join(test_data_dir_cloud, ALMANAC_DIR_NAME) - - -@pytest.fixture -def small_sky_dir_cloud(test_data_dir_cloud): - return os.path.join(test_data_dir_cloud, SMALL_SKY_DIR_NAME) - - -@pytest.fixture -def small_sky_order1_dir_cloud(test_data_dir_cloud): - return os.path.join(test_data_dir_cloud, SMALL_SKY_ORDER1_DIR_NAME) - - @pytest.fixture def base_catalog_info_file_cloud(test_data_dir_cloud) -> str: return os.path.join(test_data_dir_cloud, "dataset", "catalog_info.json") - - -@pytest.fixture -def catalog_info_file_cloud(catalog_path_cloud) -> str: - return os.path.join(catalog_path_cloud, "catalog_info.json") diff --git a/tests/hipscat/inspection/test_almanac_cloud.py b/tests/hipscat/inspection/test_almanac_cloud.py index e2c6dca..c6386e6 100644 --- a/tests/hipscat/inspection/test_almanac_cloud.py +++ b/tests/hipscat/inspection/test_almanac_cloud.py @@ -14,7 +14,7 @@ def test_default(almanac_dir_cloud, test_data_dir_cloud, example_cloud_storage_o os.environ["HIPSCAT_ALMANAC_DIR"] = almanac_dir_cloud alms = Almanac(include_default_dir=True, storage_options=example_cloud_storage_options) - assert len(alms.catalogs()) == 8 + assert len(alms.catalogs()) == 2 os.environ.pop("HIPSCAT_ALMANAC_DIR") alms = Almanac(include_default_dir=True, storage_options=example_cloud_storage_options) diff --git a/tests/hipscat/io/file_io/test_file_pointers_cloud.py b/tests/hipscat/io/file_io/test_file_pointers_cloud.py index ce887aa..284479d 100644 --- a/tests/hipscat/io/file_io/test_file_pointers_cloud.py +++ b/tests/hipscat/io/file_io/test_file_pointers_cloud.py @@ 
-46,7 +46,7 @@ def test_find_files_matching_path(small_sky_dir_cloud, example_cloud_storage_opt == 1 ) - ## wilcard in the name + ## wildcard in the name, matches catalog_info.json and provenance_info.json assert ( len( find_files_matching_path( @@ -55,7 +55,7 @@ def test_find_files_matching_path(small_sky_dir_cloud, example_cloud_storage_opt storage_options=example_cloud_storage_options, ) ) - == 1 + == 2 ) @@ -101,7 +101,9 @@ def test_get_directory_contents(small_sky_order1_dir_cloud, example_cloud_storag "_common_metadata", "_metadata", "catalog_info.json", + "partition_info.csv", "point_map.fits", + "provenance_info.json", ] expected = [os.path.join(small_sky_order1_dir_cloud, file_name) for file_name in expected] diff --git a/tests/hipscat/io/test_write_metadata_cloud.py b/tests/hipscat/io/test_write_metadata_cloud.py index bcf540a..91393fc 100644 --- a/tests/hipscat/io/test_write_metadata_cloud.py +++ b/tests/hipscat/io/test_write_metadata_cloud.py @@ -23,7 +23,10 @@ def basic_catalog_parquet_metadata(): pa.field("dec", pa.float64()), pa.field("ra_error", pa.int64()), pa.field("dec_error", pa.int64()), - pa.field("__index_level_0__", pa.int64()), + pa.field("Norder", pa.uint8()), + pa.field("Dir", pa.uint64()), + pa.field("Npix", pa.uint64()), + pa.field("_hipscat_index", pa.uint64()), ] ) diff --git a/tests/hipscat_import/conftest.py b/tests/hipscat_import/conftest.py index 55f917b..d4ed72b 100644 --- a/tests/hipscat_import/conftest.py +++ b/tests/hipscat_import/conftest.py @@ -4,9 +4,6 @@ import pytest from dask.distributed import Client -DATA_DIR_NAME = "data" -SMALL_SKY_DIR_NAME = "small_sky" - @pytest.fixture(scope="session", name="dask_client") def dask_client(): @@ -16,36 +13,6 @@ def dask_client(): client.close() -def pytest_collection_modifyitems(items): - """Modify dask unit tests to - - ignore event loop deprecation warnings - - have a longer timeout default timeout (5 seconds instead of 1 second) - - require use of the `dask_client` fixture, even 
if it's not requested - - Individual tests that will be particularly long-running can still override - the default timeout, by using an annotation like: - - @pytest.mark.dask(timeout=10) - def test_long_running(): - ... - """ - first_dask = True - for item in items: - timeout = None - for mark in item.iter_markers(name="dask"): - timeout = 15 - if "timeout" in mark.kwargs: - timeout = int(mark.kwargs.get("timeout")) - if timeout: - if first_dask: - ## The first test requires more time to set up the dask/ray client - timeout += 10 - first_dask = False - item.add_marker(pytest.mark.timeout(timeout)) - item.add_marker(pytest.mark.usefixtures("dask_client")) - item.add_marker(pytest.mark.filterwarnings("ignore::DeprecationWarning")) - - @pytest.fixture def tmp_dir_cloud(example_cloud_path): return os.path.join(example_cloud_path, "hipscat_import", "tmp") @@ -53,7 +20,7 @@ def tmp_dir_cloud(example_cloud_path): @pytest.fixture def test_data_dir_cloud(example_cloud_path): - return os.path.join(example_cloud_path, "hipscat_import", DATA_DIR_NAME) + return os.path.join(example_cloud_path, "hipscat_import", "data") @pytest.fixture @@ -64,8 +31,3 @@ def small_sky_parts_dir_cloud(test_data_dir_cloud): @pytest.fixture def small_sky_parts_dir_local(local_data_dir): return os.path.join(local_data_dir, "small_sky_parts") - - -@pytest.fixture -def small_sky_catalog_dir_cloud(test_data_dir_cloud): - return os.path.join(test_data_dir_cloud, "small_sky") diff --git a/tests/lsdb/conftest.py b/tests/lsdb/conftest.py index 7da3127..12bd02c 100644 --- a/tests/lsdb/conftest.py +++ b/tests/lsdb/conftest.py @@ -5,9 +5,7 @@ import pytest from hipscat.io.file_io import file_io -SMALL_SKY_DIR_NAME = "small_sky" SMALL_SKY_XMATCH_NAME = "small_sky_xmatch" -SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1" XMATCH_CORRECT_FILE = "xmatch_correct.csv" @@ -16,21 +14,11 @@ def test_data_dir_cloud(example_cloud_path): return os.path.join(example_cloud_path, "lsdb", "data") -@pytest.fixture -def 
small_sky_dir_cloud(test_data_dir_cloud): - return os.path.join(test_data_dir_cloud, SMALL_SKY_DIR_NAME) - - @pytest.fixture def small_sky_xmatch_dir_cloud(test_data_dir_cloud): return os.path.join(test_data_dir_cloud, SMALL_SKY_XMATCH_NAME) -@pytest.fixture -def small_sky_order1_dir_cloud(test_data_dir_cloud): - return os.path.join(test_data_dir_cloud, SMALL_SKY_ORDER1_DIR_NAME) - - @pytest.fixture def small_sky_catalog_cloud(small_sky_dir_cloud, example_cloud_storage_options): return lsdb.read_hipscat(small_sky_dir_cloud, storage_options=example_cloud_storage_options)