From 8253efc625f1ba3957ca93d53c9e2fd1d519f0cf Mon Sep 17 00:00:00 2001
From: Sandro Campos <scampos@andrew.cmu.edu>
Date: Wed, 24 Jul 2024 13:51:11 -0400
Subject: [PATCH 1/4] Fix smoke tests

---
 requirements.txt                                | 6 +++---
 src/hipscat_cloudtests/file_checks.py           | 5 +++--
 tests/hipscat/io/file_io/test_file_io_cloud.py  | 9 +++++++--
 tests/hipscat_import/test_run_catalog_import.py | 4 ++--
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index b3c81f2..df98186 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-git+https://github.com/astronomy-commons/hipscat.git@main
-git+https://github.com/astronomy-commons/hipscat-import.git@main
-git+https://github.com/astronomy-commons/lsdb.git@main
\ No newline at end of file
+git+https://github.com/astronomy-commons/hipscat.git@add-arrow-schema-to-catalog
+git+https://github.com/astronomy-commons/hipscat-import.git@read-parquet-with-schema
+git+https://github.com/astronomy-commons/lsdb.git@add-arrow-schema
\ No newline at end of file
diff --git a/src/hipscat_cloudtests/file_checks.py b/src/hipscat_cloudtests/file_checks.py
index 7e366e9..7e3b210 100644
--- a/src/hipscat_cloudtests/file_checks.py
+++ b/src/hipscat_cloudtests/file_checks.py
@@ -4,6 +4,7 @@
 
 import numpy.testing as npt
 import pandas as pd
+import pyarrow as pa
 from hipscat.io.file_io.file_io import load_text_file
 from hipscat.io.file_io.file_pointer import does_file_or_directory_exist
 
@@ -40,7 +41,7 @@ def assert_text_file_matches(expected_lines, file_name, storage_options: dict =
 
 
 def assert_parquet_file_ids(
-    file_name, id_column, expected_ids, resort_ids=True, storage_options: dict = None
+    file_name, id_column, schema: pa.Schema, expected_ids, resort_ids=True, storage_options: dict = None
 ):
     """
     Convenience method to read a parquet file and compare the object IDs to
@@ -54,7 +55,7 @@ def assert_parquet_file_ids(
             is the same between the read IDs and expected_ids
         storage_options (dict): dictionary of filesystem storage options
     """
-    data_frame = pd.read_parquet(file_name, engine="pyarrow", storage_options=storage_options)
+    data_frame = pd.read_parquet(file_name, engine="pyarrow", schema=schema, storage_options=storage_options)
     assert id_column in data_frame.columns
     ids = data_frame[id_column].tolist()
     if resort_ids:
diff --git a/tests/hipscat/io/file_io/test_file_io_cloud.py b/tests/hipscat/io/file_io/test_file_io_cloud.py
index cbe0162..55c40e4 100644
--- a/tests/hipscat/io/file_io/test_file_io_cloud.py
+++ b/tests/hipscat/io/file_io/test_file_io_cloud.py
@@ -37,11 +37,16 @@ def test_load_json(small_sky_dir_local, small_sky_dir_cloud, storage_options):
     assert json_dict_cloud == json_dict_local
 
 
-def test_load_parquet_to_pandas(small_sky_dir_local, small_sky_dir_cloud, storage_options):
+def test_load_parquet_to_pandas(
+    small_sky_catalog_cloud, small_sky_dir_local, small_sky_dir_cloud, storage_options
+):
     pixel_data_path = pixel_catalog_file(small_sky_dir_local, 0, 11)
     pixel_data_path_cloud = pixel_catalog_file(small_sky_dir_cloud, 0, 11)
     parquet_df = pd.read_parquet(pixel_data_path)
-    loaded_df = load_parquet_to_pandas(pixel_data_path_cloud, storage_options=storage_options)
+    catalog_schema = small_sky_catalog_cloud.hc_structure.schema
+    loaded_df = load_parquet_to_pandas(
+        pixel_data_path_cloud, schema=catalog_schema, storage_options=storage_options
+    )
     pd.testing.assert_frame_equal(parquet_df, loaded_df)
 
 
diff --git a/tests/hipscat_import/test_run_catalog_import.py b/tests/hipscat_import/test_run_catalog_import.py
index 8d3e368..2c33d03 100644
--- a/tests/hipscat_import/test_run_catalog_import.py
+++ b/tests/hipscat_import/test_run_catalog_import.py
@@ -46,7 +46,7 @@ def test_catalog_import_write_to_cloud(
     output_file = os.path.join(args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet")
 
     expected_ids = [*range(700, 831)]
-    assert_parquet_file_ids(output_file, "id", expected_ids, storage_options=storage_options)
+    assert_parquet_file_ids(output_file, "id", catalog.schema, expected_ids, storage_options=storage_options)
 
 
 @pytest.mark.dask
@@ -85,7 +85,7 @@ def test_catalog_import_read_from_cloud(
     output_file = os.path.join(args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet")
 
     expected_ids = [*range(700, 831)]
-    assert_parquet_file_ids(output_file, "id", expected_ids)
+    assert_parquet_file_ids(output_file, "id", catalog.schema, expected_ids)
 
 
 def test_read_csv_cloud(storage_options, small_sky_parts_dir_cloud):

From f67c412a988ca2675e0d0d2d6ecb258c9b8d16e8 Mon Sep 17 00:00:00 2001
From: Sandro Campos <scampos@andrew.cmu.edu>
Date: Wed, 31 Jul 2024 10:46:41 -0400
Subject: [PATCH 2/4] Revert changes to requirements file

---
 requirements.txt                              | 6 +++---
 tests/hipscat/io/test_write_metadata_cloud.py | 5 ++++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index df98186..b3c81f2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-git+https://github.com/astronomy-commons/hipscat.git@add-arrow-schema-to-catalog
-git+https://github.com/astronomy-commons/hipscat-import.git@read-parquet-with-schema
-git+https://github.com/astronomy-commons/lsdb.git@add-arrow-schema
\ No newline at end of file
+git+https://github.com/astronomy-commons/hipscat.git@main
+git+https://github.com/astronomy-commons/hipscat-import.git@main
+git+https://github.com/astronomy-commons/lsdb.git@main
\ No newline at end of file
diff --git a/tests/hipscat/io/test_write_metadata_cloud.py b/tests/hipscat/io/test_write_metadata_cloud.py
index e516624..66b0b88 100644
--- a/tests/hipscat/io/test_write_metadata_cloud.py
+++ b/tests/hipscat/io/test_write_metadata_cloud.py
@@ -6,9 +6,11 @@
 import hipscat.pixel_math as hist
 import numpy.testing as npt
 import pyarrow as pa
+import pyarrow.parquet as pq
 import pytest
 from hipscat.catalog.catalog_info import CatalogInfo
 from hipscat.io import file_io
+from hipscat.io.file_io.file_pointer import get_fs
 from hipscat.io.parquet_metadata import write_parquet_metadata
 
 from hipscat_cloudtests import assert_text_file_matches
@@ -175,7 +177,8 @@ def check_parquet_schema(file_name, expected_schema, expected_num_row_groups=1,
 
     assert schema.equals(expected_schema, check_metadata=False)
 
-    parquet_file = file_io.read_parquet_file(file_pointer=file_name, storage_options=storage_options)
+    file_system, file_pointer = get_fs(file_name, storage_options=storage_options)
+    parquet_file = pq.ParquetFile(file_pointer, filesystem=file_system)
     assert parquet_file.metadata.num_row_groups == expected_num_row_groups
 
     for row_index in range(0, parquet_file.metadata.num_row_groups):

From 2bd031bc1b883d060040cde0e4dec6de7da89006 Mon Sep 17 00:00:00 2001
From: Melissa DeLucchi <113376043+delucchi-cmu@users.noreply.github.com>
Date: Wed, 14 Aug 2024 13:44:58 -0400
Subject: [PATCH 3/4] Address smoke test failures. (#35)

* Address smoke test failures.

* Remove unused import.
---
 .../hipscat/io/file_io/test_file_io_cloud.py  |  6 +--
 tests/hipscat_import/test_create_margin.py    | 47 +++++++++----------
 2 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/tests/hipscat/io/file_io/test_file_io_cloud.py b/tests/hipscat/io/file_io/test_file_io_cloud.py
index 55c40e4..91e7f58 100644
--- a/tests/hipscat/io/file_io/test_file_io_cloud.py
+++ b/tests/hipscat/io/file_io/test_file_io_cloud.py
@@ -6,8 +6,8 @@
     get_file_pointer_from_path,
     load_csv_to_pandas,
     load_json_file,
-    load_parquet_to_pandas,
     load_text_file,
+    read_parquet_file_to_pandas,
     write_dataframe_to_csv,
     write_string_to_file,
 )
@@ -37,14 +37,14 @@ def test_load_json(small_sky_dir_local, small_sky_dir_cloud, storage_options):
     assert json_dict_cloud == json_dict_local
 
 
-def test_load_parquet_to_pandas(
+def test_read_parquet_to_pandas(
     small_sky_catalog_cloud, small_sky_dir_local, small_sky_dir_cloud, storage_options
 ):
     pixel_data_path = pixel_catalog_file(small_sky_dir_local, 0, 11)
     pixel_data_path_cloud = pixel_catalog_file(small_sky_dir_cloud, 0, 11)
     parquet_df = pd.read_parquet(pixel_data_path)
     catalog_schema = small_sky_catalog_cloud.hc_structure.schema
-    loaded_df = load_parquet_to_pandas(
+    loaded_df = read_parquet_file_to_pandas(
         pixel_data_path_cloud, schema=catalog_schema, storage_options=storage_options
     )
     pd.testing.assert_frame_equal(parquet_df, loaded_df)
diff --git a/tests/hipscat_import/test_create_margin.py b/tests/hipscat_import/test_create_margin.py
index 57ba5be..b264fff 100644
--- a/tests/hipscat_import/test_create_margin.py
+++ b/tests/hipscat_import/test_create_margin.py
@@ -1,5 +1,4 @@
 import hipscat_import.margin_cache.margin_cache as mc
-import pytest
 from hipscat.catalog.healpix_dataset.healpix_dataset import HealpixDataset
 from hipscat_import.margin_cache.margin_cache_arguments import MarginCacheArguments
 
@@ -17,18 +16,17 @@ def test_margin_cache_gen(
     - local origin catalog.
     - writing to CLOUD.
     """
-    with pytest.warns(UserWarning, match="smaller resolution"):
-        args = MarginCacheArguments(
-            margin_threshold=7200.0,
-            input_catalog_path=small_sky_order1_dir_local,
-            output_path=tmp_cloud_path,
-            output_artifact_name="small_sky_order1_margin",
-            output_storage_options=storage_options,
-            dask_tmp=tmp_path,
-            tmp_dir=tmp_path,
-            margin_order=8,
-            progress_bar=False,
-        )
+    args = MarginCacheArguments(
+        input_catalog_path=small_sky_order1_dir_local,
+        output_path=tmp_cloud_path,
+        output_artifact_name="small_sky_order1_margin",
+        output_storage_options=storage_options,
+        dask_tmp=tmp_path,
+        tmp_dir=tmp_path,
+        margin_order=8,
+        fine_filtering=False,
+        progress_bar=False,
+    )
 
     assert args.catalog.catalog_info.ra_column == "ra"
 
@@ -51,18 +49,17 @@ def test_margin_cache_gen_read_from_cloud(
     - CLOUD origin catalog
     - writing to local tmp
     """
-    with pytest.warns(UserWarning, match="smaller resolution"):
-        args = MarginCacheArguments(
-            margin_threshold=7200.0,
-            input_catalog_path=small_sky_order1_dir_cloud,
-            input_storage_options=storage_options,
-            output_path=tmp_path,
-            output_artifact_name="small_sky_order1_margin",
-            dask_tmp=tmp_path,
-            tmp_dir=tmp_path,
-            margin_order=8,
-            progress_bar=False,
-        )
+    args = MarginCacheArguments(
+        input_catalog_path=small_sky_order1_dir_cloud,
+        input_storage_options=storage_options,
+        output_path=tmp_path,
+        output_artifact_name="small_sky_order1_margin",
+        dask_tmp=tmp_path,
+        tmp_dir=tmp_path,
+        margin_order=8,
+        fine_filtering=False,
+        progress_bar=False,
+    )
 
     assert args.catalog.catalog_info.ra_column == "ra"
 

From 35598b24994e5954ff2dd69dd6379943601aa492 Mon Sep 17 00:00:00 2001
From: Sandro Campos <scampos@andrew.cmu.edu>
Date: Tue, 27 Aug 2024 11:08:02 -0400
Subject: [PATCH 4/4] Create temp directory for each soap args

---
 tests/hipscat_import/test_run_soap.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/hipscat_import/test_run_soap.py b/tests/hipscat_import/test_run_soap.py
index 19c0033..f43c7df 100644
--- a/tests/hipscat_import/test_run_soap.py
+++ b/tests/hipscat_import/test_run_soap.py
@@ -10,7 +10,7 @@
 @pytest.mark.dask
 def test_object_to_self_write_to_cloud(
     dask_client,
-    tmp_path,
+    tmp_path_factory,
     tmp_cloud_path,
     small_sky_dir_local,
     small_sky_order1_dir_local,
@@ -35,7 +35,7 @@ def test_object_to_self_write_to_cloud(
         output_path=tmp_cloud_path,
         output_storage_options=storage_options,
         progress_bar=False,
-        tmp_dir=tmp_path,
+        tmp_dir=tmp_path_factory.mktemp("small_sky_order_to_order1"),
     )
     runner.run(small_sky_soap_args, dask_client)
 
@@ -66,7 +66,7 @@ def test_object_to_self_write_to_cloud(
         output_path=tmp_cloud_path,
         output_storage_options=storage_options,
         progress_bar=False,
-        tmp_dir=tmp_path,
+        tmp_dir=tmp_path_factory.mktemp("small_sky_to_order1_soft"),
     )
     runner.run(small_sky_soap_args, dask_client)