From 6092c3afa3c675f4bf96e0bff092afa784e7a238 Mon Sep 17 00:00:00 2001
From: Melissa DeLucchi <delucchi@andrew.cmu.edu>
Date: Mon, 10 Jun 2024 15:38:29 -0400
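Subject: [PATCH 1/2] Use pathlib for pytest fixtures.

The path fixtures in tests/hipscat_import/conftest.py now return
pathlib.Path objects instead of strings built with os.path.join.
Tests are updated to compose paths with the "/" operator, to call
str() where a substring check against a string path is needed, and
to drop "import os" where it is no longer used.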

---
 .../catalog/test_argument_validation.py       |  2 +-
 .../catalog/test_file_readers.py              |  8 +--
 .../hipscat_import/catalog/test_map_reduce.py | 49 +++++++++--------
 .../hipscat_import/catalog/test_run_import.py | 50 ++++++------------
 .../catalog/test_run_round_trip.py            | 28 +++++-----
 .../catalog/test_sparse_histogram.py          |  4 +-
 tests/hipscat_import/conftest.py              | 52 ++++++++-----------
 .../index/test_index_map_reduce.py            | 16 +++---
 .../test_margin_cache_map_reduce.py           | 16 +++---
 .../soap/test_soap_map_reduce.py              | 31 +++++------
 .../test_pipeline_resume_plan.py              |  2 +-
 .../hipscat_import/test_runtime_arguments.py  |  6 +--
 .../test_verification_arguments.py            |  2 +-
 13 files changed, 120 insertions(+), 146 deletions(-)

diff --git a/tests/hipscat_import/catalog/test_argument_validation.py b/tests/hipscat_import/catalog/test_argument_validation.py
index 65625eec..ee81d07f 100644
--- a/tests/hipscat_import/catalog/test_argument_validation.py
+++ b/tests/hipscat_import/catalog/test_argument_validation.py
@@ -78,7 +78,7 @@ def test_good_paths(blank_data_dir, blank_data_file, tmp_path):
     )
     assert args.input_path == blank_data_dir
     assert len(args.input_paths) == 1
-    assert blank_data_file in args.input_paths[0]
+    assert str(blank_data_file) in args.input_paths[0]
 
 
 def test_multiple_files_in_path(small_sky_parts_dir, tmp_path):
diff --git a/tests/hipscat_import/catalog/test_file_readers.py b/tests/hipscat_import/catalog/test_file_readers.py
index 677b4a7d..a4a8bf4e 100644
--- a/tests/hipscat_import/catalog/test_file_readers.py
+++ b/tests/hipscat_import/catalog/test_file_readers.py
@@ -91,7 +91,7 @@ def test_csv_reader_parquet_metadata(small_sky_single_file, tmp_path):
             pa.field("dec_error", pa.float64()),
         ]
     )
-    schema_file = os.path.join(tmp_path, "metadata.parquet")
+    schema_file = tmp_path / "metadata.parquet"
     pq.write_metadata(
         small_sky_schema,
         schema_file,
@@ -187,7 +187,7 @@ def test_csv_reader_pipe_delimited(formats_pipe_csv, tmp_path):
             pa.field("numeric", pa.int64()),
         ]
     )
-    schema_file = os.path.join(tmp_path, "metadata.parquet")
+    schema_file = tmp_path / "metadata.parquet"
     pq.write_metadata(parquet_schema_types, schema_file)
 
     frame = next(
@@ -258,7 +258,7 @@ def test_parquet_reader_provenance_info(tmp_path, basic_catalog_info):
     """Test that we get some provenance info and it is parseable into JSON."""
     reader = ParquetReader(chunksize=1)
     provenance_info = reader.provenance_info()
-    catalog_base_dir = os.path.join(tmp_path, "test_catalog")
+    catalog_base_dir = tmp_path / "test_catalog"
     os.makedirs(catalog_base_dir)
     io.write_provenance_info(catalog_base_dir, basic_catalog_info, provenance_info)
 
@@ -309,6 +309,6 @@ def test_fits_reader_provenance_info(tmp_path, basic_catalog_info):
     """Test that we get some provenance info and it is parseable into JSON."""
     reader = FitsReader()
     provenance_info = reader.provenance_info()
-    catalog_base_dir = os.path.join(tmp_path, "test_catalog")
+    catalog_base_dir = tmp_path / "test_catalog"
     os.makedirs(catalog_base_dir)
     io.write_provenance_info(catalog_base_dir, basic_catalog_info, provenance_info)
diff --git a/tests/hipscat_import/catalog/test_map_reduce.py b/tests/hipscat_import/catalog/test_map_reduce.py
index 95a27347..64a8c1a5 100644
--- a/tests/hipscat_import/catalog/test_map_reduce.py
+++ b/tests/hipscat_import/catalog/test_map_reduce.py
@@ -20,7 +20,7 @@
 
 def pickle_file_reader(tmp_path, file_reader) -> str:
     """Utility method to pickle a file reader, and return path to pickle."""
-    pickled_reader_file = os.path.join(tmp_path, "reader.pickle")
+    pickled_reader_file = tmp_path / "reader.pickle"
     with open(pickled_reader_file, "wb") as pickle_file:
         pickle.dump(file_reader, pickle_file)
     return pickled_reader_file
@@ -86,14 +86,14 @@ def test_read_bad_fileformat(blank_data_file, capsys, tmp_path):
 
 def read_partial_histogram(tmp_path, mapping_key):
     """Helper to read in the former result of a map operation."""
-    histogram_file = os.path.join(tmp_path, "histograms", f"{mapping_key}.npz")
+    histogram_file = tmp_path / "histograms" / f"{mapping_key}.npz"
     hist = SparseHistogram.from_file(histogram_file)
     return hist.to_array()
 
 
 def test_read_single_fits(tmp_path, formats_fits):
     """Success case - fits file that exists being read as fits"""
-    os.makedirs(os.path.join(tmp_path, "histograms"))
+    os.makedirs(tmp_path / "histograms")
     mr.map_to_pixels(
         input_file=formats_fits,
         pickled_reader_file=pickle_file_reader(tmp_path, get_file_reader("fits")),
@@ -127,7 +127,7 @@ def test_map_headers_wrong(formats_headers_csv, tmp_path):
 
 def test_map_headers(tmp_path, formats_headers_csv):
     """Test loading the a file with non-default headers"""
-    os.makedirs(os.path.join(tmp_path, "histograms"))
+    os.makedirs(tmp_path / "histograms")
     mr.map_to_pixels(
         input_file=formats_headers_csv,
         pickled_reader_file=pickle_file_reader(tmp_path, get_file_reader("csv")),
@@ -149,8 +149,8 @@ def test_map_headers(tmp_path, formats_headers_csv):
 
 
 def test_map_with_hipscat_index(tmp_path, formats_dir, small_sky_single_file):
-    os.makedirs(os.path.join(tmp_path, "histograms"))
-    input_file = os.path.join(formats_dir, "hipscat_index.csv")
+    os.makedirs(tmp_path / "histograms")
+    input_file = formats_dir / "hipscat_index.csv"
     mr.map_to_pixels(
         input_file=input_file,
         pickled_reader_file=pickle_file_reader(tmp_path, get_file_reader("csv")),
@@ -183,8 +183,8 @@ def test_map_with_hipscat_index(tmp_path, formats_dir, small_sky_single_file):
 
 def test_map_with_schema(tmp_path, mixed_schema_csv_dir, mixed_schema_csv_parquet):
     """Test loading the a file when using a parquet schema file for dtypes"""
-    os.makedirs(os.path.join(tmp_path, "histograms"))
-    input_file = os.path.join(mixed_schema_csv_dir, "input_01.csv")
+    os.makedirs(tmp_path / "histograms")
+    input_file = mixed_schema_csv_dir / "input_01.csv"
     mr.map_to_pixels(
         input_file=input_file,
         pickled_reader_file=pickle_file_reader(
@@ -213,7 +213,7 @@ def test_map_with_schema(tmp_path, mixed_schema_csv_dir, mixed_schema_csv_parque
 
 def test_map_small_sky_order0(tmp_path, small_sky_single_file):
     """Test loading the small sky catalog and partitioning each object into the same large bucket"""
-    os.makedirs(os.path.join(tmp_path, "histograms"))
+    os.makedirs(tmp_path / "histograms")
     mr.map_to_pixels(
         input_file=small_sky_single_file,
         pickled_reader_file=pickle_file_reader(tmp_path, get_file_reader("csv")),
@@ -239,7 +239,7 @@ def test_map_small_sky_part_order1(tmp_path, small_sky_file0):
     Test loading a small portion of the small sky catalog and
     partitioning objects into four smaller buckets
     """
-    os.makedirs(os.path.join(tmp_path, "histograms"))
+    os.makedirs(tmp_path / "histograms")
     mr.map_to_pixels(
         input_file=small_sky_file0,
         pickled_reader_file=pickle_file_reader(tmp_path, get_file_reader("csv")),
@@ -279,7 +279,6 @@ def test_split_pixels_bad_format(blank_data_file, tmp_path, capsys):
         )
     captured = capsys.readouterr()
     assert "No such file or directory" in captured.out
-    os.makedirs(os.path.join(tmp_path, "splitting"))
 
 
 def test_split_pixels_headers(formats_headers_csv, assert_parquet_file_ids, tmp_path):
@@ -300,17 +299,17 @@ def test_split_pixels_headers(formats_headers_csv, assert_parquet_file_ids, tmp_
         alignment_file=alignment_file,
     )
 
-    file_name = os.path.join(tmp_path, "order_0", "dir_0", "pixel_11", "shard_0_0.parquet")
+    file_name = tmp_path / "order_0" / "dir_0" / "pixel_11" / "shard_0_0.parquet"
     expected_ids = [*range(700, 708)]
     assert_parquet_file_ids(file_name, "object_id", expected_ids)
 
-    file_name = os.path.join(tmp_path, "order_0", "dir_0", "pixel_1", "shard_0_0.parquet")
+    file_name = tmp_path / "order_0" / "dir_0" / "pixel_1" / "shard_0_0.parquet"
     assert not os.path.exists(file_name)
 
 
 def test_reduce_order0(parquet_shards_dir, assert_parquet_file_ids, tmp_path):
     """Test reducing into one large pixel"""
-    os.makedirs(os.path.join(tmp_path, "reducing"))
+    os.makedirs(tmp_path / "reducing")
     mr.reduce_pixel_shards(
         cache_shard_path=parquet_shards_dir,
         resume_path=tmp_path,
@@ -326,7 +325,7 @@ def test_reduce_order0(parquet_shards_dir, assert_parquet_file_ids, tmp_path):
         delete_input_files=False,
     )
 
-    output_file = os.path.join(tmp_path, "Norder=0", "Dir=0", "Npix=11.parquet")
+    output_file = tmp_path / "Norder=0" / "Dir=0" / "Npix=11.parquet"
 
     expected_ids = [*range(700, 831)]
     assert_parquet_file_ids(output_file, "id", expected_ids)
@@ -334,7 +333,7 @@ def test_reduce_order0(parquet_shards_dir, assert_parquet_file_ids, tmp_path):
 
 def test_reduce_hipscat_index(parquet_shards_dir, assert_parquet_file_ids, tmp_path):
     """Test reducing with or without a _hipscat_index field"""
-    os.makedirs(os.path.join(tmp_path, "reducing"))
+    os.makedirs(tmp_path / "reducing")
     mr.reduce_pixel_shards(
         cache_shard_path=parquet_shards_dir,
         resume_path=tmp_path,
@@ -349,7 +348,7 @@ def test_reduce_hipscat_index(parquet_shards_dir, assert_parquet_file_ids, tmp_p
         delete_input_files=False,
     )
 
-    output_file = os.path.join(tmp_path, "Norder=0", "Dir=0", "Npix=11.parquet")
+    output_file = tmp_path / "Norder=0" / "Dir=0" / "Npix=11.parquet"
 
     expected_ids = [*range(700, 831)]
     assert_parquet_file_ids(output_file, "id", expected_ids)
@@ -414,10 +413,10 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):
     First, we take some time to set up these silly data points, then we test out
     reducing them into a single parquet file using a mix of reduction options.
     """
-    os.makedirs(os.path.join(tmp_path, "reducing"))
-    shard_dir = os.path.join(tmp_path, "reduce_shards", "order_0", "dir_0", "pixel_11")
+    os.makedirs(tmp_path / "reducing")
+    shard_dir = tmp_path / "reduce_shards" / "order_0" / "dir_0" / "pixel_11"
     os.makedirs(shard_dir)
-    output_file = os.path.join(tmp_path, "Norder=0", "Dir=0", "Npix=11.parquet")
+    output_file = tmp_path / "Norder=0" / "Dir=0" / "Npix=11.parquet"
 
     file1_string = """source_id,object_id,time,ra,dec
 1200,700,3000,282.5,-58.5
@@ -427,7 +426,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):
 1404,702,3200,310.5,-27.5
 1505,703,4000,286.5,-69.5"""
     file1_data = pd.read_csv(StringIO(file1_string))
-    file1_data.to_parquet(os.path.join(shard_dir, "file_1_shard_1.parquet"))
+    file1_data.to_parquet(shard_dir / "file_1_shard_1.parquet")
 
     file2_string = """source_id,object_id,time,ra,dec
 1206,700,2000,282.5,-58.5
@@ -435,7 +434,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):
 1308,701,2100,299.5,-48.5
 1309,701,2000,299.5,-48.5"""
     file2_data = pd.read_csv(StringIO(file2_string))
-    file2_data.to_parquet(os.path.join(shard_dir, "file_2_shard_1.parquet"))
+    file2_data.to_parquet(shard_dir / "file_2_shard_1.parquet")
 
     combined_data = pd.concat([file1_data, file2_data])
     combined_data["norder19_healpix"] = hp.ang2pix(
@@ -452,7 +451,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):
     ## This will sort WITHIN an order 19 healpix pixel. In that ordering, the objects are
     ## (703, 700, 701, 702)
     mr.reduce_pixel_shards(
-        cache_shard_path=os.path.join(tmp_path, "reduce_shards"),
+        cache_shard_path=tmp_path / "reduce_shards",
         resume_path=tmp_path,
         reducing_key="0_11",
         destination_pixel_order=0,
@@ -489,7 +488,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):
     ######################## Sort option 2: by object id and time
     ## sort order is effectively (norder19 healpix, object id, time)
     mr.reduce_pixel_shards(
-        cache_shard_path=os.path.join(tmp_path, "reduce_shards"),
+        cache_shard_path=tmp_path / "reduce_shards",
         resume_path=tmp_path,
         reducing_key="0_11",
         destination_pixel_order=0,
@@ -526,7 +525,7 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):
     ## spatial properties for sorting, only numeric.
     ## sort order is effectively (object id, time)
     mr.reduce_pixel_shards(
-        cache_shard_path=os.path.join(tmp_path, "reduce_shards"),
+        cache_shard_path=tmp_path / "reduce_shards",
         resume_path=tmp_path,
         reducing_key="0_11",
         destination_pixel_order=0,
diff --git a/tests/hipscat_import/catalog/test_run_import.py b/tests/hipscat_import/catalog/test_run_import.py
index 587650c8..4821a0af 100644
--- a/tests/hipscat_import/catalog/test_run_import.py
+++ b/tests/hipscat_import/catalog/test_run_import.py
@@ -2,6 +2,7 @@
 
 import os
 import shutil
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
@@ -41,13 +42,10 @@ def test_resume_dask_runner(
     """Test execution in the presence of some resume files."""
     ## First, copy over our intermediate files.
     ## This prevents overwriting source-controlled resume files.
-    intermediate_dir = os.path.join(tmp_path, "resume_catalog", "intermediate")
-    shutil.copytree(
-        os.path.join(resume_dir, "intermediate"),
-        intermediate_dir,
-    )
+    intermediate_dir = tmp_path / "resume_catalog" / "intermediate"
+    shutil.copytree(resume_dir / "intermediate", intermediate_dir)
     ## Now set up our resume files to match previous work.
-    resume_tmp = os.path.join(tmp_path, "tmp", "resume_catalog")
+    resume_tmp = tmp_path / "tmp" / "resume_catalog"
     plan = ResumePlan(tmp_path=resume_tmp, progress_bar=False)
     histogram = SparseHistogram.make_from_counts([11], [131], 0)
     empty = SparseHistogram.make_empty(0)
@@ -63,10 +61,7 @@ def test_resume_dask_runner(
 
     ResumePlan.touch_key_done_file(resume_tmp, ResumePlan.REDUCING_STAGE, "0_11")
 
-    shutil.copytree(
-        os.path.join(resume_dir, "Norder=0"),
-        os.path.join(tmp_path, "resume_catalog", "Norder=0"),
-    )
+    shutil.copytree(resume_dir / "Norder=0", tmp_path / "resume_catalog" / "Norder=0")
 
     args = ImportArguments(
         output_artifact_name="resume_catalog",
@@ -75,7 +70,7 @@ def test_resume_dask_runner(
         output_path=tmp_path,
         dask_tmp=tmp_path,
         tmp_dir=tmp_path,
-        resume_tmp=os.path.join(tmp_path, "tmp"),
+        resume_tmp=tmp_path / "tmp",
         highest_healpix_order=0,
         pixel_threshold=1000,
         progress_bar=False,
@@ -93,17 +88,14 @@ def test_resume_dask_runner(
     assert len(catalog.get_healpix_pixels()) == 1
 
     # Check that the catalog parquet file exists and contains correct object IDs
-    output_file = os.path.join(args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet")
+    output_file = Path(args.catalog_path) / "Norder=0" / "Dir=0" / "Npix=11.parquet"
 
     expected_ids = [*range(700, 831)]
     assert_parquet_file_ids(output_file, "id", expected_ids)
 
     ## Re-running the pipeline with fully done intermediate files
     ## should result in no changes to output files.
-    shutil.copytree(
-        os.path.join(resume_dir, "intermediate"),
-        resume_tmp,
-    )
+    shutil.copytree(resume_dir / "intermediate", resume_tmp)
     plan = args.resume_plan
     plan.touch_stage_done_file(ResumePlan.MAPPING_STAGE)
     plan.touch_stage_done_file(ResumePlan.SPLITTING_STAGE)
@@ -145,25 +137,17 @@ def test_resume_dask_runner_diff_pixel_order(
     with the current HEALPix order."""
     ## First, copy over our intermediate files.
     ## This prevents overwriting source-controlled resume files.
-    intermediate_dir = os.path.join(tmp_path, "resume_catalog", "intermediate")
-    shutil.copytree(
-        os.path.join(resume_dir, "intermediate"),
-        intermediate_dir,
-    )
+    intermediate_dir = tmp_path / "resume_catalog" / "intermediate"
+    shutil.copytree(resume_dir / "intermediate", intermediate_dir)
 
     ## Now set up our resume files to match previous work.
-    resume_tmp = os.path.join(tmp_path, "tmp", "resume_catalog")
+    resume_tmp = tmp_path / "tmp" / "resume_catalog"
     ResumePlan(tmp_path=resume_tmp, progress_bar=False)
-    SparseHistogram.make_from_counts([11], [131], 0).to_file(
-        os.path.join(resume_tmp, "mapping_histogram.npz")
-    )
+    SparseHistogram.make_from_counts([11], [131], 0).to_file(resume_tmp / "mapping_histogram.npz")
     for file_index in range(0, 5):
         ResumePlan.touch_key_done_file(resume_tmp, ResumePlan.SPLITTING_STAGE, f"split_{file_index}")
 
-    shutil.copytree(
-        os.path.join(resume_dir, "Norder=0"),
-        os.path.join(tmp_path, "resume_catalog", "Norder=0"),
-    )
+    shutil.copytree(resume_dir / "Norder=0", tmp_path / "resume_catalog" / "Norder=0")
 
     with pytest.raises(ValueError, match="incompatible with the highest healpix order"):
         args = ImportArguments(
@@ -173,7 +157,7 @@ def test_resume_dask_runner_diff_pixel_order(
             output_path=tmp_path,
             dask_tmp=tmp_path,
             tmp_dir=tmp_path,
-            resume_tmp=os.path.join(tmp_path, "tmp"),
+            resume_tmp=tmp_path / "tmp",
             constant_healpix_order=1,
             pixel_threshold=1000,
             progress_bar=False,
@@ -188,7 +172,7 @@ def test_resume_dask_runner_diff_pixel_order(
         output_path=tmp_path,
         dask_tmp=tmp_path,
         tmp_dir=tmp_path,
-        resume_tmp=os.path.join(tmp_path, "tmp"),
+        resume_tmp=tmp_path / "tmp",
         constant_healpix_order=1,
         pixel_threshold=1000,
         progress_bar=False,
@@ -220,7 +204,7 @@ def test_resume_dask_runner_histograms_diff_size(
     tmp_path,
 ):
     """Tests that the pipeline errors if the partial histograms have different sizes."""
-    resume_tmp = os.path.join(tmp_path, "tmp", "resume_catalog")
+    resume_tmp = tmp_path / "tmp" / "resume_catalog"
     ResumePlan(tmp_path=resume_tmp, progress_bar=False)
 
     # We'll create mock partial histograms of size 0 and 2
@@ -246,7 +230,7 @@ def test_resume_dask_runner_histograms_diff_size(
             output_path=tmp_path,
             dask_tmp=tmp_path,
             tmp_dir=tmp_path,
-            resume_tmp=os.path.join(tmp_path, "tmp"),
+            resume_tmp=tmp_path / "tmp",
             constant_healpix_order=1,
             pixel_threshold=1000,
             progress_bar=False,
diff --git a/tests/hipscat_import/catalog/test_run_round_trip.py b/tests/hipscat_import/catalog/test_run_round_trip.py
index 0d3a1e1c..bb683286 100644
--- a/tests/hipscat_import/catalog/test_run_round_trip.py
+++ b/tests/hipscat_import/catalog/test_run_round_trip.py
@@ -79,8 +79,8 @@ def test_import_mixed_schema_csv(
             Path(mixed_schema_csv_dir) / "input_01.csv",
             Path(mixed_schema_csv_dir) / "input_02.csv",
         ],
-        output_path=Path(tmp_path),
-        dask_tmp=Path(tmp_path),
+        output_path=tmp_path,
+        dask_tmp=tmp_path,
         highest_healpix_order=1,
         file_reader=get_file_reader(
             "csv",
@@ -260,7 +260,7 @@ def test_import_keep_intermediate_files(
     """Test that ALL intermediate files are still around on-disk after
     successful import, when setting the appropriate flags.
     """
-    temp = os.path.join(tmp_path, "intermediate_files")
+    temp = tmp_path / "intermediate_files"
     os.makedirs(temp)
     args = ImportArguments(
         output_artifact_name="small_sky_object_catalog",
@@ -282,7 +282,7 @@ def test_import_keep_intermediate_files(
     assert catalog.catalog_path == args.catalog_path
 
     ## Check that stage-level done files are still around.
-    base_intermediate_dir = os.path.join(temp, "small_sky_object_catalog", "intermediate")
+    base_intermediate_dir = temp / "small_sky_object_catalog" / "intermediate"
     expected_contents = [
         "alignment.pickle",
         "histograms",  # directory containing sub-histograms
@@ -298,21 +298,21 @@ def test_import_keep_intermediate_files(
     ]
     assert_directory_contains(base_intermediate_dir, expected_contents)
 
-    checking_dir = os.path.join(base_intermediate_dir, "histograms")
+    checking_dir = base_intermediate_dir / "histograms"
     assert_directory_contains(
         checking_dir, ["map_0.npz", "map_1.npz", "map_2.npz", "map_3.npz", "map_4.npz", "map_5.npz"]
     )
-    checking_dir = os.path.join(base_intermediate_dir, "splitting")
+    checking_dir = base_intermediate_dir / "splitting"
     assert_directory_contains(
         checking_dir,
         ["split_0_done", "split_1_done", "split_2_done", "split_3_done", "split_4_done", "split_5_done"],
     )
 
-    checking_dir = os.path.join(base_intermediate_dir, "reducing")
+    checking_dir = base_intermediate_dir / "reducing"
     assert_directory_contains(checking_dir, ["0_11_done"])
 
     # Check that all of the intermediate parquet shards are still around.
-    checking_dir = os.path.join(base_intermediate_dir, "order_0", "dir_0", "pixel_11")
+    checking_dir = base_intermediate_dir / "order_0" / "dir_0" / "pixel_11"
     assert_directory_contains(
         checking_dir,
         [
@@ -424,7 +424,7 @@ def test_import_hipscat_index(
     ## First, let's just check the assumptions we have about our input file:
     ## - should have _hipscat_index as the indexed column
     ## - should NOT have any columns like "ra" or "dec"
-    input_file = os.path.join(formats_dir, "hipscat_index.parquet")
+    input_file = formats_dir / "hipscat_index.parquet"
 
     expected_ids = [*range(700, 831)]
     assert_parquet_file_ids(input_file, "id", expected_ids)
@@ -475,7 +475,7 @@ def test_import_hipscat_index_no_pandas(
     tmp_path,
 ):
     """Test basic execution, using a previously-computed _hipscat_index column for spatial partitioning."""
-    input_file = os.path.join(formats_dir, "hipscat_index.csv")
+    input_file = formats_dir / "hipscat_index.csv"
     args = ImportArguments(
         output_artifact_name="using_hipscat_index",
         input_file_list=[input_file],
@@ -517,8 +517,8 @@ def test_import_gaia_minimum(
     tmp_path,
 ):
     """Test end-to-end import, using a representative chunk of gaia data."""
-    input_file = os.path.join(formats_dir, "gaia_minimum.csv")
-    schema_file = os.path.join(formats_dir, "gaia_minimum_schema.parquet")
+    input_file = formats_dir / "gaia_minimum.csv"
+    schema_file = formats_dir / "gaia_minimum_schema.parquet"
 
     args = ImportArguments(
         output_artifact_name="gaia_minimum",
@@ -569,7 +569,7 @@ def test_gaia_ecsv(
     tmp_path,
     assert_parquet_file_ids,
 ):
-    input_file = os.path.join(formats_dir, "gaia_epoch.ecsv")
+    input_file = formats_dir / "gaia_epoch.ecsv"
 
     args = ImportArguments(
         output_artifact_name="gaia_e_astropy",
@@ -661,7 +661,7 @@ def test_gaia_ecsv(
     # In-memory schema uses list<item> naming convention, but pyarrow converts to
     # the parquet-compliant list<element> convention when writing to disk.
     # Round trip the schema to get a schema with compliant nested naming convention.
-    schema_path = os.path.join(tmp_path, "temp_schema.parquet")
+    schema_path = tmp_path / "temp_schema.parquet"
     pq.write_table(expected_parquet_schema.empty_table(), where=schema_path)
     expected_parquet_schema = pq.read_metadata(schema_path).schema.to_arrow_schema()
 
diff --git a/tests/hipscat_import/catalog/test_sparse_histogram.py b/tests/hipscat_import/catalog/test_sparse_histogram.py
index 52e22164..57ce78f2 100644
--- a/tests/hipscat_import/catalog/test_sparse_histogram.py
+++ b/tests/hipscat_import/catalog/test_sparse_histogram.py
@@ -1,7 +1,5 @@
 """Test sparse histogram behavior."""
 
-import os
-
 import numpy as np
 import numpy.testing as npt
 import pytest
@@ -12,7 +10,7 @@
 
 def test_read_write_round_trip(tmp_path):
     """Test that we can read what we write into a histogram file."""
-    file_name = os.path.join(tmp_path, "round_trip.npz")
+    file_name = tmp_path / "round_trip.npz"
     histogram = SparseHistogram.make_from_counts([11], [131], 0)
     histogram.to_file(file_name)
 
diff --git a/tests/hipscat_import/conftest.py b/tests/hipscat_import/conftest.py
index 303144bc..7ef2fc20 100644
--- a/tests/hipscat_import/conftest.py
+++ b/tests/hipscat_import/conftest.py
@@ -2,6 +2,7 @@
 
 import os
 import re
+from pathlib import Path
 
 import healpy as hp
 import numpy as np
@@ -52,119 +53,112 @@ def test_long_running():
 
 @pytest.fixture
 def test_data_dir():
-    return os.path.join(TEST_DIR, "data")
+    return Path(TEST_DIR) / "data"
 
 
 @pytest.fixture
 def small_sky_dir(test_data_dir):
-    return os.path.join(test_data_dir, "small_sky")
+    return test_data_dir / "small_sky"
 
 
 @pytest.fixture
 def small_sky_single_file(test_data_dir):
-    return os.path.join(test_data_dir, "small_sky", "catalog.csv")
+    return test_data_dir / "small_sky" / "catalog.csv"
 
 
 @pytest.fixture
 def small_sky_object_catalog(test_data_dir):
-    return os.path.join(test_data_dir, "small_sky_object_catalog")
+    return test_data_dir / "small_sky_object_catalog"
 
 
 @pytest.fixture
 def small_sky_source_dir(test_data_dir):
-    return os.path.join(test_data_dir, "small_sky_source")
+    return test_data_dir / "small_sky_source"
 
 
 @pytest.fixture
 def small_sky_source_catalog(test_data_dir):
-    return os.path.join(test_data_dir, "small_sky_source_catalog")
+    return test_data_dir / "small_sky_source_catalog"
 
 
 @pytest.fixture
 def blank_data_dir(test_data_dir):
-    return os.path.join(test_data_dir, "blank")
+    return test_data_dir / "blank"
 
 
 @pytest.fixture
 def blank_data_file(test_data_dir):
-    return os.path.join(test_data_dir, "blank", "blank.csv")
+    return test_data_dir / "blank" / "blank.csv"
 
 
 @pytest.fixture
 def empty_data_dir(test_data_dir):
-    return os.path.join(test_data_dir, "empty")
+    return test_data_dir / "empty"
 
 
 @pytest.fixture
 def formats_dir(test_data_dir):
-    return os.path.join(test_data_dir, "test_formats")
+    return test_data_dir / "test_formats"
 
 
 @pytest.fixture
 def formats_headers_csv(test_data_dir):
-    return os.path.join(test_data_dir, "test_formats", "headers.csv")
+    return test_data_dir / "test_formats" / "headers.csv"
 
 
 @pytest.fixture
 def formats_pipe_csv(test_data_dir):
-    return os.path.join(test_data_dir, "test_formats", "pipe_delimited.csv")
+    return test_data_dir / "test_formats" / "pipe_delimited.csv"
 
 
 @pytest.fixture
 def formats_fits(test_data_dir):
-    return os.path.join(test_data_dir, "test_formats", "small_sky.fits")
+    return test_data_dir / "test_formats" / "small_sky.fits"
 
 
 @pytest.fixture
 def formats_pandasindex(test_data_dir):
-    return os.path.join(test_data_dir, "test_formats", "pandasindex.parquet")
+    return test_data_dir / "test_formats" / "pandasindex.parquet"
 
 
 @pytest.fixture
 def small_sky_parts_dir(test_data_dir):
-    return os.path.join(test_data_dir, "small_sky_parts")
+    return test_data_dir / "small_sky_parts"
 
 
 @pytest.fixture
 def small_sky_file0(test_data_dir):
-    return os.path.join(test_data_dir, "small_sky_parts", "catalog_00_of_05.csv")
+    return test_data_dir / "small_sky_parts" / "catalog_00_of_05.csv"
 
 
 @pytest.fixture
 def parquet_shards_dir(test_data_dir):
-    return os.path.join(test_data_dir, "parquet_shards")
+    return test_data_dir / "parquet_shards"
 
 
 @pytest.fixture
 def soap_intermediate_dir(test_data_dir):
-    return os.path.join(test_data_dir, "soap_intermediate")
+    return test_data_dir / "soap_intermediate"
 
 
 @pytest.fixture
 def parquet_shards_shard_44_0(test_data_dir):
-    return os.path.join(
-        test_data_dir,
-        "parquet_shards",
-        "order_1",
-        "dir_0",
-        "pixel_44",
-        "shard_3_0.parquet",
-    )
+    return test_data_dir / "parquet_shards" / "order_1" / "dir_0" / "pixel_44" / "shard_3_0.parquet"
 
 
 @pytest.fixture
 def mixed_schema_csv_dir(test_data_dir):
-    return os.path.join(test_data_dir, "mixed_schema")
+    return test_data_dir / "mixed_schema"
 
 
 @pytest.fixture
 def mixed_schema_csv_parquet(test_data_dir):
-    return os.path.join(test_data_dir, "mixed_schema", "schema.parquet")
+    return test_data_dir / "mixed_schema" / "schema.parquet"
 
 
 @pytest.fixture
 def resume_dir(test_data_dir):
-    return os.path.join(test_data_dir, "resume")
+    return test_data_dir / "resume"
 
 
 @pytest.fixture
diff --git a/tests/hipscat_import/index/test_index_map_reduce.py b/tests/hipscat_import/index/test_index_map_reduce.py
index 0dbba81d..025d0f02 100644
--- a/tests/hipscat_import/index/test_index_map_reduce.py
+++ b/tests/hipscat_import/index/test_index_map_reduce.py
@@ -1,7 +1,5 @@
 """Tests of map reduce operations"""
 
-import os
-
 import numpy as np
 import numpy.testing as npt
 import pandas as pd
@@ -28,7 +26,7 @@ def test_create_index(
     )
     mr.create_index(args, dask_client)
 
-    output_file = os.path.join(tmp_path, "small_sky_object_index", "index", "part.0.parquet")
+    output_file = tmp_path / "small_sky_object_index" / "index" / "part.0.parquet"
 
     expected_ids = [*range(700, 831)]
     assert_parquet_file_index(output_file, expected_ids)
@@ -55,7 +53,7 @@ def test_create_index_no_hipscat_index(small_sky_object_catalog, tmp_path, dask_
     )
     mr.create_index(args, dask_client)
 
-    output_file = os.path.join(tmp_path, "small_sky_object_index", "index", "part.0.parquet")
+    output_file = tmp_path / "small_sky_object_index" / "index" / "part.0.parquet"
 
     data_frame = pd.read_parquet(output_file, engine="pyarrow")
     npt.assert_array_equal(data_frame.columns, ["Norder", "Dir", "Npix"])
@@ -76,7 +74,7 @@ def test_create_index_no_order_pixel(small_sky_object_catalog, tmp_path, dask_cl
     )
     mr.create_index(args, dask_client)
 
-    output_file = os.path.join(tmp_path, "small_sky_object_index", "index", "part.0.parquet")
+    output_file = tmp_path / "small_sky_object_index" / "index" / "part.0.parquet"
 
     data_frame = pd.read_parquet(output_file, engine="pyarrow")
     npt.assert_array_equal(data_frame.columns, ["_hipscat_index"])
@@ -95,7 +93,7 @@ def test_create_index_source(small_sky_source_catalog, assert_parquet_file_index
     )
     mr.create_index(args, dask_client)
 
-    output_file = os.path.join(tmp_path, "small_sky_source_index", "index", "part.0.parquet")
+    output_file = tmp_path / "small_sky_source_index" / "index" / "part.0.parquet"
 
     expected_ids = [*range(70_000, 87_161)]
     assert_parquet_file_index(output_file, expected_ids)
@@ -134,7 +132,7 @@ def test_create_index_with_divisions(
     )
     mr.create_index(args, dask_client)
 
-    output_file = os.path.join(tmp_path, "small_sky_source_index", "index", "part.0.parquet")
+    output_file = tmp_path / "small_sky_source_index" / "index" / "part.0.parquet"
 
     expected_ids = [*range(70_000, 87_161)]
     assert_parquet_file_index(output_file, expected_ids)
@@ -167,7 +165,7 @@ def test_create_index_source_by_object(
     )
     mr.create_index(args, dask_client)
 
-    output_file = os.path.join(tmp_path, "small_sky_source_index", "index", "part.0.parquet")
+    output_file = tmp_path / "small_sky_source_index" / "index" / "part.0.parquet"
 
     expected_ids = np.repeat([*range(700, 831)], 131)
     assert_parquet_file_index(output_file, expected_ids)
@@ -199,7 +197,7 @@ def test_create_index_extra_columns(
     )
     mr.create_index(args, dask_client)
 
-    output_file = os.path.join(tmp_path, "small_sky_source_index", "index", "part.0.parquet")
+    output_file = tmp_path / "small_sky_source_index" / "index" / "part.0.parquet"
 
     expected_ids = np.repeat([*range(700, 831)], 131)
     assert_parquet_file_index(output_file, expected_ids)
diff --git a/tests/hipscat_import/margin_cache/test_margin_cache_map_reduce.py b/tests/hipscat_import/margin_cache/test_margin_cache_map_reduce.py
index c6332c21..72b93dbc 100644
--- a/tests/hipscat_import/margin_cache/test_margin_cache_map_reduce.py
+++ b/tests/hipscat_import/margin_cache/test_margin_cache_map_reduce.py
@@ -46,7 +46,7 @@ def test_to_pixel_shard_equator(tmp_path, basic_data_shard_df):
         dec_column="weird_dec",
     )
 
-    path = os.path.join(tmp_path, "order_1", "dir_0", "pixel_21", "Norder=1", "Dir=0", "Npix=0.parquet")
+    path = tmp_path / "order_1" / "dir_0" / "pixel_21" / "Norder=1" / "Dir=0" / "Npix=0.parquet"
 
     assert os.path.exists(path)
 
@@ -63,7 +63,7 @@ def test_to_pixel_shard_polar(tmp_path, polar_data_shard_df):
         dec_column="weird_dec",
     )
 
-    path = os.path.join(tmp_path, "order_2", "dir_0", "pixel_15", "Norder=2", "Dir=0", "Npix=0.parquet")
+    path = tmp_path / "order_2" / "dir_0" / "pixel_15" / "Norder=2" / "Dir=0" / "Npix=0.parquet"
 
     assert os.path.exists(path)
 
@@ -92,12 +92,12 @@ def test_map_pixel_shards_error(tmp_path, capsys):
 
 
 def test_reduce_margin_shards(tmp_path):
-    intermediate_dir = os.path.join(tmp_path, "intermediate")
+    intermediate_dir = tmp_path / "intermediate"
     partition_dir = get_pixel_cache_directory(intermediate_dir, HealpixPixel(1, 21))
     shard_dir = paths.pixel_directory(partition_dir, 1, 21)
 
     os.makedirs(shard_dir)
-    os.makedirs(os.path.join(intermediate_dir, "reducing"))
+    os.makedirs(intermediate_dir / "reducing")
 
     first_shard_path = paths.pixel_catalog_file(partition_dir, 1, 0)
     second_shard_path = paths.pixel_catalog_file(partition_dir, 1, 1)
@@ -128,7 +128,7 @@ def test_reduce_margin_shards(tmp_path):
     )
 
     # Create a schema parquet file.
-    schema_path = os.path.join(tmp_path, "metadata.parquet")
+    schema_path = tmp_path / "metadata.parquet"
     schema_df = test_df.drop(columns=["margin_Norder", "margin_Dir", "margin_Npix"])
     schema_df.to_parquet(schema_path)
 
@@ -176,14 +176,14 @@ def test_reduce_margin_shards(tmp_path):
 def test_reduce_margin_shards_error(tmp_path, basic_data_shard_df, capsys):
     """Test error behavior on reduce stage. e.g. by not creating the original
     catalog metadata."""
-    intermediate_dir = os.path.join(tmp_path, "intermediate")
+    intermediate_dir = tmp_path / "intermediate"
     partition_dir = get_pixel_cache_directory(intermediate_dir, HealpixPixel(1, 21))
     shard_dir = paths.pixel_directory(partition_dir, 1, 21)
     os.makedirs(shard_dir)
-    os.makedirs(os.path.join(intermediate_dir, "reducing"))
+    os.makedirs(intermediate_dir / "reducing")
 
     # Don't write anything at the metadata path!
-    schema_path = os.path.join(tmp_path, "metadata.parquet")
+    schema_path = tmp_path / "metadata.parquet"
 
     basic_data_shard_df.to_parquet(paths.pixel_catalog_file(partition_dir, 1, 0))
     basic_data_shard_df.to_parquet(paths.pixel_catalog_file(partition_dir, 1, 1))
diff --git a/tests/hipscat_import/soap/test_soap_map_reduce.py b/tests/hipscat_import/soap/test_soap_map_reduce.py
index b95745e7..da53f10e 100644
--- a/tests/hipscat_import/soap/test_soap_map_reduce.py
+++ b/tests/hipscat_import/soap/test_soap_map_reduce.py
@@ -2,6 +2,7 @@
 
 import os
 import shutil
+from pathlib import Path
 
 import numpy.testing as npt
 import pandas as pd
@@ -19,9 +20,7 @@ def test_count_joins(small_sky_soap_args, tmp_path, small_sky_soap_maps):
         count_joins(small_sky_soap_args, source, objects)
 
         result = pd.read_csv(
-            os.path.join(
-                tmp_path, "small_sky_association", "intermediate", f"{source.order}_{source.pixel}.csv"
-            )
+            tmp_path / "small_sky_association" / "intermediate" / f"{source.order}_{source.pixel}.csv"
         )
         assert len(result) == 1
         assert result["num_rows"].sum() > 0
@@ -32,16 +31,20 @@ def test_count_joins_with_leaf(small_sky_soap_args, small_sky_soap_maps):
     small_sky_soap_args.write_leaf_files = True
     small_sky_soap_args.source_id_column = "source_id"
 
-    intermediate_dir = small_sky_soap_args.tmp_path
+    intermediate_dir = Path(small_sky_soap_args.tmp_path)
     for source, objects in small_sky_soap_maps.items():
         count_joins(small_sky_soap_args, source, objects)
 
-        result = pd.read_csv(os.path.join(intermediate_dir, f"{source.order}_{source.pixel}.csv"))
+        result = pd.read_csv(intermediate_dir / f"{source.order}_{source.pixel}.csv")
         assert len(result) == 1
         assert result["num_rows"].sum() > 0
 
-        parquet_file_name = os.path.join(
-            intermediate_dir, "order_0", "dir_0", "pixel_11", f"source_{source.order}_{source.pixel}.parquet"
+        parquet_file_name = (
+            intermediate_dir
+            / "order_0"
+            / "dir_0"
+            / "pixel_11"
+            / f"source_{source.order}_{source.pixel}.parquet"
         )
         assert os.path.exists(parquet_file_name), f"file not found [{parquet_file_name}]"
 
@@ -69,9 +72,7 @@ def test_count_joins_missing(small_sky_source_catalog, tmp_path):
     source = HealpixPixel(2, 176)
     count_joins(args, source, [HealpixPixel(2, 177), HealpixPixel(2, 178)])
 
-    result_csv = os.path.join(
-        tmp_path, "small_sky_association", "intermediate", f"{source.order}_{source.pixel}.csv"
-    )
+    result_csv = tmp_path / "small_sky_association" / "intermediate" / f"{source.order}_{source.pixel}.csv"
 
     result = pd.read_csv(result_csv)
     assert len(result) == 3
@@ -91,10 +92,10 @@ def test_count_joins_missing(small_sky_source_catalog, tmp_path):
 
 def test_combine_results(tmp_path):
     """Test combining many CSVs into a single one"""
-    input_path = os.path.join(tmp_path, "input")
+    input_path = tmp_path / "input"
     os.makedirs(input_path, exist_ok=True)
 
-    output_path = os.path.join(tmp_path, "output")
+    output_path = tmp_path / "output"
     os.makedirs(output_path, exist_ok=True)
 
     join_info = pd.DataFrame(
@@ -113,16 +114,16 @@ def test_combine_results(tmp_path):
             "num_rows",
         ],
     )
-    partitions_csv_file = os.path.join(input_path, "0_11.csv")
+    partitions_csv_file = input_path / "0_11.csv"
     join_info.to_csv(partitions_csv_file, index=False)
 
     total_num_rows = combine_partial_results(input_path, output_path, None)
     assert total_num_rows == 131
 
-    result = pd.read_csv(os.path.join(output_path, "partition_join_info.csv"))
+    result = pd.read_csv(output_path / "partition_join_info.csv")
     assert len(result) == 2
 
-    result = pd.read_csv(os.path.join(output_path, "unmatched_sources.csv"))
+    result = pd.read_csv(output_path / "unmatched_sources.csv")
     assert len(result) == 1
 
 
diff --git a/tests/hipscat_import/test_pipeline_resume_plan.py b/tests/hipscat_import/test_pipeline_resume_plan.py
index b694f33c..0fa80f5d 100644
--- a/tests/hipscat_import/test_pipeline_resume_plan.py
+++ b/tests/hipscat_import/test_pipeline_resume_plan.py
@@ -13,7 +13,7 @@ def test_done_key(tmp_path):
     """Verify expected behavior of marking stage progress via done files."""
     plan = PipelineResumePlan(tmp_path=tmp_path, progress_bar=False)
     stage = "testing"
-    os.makedirs(os.path.join(tmp_path, stage))
+    os.makedirs(tmp_path / stage)
 
     keys = plan.read_done_keys(stage)
     assert len(keys) == 0
diff --git a/tests/hipscat_import/test_runtime_arguments.py b/tests/hipscat_import/test_runtime_arguments.py
index 69aded26..30912459 100644
--- a/tests/hipscat_import/test_runtime_arguments.py
+++ b/tests/hipscat_import/test_runtime_arguments.py
@@ -70,9 +70,9 @@ def test_good_paths(tmp_path):
 
 def test_tmp_path_creation(tmp_path):
     """Check that we create a new temp path for this catalog."""
-    output_path = os.path.join(tmp_path, "unique_output_directory")
-    temp_path = os.path.join(tmp_path, "unique_tmp_directory")
-    dask_tmp_path = os.path.join(tmp_path, "unique_dask_directory")
+    output_path = tmp_path / "unique_output_directory"
+    temp_path = tmp_path / "unique_tmp_directory"
+    dask_tmp_path = tmp_path / "unique_dask_directory"
     os.makedirs(output_path, exist_ok=True)
     os.makedirs(temp_path, exist_ok=True)
     os.makedirs(dask_tmp_path, exist_ok=True)
diff --git a/tests/hipscat_import/verification/test_verification_arguments.py b/tests/hipscat_import/verification/test_verification_arguments.py
index 8ebd6c81..303a63f7 100644
--- a/tests/hipscat_import/verification/test_verification_arguments.py
+++ b/tests/hipscat_import/verification/test_verification_arguments.py
@@ -62,7 +62,7 @@ def test_catalog_object(tmp_path, small_sky_object_catalog):
         output_path=tmp_path,
         output_artifact_name="small_sky_object_verification_report",
     )
-    assert args.input_catalog_path == small_sky_object_catalog
+    assert args.input_catalog_path == str(small_sky_object_catalog)
     assert str(args.output_path) == tmp_path_str
     assert str(args.tmp_path).startswith(tmp_path_str)
 

From 4439aca929afb7cfe9957b55a6e209a8a9dfb793 Mon Sep 17 00:00:00 2001
From: Melissa DeLucchi <delucchi@andrew.cmu.edu>
Date: Tue, 11 Jun 2024 09:24:12 -0400
Subject: [PATCH 2/2] Use Path.mkdir

---
 .../catalog/test_file_readers.py              |  8 +++-----
 .../hipscat_import/catalog/test_map_reduce.py | 20 +++++++++----------
 .../catalog/test_run_round_trip.py            |  2 +-
 .../soap/test_soap_map_reduce.py              |  4 ++--
 .../test_pipeline_resume_plan.py              |  3 +--
 .../hipscat_import/test_runtime_arguments.py  |  8 +++-----
 6 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/tests/hipscat_import/catalog/test_file_readers.py b/tests/hipscat_import/catalog/test_file_readers.py
index a4a8bf4e..fbd056cc 100644
--- a/tests/hipscat_import/catalog/test_file_readers.py
+++ b/tests/hipscat_import/catalog/test_file_readers.py
@@ -1,7 +1,5 @@
 """Test dataframe-generating file readers"""
 
-import os
-
 import hipscat.io.write_metadata as io
 import numpy as np
 import pandas as pd
@@ -224,7 +222,7 @@ def test_csv_reader_provenance_info(tmp_path, basic_catalog_info):
     )
     provenance_info = reader.provenance_info()
     catalog_base_dir = tmp_path / "test_catalog"
-    os.makedirs(catalog_base_dir)
+    catalog_base_dir.mkdir(parents=True)
     io.write_provenance_info(catalog_base_dir, basic_catalog_info, provenance_info)
 
     with open(catalog_base_dir / "provenance_info.json", "r", encoding="utf-8") as file:
@@ -259,7 +257,7 @@ def test_parquet_reader_provenance_info(tmp_path, basic_catalog_info):
     reader = ParquetReader(chunksize=1)
     provenance_info = reader.provenance_info()
     catalog_base_dir = tmp_path / "test_catalog"
-    os.makedirs(catalog_base_dir)
+    catalog_base_dir.mkdir(parents=True)
     io.write_provenance_info(catalog_base_dir, basic_catalog_info, provenance_info)
 
 
@@ -310,5 +308,5 @@ def test_fits_reader_provenance_info(tmp_path, basic_catalog_info):
     reader = FitsReader()
     provenance_info = reader.provenance_info()
     catalog_base_dir = tmp_path / "test_catalog"
-    os.makedirs(catalog_base_dir)
+    catalog_base_dir.mkdir(parents=True)
     io.write_provenance_info(catalog_base_dir, basic_catalog_info, provenance_info)
diff --git a/tests/hipscat_import/catalog/test_map_reduce.py b/tests/hipscat_import/catalog/test_map_reduce.py
index 64a8c1a5..34c72147 100644
--- a/tests/hipscat_import/catalog/test_map_reduce.py
+++ b/tests/hipscat_import/catalog/test_map_reduce.py
@@ -93,7 +93,7 @@ def read_partial_histogram(tmp_path, mapping_key):
 
 def test_read_single_fits(tmp_path, formats_fits):
     """Success case - fits file that exists being read as fits"""
-    os.makedirs(tmp_path / "histograms")
+    (tmp_path / "histograms").mkdir(parents=True)
     mr.map_to_pixels(
         input_file=formats_fits,
         pickled_reader_file=pickle_file_reader(tmp_path, get_file_reader("fits")),
@@ -127,7 +127,7 @@ def test_map_headers_wrong(formats_headers_csv, tmp_path):
 
 def test_map_headers(tmp_path, formats_headers_csv):
     """Test loading the a file with non-default headers"""
-    os.makedirs(tmp_path / "histograms")
+    (tmp_path / "histograms").mkdir(parents=True)
     mr.map_to_pixels(
         input_file=formats_headers_csv,
         pickled_reader_file=pickle_file_reader(tmp_path, get_file_reader("csv")),
@@ -149,7 +149,7 @@ def test_map_headers(tmp_path, formats_headers_csv):
 
 
 def test_map_with_hipscat_index(tmp_path, formats_dir, small_sky_single_file):
-    os.makedirs(tmp_path / "histograms")
+    (tmp_path / "histograms").mkdir(parents=True)
     input_file = formats_dir / "hipscat_index.csv"
     mr.map_to_pixels(
         input_file=input_file,
@@ -183,7 +183,7 @@ def test_map_with_hipscat_index(tmp_path, formats_dir, small_sky_single_file):
 
 def test_map_with_schema(tmp_path, mixed_schema_csv_dir, mixed_schema_csv_parquet):
     """Test loading the a file when using a parquet schema file for dtypes"""
-    os.makedirs(tmp_path / "histograms")
+    (tmp_path / "histograms").mkdir(parents=True)
     input_file = mixed_schema_csv_dir / "input_01.csv"
     mr.map_to_pixels(
         input_file=input_file,
@@ -213,7 +213,7 @@ def test_map_with_schema(tmp_path, mixed_schema_csv_dir, mixed_schema_csv_parque
 
 def test_map_small_sky_order0(tmp_path, small_sky_single_file):
     """Test loading the small sky catalog and partitioning each object into the same large bucket"""
-    os.makedirs(tmp_path / "histograms")
+    (tmp_path / "histograms").mkdir(parents=True)
     mr.map_to_pixels(
         input_file=small_sky_single_file,
         pickled_reader_file=pickle_file_reader(tmp_path, get_file_reader("csv")),
@@ -239,7 +239,7 @@ def test_map_small_sky_part_order1(tmp_path, small_sky_file0):
     Test loading a small portion of the small sky catalog and
     partitioning objects into four smaller buckets
     """
-    os.makedirs(tmp_path / "histograms")
+    (tmp_path / "histograms").mkdir(parents=True)
     mr.map_to_pixels(
         input_file=small_sky_file0,
         pickled_reader_file=pickle_file_reader(tmp_path, get_file_reader("csv")),
@@ -309,7 +309,7 @@ def test_split_pixels_headers(formats_headers_csv, assert_parquet_file_ids, tmp_
 
 def test_reduce_order0(parquet_shards_dir, assert_parquet_file_ids, tmp_path):
     """Test reducing into one large pixel"""
-    os.makedirs(tmp_path / "reducing")
+    (tmp_path / "reducing").mkdir(parents=True)
     mr.reduce_pixel_shards(
         cache_shard_path=parquet_shards_dir,
         resume_path=tmp_path,
@@ -333,7 +333,7 @@ def test_reduce_order0(parquet_shards_dir, assert_parquet_file_ids, tmp_path):
 
 def test_reduce_hipscat_index(parquet_shards_dir, assert_parquet_file_ids, tmp_path):
     """Test reducing with or without a _hipscat_index field"""
-    os.makedirs(tmp_path / "reducing")
+    (tmp_path / "reducing").mkdir(parents=True)
     mr.reduce_pixel_shards(
         cache_shard_path=parquet_shards_dir,
         resume_path=tmp_path,
@@ -413,9 +413,9 @@ def test_reduce_with_sorting_complex(assert_parquet_file_ids, tmp_path):
     First, we take some time to set up these silly data points, then we test out
     reducing them into a single parquet file using a mix of reduction options.
     """
-    os.makedirs(tmp_path / "reducing")
+    (tmp_path / "reducing").mkdir(parents=True)
     shard_dir = tmp_path / "reduce_shards" / "order_0" / "dir_0" / "pixel_11"
-    os.makedirs(shard_dir)
+    shard_dir.mkdir(parents=True)
     output_file = tmp_path / "Norder=0" / "Dir=0" / "Npix=11.parquet"
 
     file1_string = """source_id,object_id,time,ra,dec
diff --git a/tests/hipscat_import/catalog/test_run_round_trip.py b/tests/hipscat_import/catalog/test_run_round_trip.py
index bb683286..d9862067 100644
--- a/tests/hipscat_import/catalog/test_run_round_trip.py
+++ b/tests/hipscat_import/catalog/test_run_round_trip.py
@@ -261,7 +261,7 @@ def test_import_keep_intermediate_files(
     successful import, when setting the appropriate flags.
     """
     temp = tmp_path / "intermediate_files"
-    os.makedirs(temp)
+    temp.mkdir(parents=True)
     args = ImportArguments(
         output_artifact_name="small_sky_object_catalog",
         input_path=small_sky_parts_dir,
diff --git a/tests/hipscat_import/soap/test_soap_map_reduce.py b/tests/hipscat_import/soap/test_soap_map_reduce.py
index da53f10e..ab88f176 100644
--- a/tests/hipscat_import/soap/test_soap_map_reduce.py
+++ b/tests/hipscat_import/soap/test_soap_map_reduce.py
@@ -93,10 +93,10 @@ def test_count_joins_missing(small_sky_source_catalog, tmp_path):
 def test_combine_results(tmp_path):
     """Test combining many CSVs into a single one"""
     input_path = tmp_path / "input"
-    os.makedirs(input_path, exist_ok=True)
+    input_path.mkdir(parents=True)
 
     output_path = tmp_path / "output"
-    os.makedirs(output_path, exist_ok=True)
+    output_path.mkdir(parents=True)
 
     join_info = pd.DataFrame(
         data=[
diff --git a/tests/hipscat_import/test_pipeline_resume_plan.py b/tests/hipscat_import/test_pipeline_resume_plan.py
index 0fa80f5d..c5e0b77c 100644
--- a/tests/hipscat_import/test_pipeline_resume_plan.py
+++ b/tests/hipscat_import/test_pipeline_resume_plan.py
@@ -1,6 +1,5 @@
 """Test resume file operations"""
 
-import os
 from pathlib import Path
 
 import numpy.testing as npt
@@ -13,7 +12,7 @@ def test_done_key(tmp_path):
     """Verify expected behavior of marking stage progress via done files."""
     plan = PipelineResumePlan(tmp_path=tmp_path, progress_bar=False)
     stage = "testing"
-    os.makedirs(tmp_path / stage)
+    (tmp_path / stage).mkdir(parents=True)
 
     keys = plan.read_done_keys(stage)
     assert len(keys) == 0
diff --git a/tests/hipscat_import/test_runtime_arguments.py b/tests/hipscat_import/test_runtime_arguments.py
index 30912459..cea801cc 100644
--- a/tests/hipscat_import/test_runtime_arguments.py
+++ b/tests/hipscat_import/test_runtime_arguments.py
@@ -1,7 +1,5 @@
 """Tests of argument validation"""
 
-import os
-
 import pytest
 
 from hipscat_import.runtime_arguments import RuntimeArguments
@@ -73,9 +71,9 @@ def test_tmp_path_creation(tmp_path):
     output_path = tmp_path / "unique_output_directory"
     temp_path = tmp_path / "unique_tmp_directory"
     dask_tmp_path = tmp_path / "unique_dask_directory"
-    os.makedirs(output_path, exist_ok=True)
-    os.makedirs(temp_path, exist_ok=True)
-    os.makedirs(dask_tmp_path, exist_ok=True)
+    output_path.mkdir(parents=True)
+    temp_path.mkdir(parents=True)
+    dask_tmp_path.mkdir(parents=True)
 
     ## If no tmp paths are given, use the output directory
     args = RuntimeArguments(