Skip to content

Commit

Permalink
Spruce up the way we check for original input files.
Browse files Browse the repository at this point in the history
  • Loading branch information
delucchi-cmu committed May 17, 2024
1 parent 06b8379 commit 48dc9ac
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/hipscat_import/pipeline_resume_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ def check_original_input_paths(self, input_paths):
ValueError: if the retrieved file set differs from `input_paths`.
"""
unique_file_paths = set(input_paths)
unique_file_paths = [str(p) for p in unique_file_paths]

original_input_paths = []

Expand All @@ -181,7 +182,7 @@ def check_original_input_paths(self, input_paths):
with open(file_path, "r", encoding="utf-8") as file_handle:
contents = file_handle.readlines()
contents = [path.strip() for path in contents]
original_input_paths = set(contents)
original_input_paths = list(set(contents))
except FileNotFoundError:
pass

Expand Down
16 changes: 16 additions & 0 deletions tests/hipscat_import/test_pipeline_resume_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
from pathlib import Path

import numpy.testing as npt
import pytest

from hipscat_import.pipeline_resume_plan import PipelineResumePlan
Expand Down Expand Up @@ -135,3 +136,18 @@ def test_formatted_stage_name():

formatted = PipelineResumePlan.get_formatted_stage_name("very long stage name")
assert formatted == "Very long stage name"


def test_check_original_input_paths(tmp_path, mixed_schema_csv_dir):
    """Round-trip input paths through ``check_original_input_paths``.

    The first call receives ``Path`` objects for two CSV files under
    ``mixed_schema_csv_dir``. Whatever that call returns is passed straight
    back into a second call on the same plan, and the two returned
    collections must compare equal element by element.
    """
    resume_plan = PipelineResumePlan(tmp_path=tmp_path, progress_bar=False, resume=False)

    # Deliberately hand over Path objects (not strings) on the first pass.
    csv_names = ("input_01.csv", "input_02.csv")
    path_inputs = [Path(mixed_schema_csv_dir) / name for name in csv_names]

    first_pass = resume_plan.check_original_input_paths(path_inputs)

    # Feed the checked result back in; it must be accepted and unchanged.
    second_pass = resume_plan.check_original_input_paths(first_pass)

    npt.assert_array_equal(first_pass, second_pass)

0 comments on commit 48dc9ac

Please sign in to comment.