Skip to content

Commit

Permalink
Move shared logic for locating input files.
Browse files Browse the repository at this point in the history
  • Loading branch information
delucchi-cmu committed Oct 31, 2023
1 parent 3c29268 commit c02c582
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 20 deletions.
13 changes: 3 additions & 10 deletions src/hipscat_import/catalog/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
from typing import List

from hipscat.catalog.catalog import CatalogInfo
from hipscat.io import FilePointer, file_io
from hipscat.io import FilePointer
from hipscat.pixel_math import hipscat_id

from hipscat_import.catalog.file_readers import InputReader, get_file_reader
from hipscat_import.catalog.resume_plan import ResumePlan
from hipscat_import.runtime_arguments import RuntimeArguments
from hipscat_import.runtime_arguments import RuntimeArguments, find_input_paths

# pylint: disable=too-many-locals,too-many-arguments,too-many-instance-attributes,too-many-branches,too-few-public-methods

Expand Down Expand Up @@ -102,14 +102,7 @@ def _check_arguments(self):
self.file_reader = get_file_reader(self.input_format)

# Basic checks complete - make more checks and create directories where necessary
if self.input_path:
if not file_io.does_file_or_directory_exist(self.input_path):
raise FileNotFoundError("input_path not found on local storage")
self.input_paths = file_io.find_files_matching_path(self.input_path, f"*{self.input_format}")
elif self.input_file_list:
self.input_paths = self.input_file_list
if len(self.input_paths) == 0:
raise FileNotFoundError("No input files found")
self.input_paths = find_input_paths(self.input_path, f"*{self.input_format}", self.input_file_list)
self.resume_plan = ResumePlan(
resume=self.resume,
progress_bar=self.progress_bar,
Expand Down
13 changes: 3 additions & 10 deletions src/hipscat_import/cross_match/macauff_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
from os import path
from typing import List

from hipscat.io import FilePointer, file_io
from hipscat.io import FilePointer
from hipscat.io.validation import is_valid_catalog

from hipscat_import.runtime_arguments import RuntimeArguments
from hipscat_import.runtime_arguments import RuntimeArguments, find_input_paths

# pylint: disable=too-many-instance-attributes
# pylint: disable=unsupported-binary-operation
Expand Down Expand Up @@ -87,14 +87,7 @@ def _check_arguments(self):
raise ValueError("Macauff column metadata file must point to valid file path.")

# Basic checks complete - make more checks and create directories where necessary
if self.input_path:
if not file_io.does_file_or_directory_exist(self.input_path):
raise FileNotFoundError("input_path not found on local storage")
self.input_paths = file_io.find_files_matching_path(self.input_path, f"*{self.input_format}")
elif self.input_file_list:
self.input_paths = self.input_file_list
if len(self.input_paths) == 0:
raise FileNotFoundError("No input files found")
self.input_paths = find_input_paths(self.input_path, f"*{self.input_format}", self.input_file_list)

self.column_names = self.get_column_names()

Expand Down
24 changes: 24 additions & 0 deletions src/hipscat_import/runtime_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,27 @@ def provenance_info(self) -> dict:
def additional_runtime_provenance_info(self):
"""Any additional runtime args to be included in provenance info from subclasses"""
return {}


def find_input_paths(input_path="", file_matcher="", input_file_list=None):
    """Helper method to find input paths, given either a prefix and format, or an
    explicit list of paths.

    If ``input_path`` is provided it takes precedence: the files under it that
    match ``file_matcher`` are returned and ``input_file_list`` is ignored.
    Otherwise ``input_file_list`` is returned as-is.

    Args:
        input_path (str): prefix to search for
        file_matcher (str): matcher to use when searching for files
        input_file_list (List[str]): list of input paths
    Returns:
        matching files, if input_path is provided, otherwise, input_file_list
    Raises:
        FileNotFoundError: if input_path is provided but does not exist, if no
            files under input_path match file_matcher, or if input_path is not
            provided and input_file_list is missing or empty.
    """
    if input_path:
        if not file_io.does_file_or_directory_exist(input_path):
            raise FileNotFoundError("input_path not found on local storage")
        input_paths = file_io.find_files_matching_path(input_path, file_matcher)
    else:
        # Treat a missing (None) list the same as an empty one, so that the
        # check below raises the documented FileNotFoundError rather than an
        # UnboundLocalError from a never-assigned local.
        input_paths = input_file_list or []
    if len(input_paths) == 0:
        raise FileNotFoundError("No input files found")
    return input_paths

0 comments on commit c02c582

Please sign in to comment.