From d85f6e4e5186311f70883b2a3b5cdb080e4a2368 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 25 Jul 2024 11:38:21 -0400 Subject: [PATCH 01/33] add new command flepimop-pull --- flepimop/gempyor_pkg/setup.cfg | 1 + .../gempyor_pkg/src/gempyor/resume_pull.py | 154 ++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 flepimop/gempyor_pkg/src/gempyor/resume_pull.py diff --git a/flepimop/gempyor_pkg/setup.cfg b/flepimop/gempyor_pkg/setup.cfg index e5fb0902a..ee21301c7 100644 --- a/flepimop/gempyor_pkg/setup.cfg +++ b/flepimop/gempyor_pkg/setup.cfg @@ -53,6 +53,7 @@ console_scripts = gempyor-seir = gempyor.simulate_seir:simulate gempyor-simulate = gempyor.simulate:simulate flepimop-calibrate = gempyor.calibrate:calibrate + flepimop-pull = gempyor.resume_pull:fetching_resume_files [options.packages.find] where = src diff --git a/flepimop/gempyor_pkg/src/gempyor/resume_pull.py b/flepimop/gempyor_pkg/src/gempyor/resume_pull.py new file mode 100644 index 000000000..2676f5023 --- /dev/null +++ b/flepimop/gempyor_pkg/src/gempyor/resume_pull.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +""" +Script for fetching resume files based on various input parameters. + +Overview: +This script is designed to fetch resume files based on various input parameters. +It uses Click for command-line interface (CLI) options and handles the fetching process either by downloading from an S3 bucket or moving files locally. + +Dependencies: +- click: A package for creating command-line interfaces. +- os: A module for interacting with the operating system. +- gempyor.utils: A module containing utility functions create_resume_file_names_map, download_file_from_s3, and move_file_at_local. + +CLI Options: +The script uses Click to define the following command-line options: + +--resume_location: +- Environment Variables: LAST_JOB_OUTPUT, RESUME_LOCATION +- Type: STRING +- Required: Yes +- Description: The path for the last run's output. + +--discard_seeding: +- Environment Variable: RESUME_DISCARD_SEEDING +- Type: BOOL +- Required: Yes +- Description: Boolean value indicating whether to discard seeding or not. +- valid values: true, 1, y, yes, True, False, false, f, 0, no, n + +--block_index: +- Environment Variable: FLEPI_BLOCK_INDEX +- Type: STRING +- Required: Yes +- Description: The block index for the FLEPI. + +--resume_run_index: +- Environment Variable: RESUME_RUN_INDEX +- Type: STRING +- Required: Yes +- Description: The run index for resuming. + +--flepi_run_index: +- Environment Variable: FLEPI_RUN_INDEX +- Type: STRING +- Required: Yes +- Description: The run index for the FLEPI. + +--flepi_prefix: +- Environment Variable: FLEPI_PREFIX +- Type: STRING +- Required: Yes +- Description: The prefix for the FLEPI. + +Function: fetching_resume_files + +Parameters: +- resume_location (str): Path to the last run's output. +- discard_seeding (bool): Whether to discard seeding. +- flepi_block_index (str): Block index for FLEPI. +- resume_run_index (str): Run index for resuming. +- flepi_run_index (str): Run index for FLEPI. +- flepi_prefix (str): Prefix for FLEPI. + +Description: +The function fetching_resume_files fetches resume files based on the provided parameters. It checks if the resume_location is an S3 path and decides to download from S3 or move files locally accordingly. + +Workflow: +1. Retrieves the environment variable SLURM_ARRAY_TASK_ID for the slot index. +2. Converts the discard_seeding boolean to a string "true" if it is True. +3. Creates a resume file name map using the create_resume_file_names_map function. +4. Checks if resume_location starts with "s3://": + - If yes, downloads the file from S3 using download_file_from_s3. + - If no, moves the file locally using move_file_at_local. + +Example Usage: +To use this script, you can run it from the command line with the required options: +```sh +python script_name.py --resume_location "path/to/resume" --discard_seeding True --block_index "block123" --resume_run_index "run456" --flepi_run_index "run789" --flepi_prefix "prefix" +""" +import click +import os +from gempyor.utils import create_resume_file_names_map, download_file_from_s3, move_file_at_local + +@click.command() +@click.option( + "--resume_location", + "resume_location", + envvar=["LAST_JOB_OUTPUT", "RESUME_LOCATION"], + type=click.STRING, + required=True, + help="the path for the last run's output", +) +@click.option( + "--discard_seeding", + "discard_seeding", + envvar="RESUME_DISCARD_SEEDING", + type=click.BOOL, + required=True, + help="required bool value for discarding seeding or not" +) +@click.option( + "--block_index", + "flepi_block_index", + envvar="FLEPI_BLOCK_INDEX", + type=click.STRING, + required=True +) +@click.option( + "--resume_run_index", + "resume_run_index", + envvar="RESUME_RUN_INDEX", + type=click.STRING, + required=True, +) +@click.option( + "--flepi_run_index", + "flepi_run_index", + envvar="FLEPI_RUN_INDEX", + type=click.STRING, + required=True +) +@click.option( + "--flepi_prefix", + "flepi_prefix", + envvar="FLEPI_PREFIX", + type=click.STRING, + required=True +) +def fetching_resume_files(resume_location, + discard_seeding, + flepi_block_index, + resume_run_index, + flepi_run_index, + flepi_prefix): + flep_slot_index = os.environ["SLURM_ARRAY_TASK_ID"] + if discard_seeding is True: + discard_seeding = "true" + + resume_file_name_map = create_resume_file_names_map(resume_discard_seeding=discard_seeding, + flepi_block_index=flepi_block_index, + resume_run_index=resume_run_index, + flepi_prefix=flepi_prefix, + flepi_slot_index=flep_slot_index, + flepi_run_index=flepi_run_index, + last_job_output=resume_location) + print(resume_file_name_map) + if resume_location.startswith("s3://"): + download_file_from_s3(resume_file_name_map) + else: + move_file_at_local(resume_file_name_map) + + +if __name__ == "__main__": + fetching_resume_files() \ No newline at end of file From edfbb6807538594eb5e07ea3fe0c7cf3a7ceebd2 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 25 Jul 2024 11:38:33 -0400 Subject: [PATCH 02/33] bug fix --- flepimop/gempyor_pkg/src/gempyor/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/utils.py b/flepimop/gempyor_pkg/src/gempyor/utils.py index 6131b5c52..8f6b8a661 100644 --- a/flepimop/gempyor_pkg/src/gempyor/utils.py +++ b/flepimop/gempyor_pkg/src/gempyor/utils.py @@ -579,7 +579,7 @@ def create_resume_file_names_map( liketype=liketype, ) input_file_name = output_file_name - if os.environ.get("FLEPI_BLOCK_INDEX") == "1": + if flepi_block_index == "1": input_file_name = create_resume_input_filename( resume_run_index=resume_run_index, flepi_prefix=flepi_prefix, From 1cb9ef1296e995e6f2d72ab1d590535f9a1649ef Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 25 Jul 2024 11:43:37 -0400 Subject: [PATCH 03/33] change format --- .../gempyor_pkg/src/gempyor/resume_pull.py | 67 ++++++------------- 1 file changed, 22 insertions(+), 45 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/resume_pull.py b/flepimop/gempyor_pkg/src/gempyor/resume_pull.py index 2676f5023..c499a7e31 100644 --- a/flepimop/gempyor_pkg/src/gempyor/resume_pull.py +++ b/flepimop/gempyor_pkg/src/gempyor/resume_pull.py @@ -81,6 +81,7 @@ import os from gempyor.utils import create_resume_file_names_map, download_file_from_s3, move_file_at_local + @click.command() @click.option( "--resume_location", @@ -96,59 +97,35 @@ envvar="RESUME_DISCARD_SEEDING", type=click.BOOL, required=True, - help="required bool value for discarding seeding or not" -) -@click.option( - "--block_index", - "flepi_block_index", - envvar="FLEPI_BLOCK_INDEX", - type=click.STRING, - required=True + help="required bool value for discarding seeding or not", ) +@click.option("--block_index", "flepi_block_index", envvar="FLEPI_BLOCK_INDEX", type=click.STRING, required=True) @click.option( - "--resume_run_index", - "resume_run_index", - envvar="RESUME_RUN_INDEX", - type=click.STRING, - required=True, + "--resume_run_index", "resume_run_index", envvar="RESUME_RUN_INDEX", type=click.STRING, required=True, ) -@click.option( - "--flepi_run_index", - "flepi_run_index", - envvar="FLEPI_RUN_INDEX", - type=click.STRING, - required=True -) -@click.option( - "--flepi_prefix", - "flepi_prefix", - envvar="FLEPI_PREFIX", - type=click.STRING, - required=True -) -def fetching_resume_files(resume_location, - discard_seeding, - flepi_block_index, - resume_run_index, - flepi_run_index, - flepi_prefix): +@click.option("--flepi_run_index", "flepi_run_index", envvar="FLEPI_RUN_INDEX", type=click.STRING, required=True) +@click.option("--flepi_prefix", "flepi_prefix", envvar="FLEPI_PREFIX", type=click.STRING, required=True) +def fetching_resume_files( + resume_location, discard_seeding, flepi_block_index, resume_run_index, flepi_run_index, flepi_prefix +): flep_slot_index = os.environ["SLURM_ARRAY_TASK_ID"] if discard_seeding is True: discard_seeding = "true" - - resume_file_name_map = create_resume_file_names_map(resume_discard_seeding=discard_seeding, - flepi_block_index=flepi_block_index, - resume_run_index=resume_run_index, - flepi_prefix=flepi_prefix, - flepi_slot_index=flep_slot_index, - flepi_run_index=flepi_run_index, - last_job_output=resume_location) - print(resume_file_name_map) + + resume_file_name_map = create_resume_file_names_map( + resume_discard_seeding=discard_seeding, + flepi_block_index=flepi_block_index, + resume_run_index=resume_run_index, + flepi_prefix=flepi_prefix, + flepi_slot_index=flep_slot_index, + flepi_run_index=flepi_run_index, + last_job_output=resume_location, + ) if resume_location.startswith("s3://"): download_file_from_s3(resume_file_name_map) else: move_file_at_local(resume_file_name_map) - - + + if __name__ == "__main__": - fetching_resume_files() \ No newline at end of file + fetching_resume_files() From d9d59a209ec464fbde0e5e25886bfc9e5da11563 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 25 Jul 2024 14:47:06 -0400 Subject: [PATCH 04/33] add the test --- .../tests/utils/test_flepimop_pull.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py diff --git a/flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py b/flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py new file mode 100644 index 000000000..50d2d6049 --- /dev/null +++ b/flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py @@ -0,0 +1,68 @@ +import os +import pytest +from click.testing import CliRunner +from unittest.mock import patch +from gempyor.resume_pull import fetching_resume_files + + +@pytest.fixture +def runner(): + return CliRunner() + + +class TestFetchingResumeFiles: + @pytest.fixture(autouse=True) + def set_env(self): + with patch.dict(os.environ, {"SLURM_ARRAY_TASK_ID": "1"}): + yield + + def test_s3_resume_location(self, runner): + with patch( + "gempyor.resume_pull.create_resume_file_names_map", return_value="dummy_map" + ) as mock_create_map, patch("gempyor.resume_pull.download_file_from_s3") as mock_download: + result = runner.invoke( + fetching_resume_files, + [ + "--resume_location", + "s3://some/location", + "--discard_seeding", + "true", + "--block_index", + 1, + "--resume_run_index", + "1", + "--flepi_run_index", + "1", + "--flepi_prefix", + "prefix123", + ], + ) + assert result.exit_code == 0 + mock_create_map.assert_called_once() + mock_download.assert_called_once() + + def test_local_resume_location(self, runner): + with patch("gempyor.resume_pull.move_file_at_local") as mock_move: + result = runner.invoke( + fetching_resume_files, + [ + "--resume_location", + "local/path", + "--discard_seeding", + "true", + "--block_index", + 1, + "--resume_run_index", + "run123", + "--flepi_run_index", + "run123", + "--flepi_prefix", + "prefix123", + ], + ) + assert result.exit_code == 0 + mock_move.assert_called_once() + + +if __name__ == "__main__": + pytest.main() From c18ecde2947fad37611334eadd258db78d2829a8 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 25 Jul 2024 14:47:24 -0400 Subject: [PATCH 05/33] change argument type --- flepimop/gempyor_pkg/src/gempyor/resume_pull.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/resume_pull.py b/flepimop/gempyor_pkg/src/gempyor/resume_pull.py index c499a7e31..1d6fb527b 100644 --- a/flepimop/gempyor_pkg/src/gempyor/resume_pull.py +++ b/flepimop/gempyor_pkg/src/gempyor/resume_pull.py @@ -99,7 +99,7 @@ required=True, help="required bool value for discarding seeding or not", ) -@click.option("--block_index", "flepi_block_index", envvar="FLEPI_BLOCK_INDEX", type=click.STRING, required=True) +@click.option("--block_index", "flepi_block_index", envvar="FLEPI_BLOCK_INDEX", type=click.INT, required=True) @click.option( "--resume_run_index", "resume_run_index", envvar="RESUME_RUN_INDEX", type=click.STRING, required=True, ) @@ -114,7 +114,7 @@ def fetching_resume_files( resume_file_name_map = create_resume_file_names_map( resume_discard_seeding=discard_seeding, - flepi_block_index=flepi_block_index, + flepi_block_index=str(flepi_block_index), resume_run_index=resume_run_index, flepi_prefix=flepi_prefix, flepi_slot_index=flep_slot_index, From ea5902696730af7f6c096155a3156341cc91bb99 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Mon, 29 Jul 2024 11:15:41 -0400 Subject: [PATCH 06/33] add file check --- .../gempyor_pkg/src/gempyor/resume_pull.py | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/flepimop/gempyor_pkg/src/gempyor/resume_pull.py b/flepimop/gempyor_pkg/src/gempyor/resume_pull.py index 1d6fb527b..a5c70e8de 100644 --- a/flepimop/gempyor_pkg/src/gempyor/resume_pull.py +++ b/flepimop/gempyor_pkg/src/gempyor/resume_pull.py @@ -71,6 +71,8 @@ 4. Checks if resume_location starts with "s3://": - If yes, downloads the file from S3 using download_file_from_s3. - If no, moves the file locally using move_file_at_local. +5. After pulling the input files, it will do a check. If input src_file does not exist, it will output these files. + If the input files exists, it is not pulled or copied to destination. It will raise FileExistsErrors. Example Usage: To use this script, you can run it from the command line with the required options: @@ -79,6 +81,9 @@ """ import click import os +import boto3 +import botocore +from typing import Dict from gempyor.utils import create_resume_file_names_map, download_file_from_s3, move_file_at_local @@ -123,8 +128,107 @@ def fetching_resume_files( ) if resume_location.startswith("s3://"): download_file_from_s3(resume_file_name_map) + pull_check_for_s3(resume_file_name_map) else: move_file_at_local(resume_file_name_map) + pull_check(resume_file_name_map) + + +# Todo: Unit test +def pull_check_for_s3(file_name_map: Dict[str, str]) -> None: + """ + Verifies the existence of specified files in an S3 bucket and checks if corresponding local files are present. + If a file in the S3 bucket does not exist or the local file is missing, it raises appropriate errors or prints a message. + + Parameters: + file_name_map (Dict[str, str]): A dictionary where the keys are S3 URIs (Uniform Resource Identifiers) and the values are the corresponding local file paths. + + Dependencies: + - boto3: The AWS SDK for Python, used to interact with AWS services such as S3. + - botocore: The low-level core functionality of boto3. + - os: The standard library module for interacting with the operating system, used here to check for file existence. + + Functionality: + 1. Initialize S3 Client: The function initializes an S3 client using `boto3.client('s3')`. + 2. Iterate through S3 URIs: For each S3 URI in the `file_name_map` dictionary: + - Parse the Bucket and Object Key: Extracts the bucket name and object key from the S3 URI. + - Check if Object Exists in S3: Uses the `head_object` method to check if the object exists in the specified S3 bucket. + - Check Local File Existence: If the object exists in S3, it checks if the corresponding local file exists using `os.path.exists`. + - Handle Errors: + - If the object does not exist in S3, it catches the `ClientError` and prints a message indicating the missing S3 object. + - If the local file does not exist, it raises a `FileExistsError` indicating the local file is missing. + + Example Usage: + file_name_map = { + "s3://my-bucket/path/to/file1.txt": "/local/path/to/file1.txt", + "s3://my-bucket/path/to/file2.txt": "/local/path/to/file2.txt" + } + + pull_check_for_s3(file_name_map) + + Exceptions: + - FileExistsError: Raised if the corresponding local file for an existing S3 object is missing. + - botocore.exceptions.ClientError: Caught and handled to print a message if the S3 object does not exist. Other client errors are re-raised. + + Notes: + - Ensure that AWS credentials are configured properly for boto3 to access the S3 bucket. + - This function assumes that the S3 URIs provided are in the format `s3://bucket-name/path/to/object`. + """ + s3 = boto3.client("s3") + for s3_uri in file_name_map: + bucket = s3_uri.split("/")[2] + object = s3_uri[len(bucket) + 6 :] + try: + s3.head_object(Bucket=bucket, Key=object) + if os.path.exists(file_name_map[s3_uri]) is False: + raise FileExistsError(f"For {s3_uri}, it is not copied to {file_name_map[s3_uri]}.") + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "404": + print(f"Input {s3_uri} does not exist.") + else: + raise + + +# Todo: Unit Test +def pull_check(file_name_map: Dict[str, str]) -> None: + """ + Verifies the existence of specified source files and checks if corresponding destination files are present. + If a source file does not exist or the destination file is missing, it raises appropriate errors or prints a message. + + Parameters: + file_name_map (Dict[str, str]): A dictionary where the keys are source file paths and the values are the corresponding destination file paths. + + Dependencies: + - os: The standard library module for interacting with the operating system, used here to check for file existence. + + Functionality: + 1. Iterate through Source Files: For each source file path in the `file_name_map` dictionary: + - Check if Source File Exists: Uses `os.path.exists` to check if the source file exists. + - Check Destination File Existence: If the source file exists, it checks if the corresponding destination file exists using `os.path.exists`. + - Handle Errors: + - If the source file does not exist, it prints a message indicating the missing source file. + - If the destination file does not exist, it raises a `FileExistsError` indicating the destination file is missing. + + Example Usage: + file_name_map = { + "/path/to/source1.txt": "/path/to/destination1.txt", + "/path/to/source2.txt": "/path/to/destination2.txt" + } + + pull_check(file_name_map) + + Exceptions: + - FileExistsError: Raised if the corresponding destination file for an existing source file is missing. + + Notes: + - Ensure that the paths provided are valid and accessible on the file system. + """ + for src_file in file_name_map: + if os.path.exists(src_file): + if os.path.exists(file_name_map[src_file]) is False: + raise FileExistsError(f"For {src_file}, it is not copied to {file_name_map[src_file]}.") + else: + print(f"Input {src_file} does not exist.") if __name__ == "__main__": From aafc88b9626e670e7ab1f8895256b96719001926 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Mon, 5 Aug 2024 09:01:04 -0400 Subject: [PATCH 07/33] add new file for push command --- flepimop/gempyor_pkg/src/gempyor/fepimop_push.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 flepimop/gempyor_pkg/src/gempyor/fepimop_push.py diff --git a/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py new file mode 100644 index 000000000..6399cd593 --- /dev/null +++ b/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py @@ -0,0 +1,14 @@ +import click + + +@click.command() +@click.option('--aws', is_flag=True, help='push files to aws') +@click.option('--local', is_flag=True,) +@click.option('--flepi_run_index', 'flepi_run_index', envvar='FLEPI_RUN_INDEX', type=click.STRING, required=True) +@click.option('--flepi_prefix', 'flepi_prefix', envvar='FLEPI_PREFIX', type=click.STRING, required=True) +def flepimop_push(flepi_run_index, + ): + if aws: + push_to_aws(input_files) + else: + move_local(input_files) \ No newline at end of file From 62c0979b0fc75a722898f61cdf59dc8753f92b3e Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 8 Aug 2024 14:18:47 -0400 Subject: [PATCH 08/33] add function creating file names for pushing --- .../gempyor_pkg/src/gempyor/file_paths.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/flepimop/gempyor_pkg/src/gempyor/file_paths.py b/flepimop/gempyor_pkg/src/gempyor/file_paths.py index 627e8188a..3775ae80e 100644 --- a/flepimop/gempyor_pkg/src/gempyor/file_paths.py +++ b/flepimop/gempyor_pkg/src/gempyor/file_paths.py @@ -13,6 +13,7 @@ from datetime import datetime import os from pathlib import Path +from typing import List def create_file_name( @@ -207,3 +208,53 @@ def create_dir_name( create_directory=False, ) ) + +def create_file_name_for_push(flepi_run_index: str, + prefix: str, + flepi_slot_index: str, + flepi_block_index: str) -> List[str]: + """ + Generate a list of file names for different types of inference results. + + This function generates a list of file names based on the provided run index, prefix, slot index, + and block index. Each file name corresponds to a different type of inference result, such as + "seir", "hosp", "llik", etc. The file names are generated using the `create_file_name` function, + with specific extensions based on the type: "csv" for "seed" and "parquet" for all other types. + + Parameters: + ---------- + flepi_run_index : str + The index of the run. This is used to uniquely identify the run. + + prefix : str + A prefix string to be included in the file names. This is typically used to categorize or + identify the files. + + flepi_slot_index : str + The slot index used in the filename. This is formatted as a zero-padded nine-digit number. + + flepi_block_index : str + The block index used in the filename. This typically indicates a specific block or segment + of the data being processed. + + Returns: + ------- + List[str] + A list of generated file names, each corresponding to a different type of inference result. + The file names include the provided prefix, run index, slot index, block index, type, and + the appropriate file extension (either "csv" or "parquet"). + """ + type_list = ["seir", "hosp", "llik", "spar", "snpi", "hnpi", "hpar", "init", "seed"] + name_list = [] + for type_name in type_list: + extension = "csv" if type_name == "seed" else "parquet" + file_name = create_file_name(run_id=flepi_run_index, + prefix=prefix, + inference_filename_prefix="{:09d}.".format(int(flepi_slot_index)), + inference_filename_suffix='chimeric/intermediate', + index=flepi_block_index, + type=type_name, + extension=extension) + name_list.append(file_name) + return name_list + \ No newline at end of file From 5407c71befa3065b0bf9cd8577a2e995851222f1 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 8 Aug 2024 15:06:50 -0400 Subject: [PATCH 09/33] add body for flepimop-push --- .../gempyor_pkg/src/gempyor/fepimop_push.py | 48 ++++++++++++++++--- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py index 6399cd593..f68e17a9f 100644 --- a/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py +++ b/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py @@ -1,14 +1,50 @@ +import os import click +import shutil +from file_paths import create_file_name_for_push @click.command() -@click.option('--aws', is_flag=True, help='push files to aws') -@click.option('--local', is_flag=True,) +@click.option('--aws', is_flag=True, help='push files to aws', required=True) @click.option('--flepi_run_index', 'flepi_run_index', envvar='FLEPI_RUN_INDEX', type=click.STRING, required=True) @click.option('--flepi_prefix', 'flepi_prefix', envvar='FLEPI_PREFIX', type=click.STRING, required=True) -def flepimop_push(flepi_run_index, - ): +@click.option('--flepi_block_index', 'flepi_block_index', envvar='FLEPI_BLOCK_INDEX', type=click.STRING, required=True) +@click.option('--flepi_slot_index', 'flepi_slot_index', envvar='FLEPI_SLOT_INDEX', type=click.STRING, required=True) +@click.option('--s3_results_path', 's3_results_path', envvar='S3_RESULTS_PATH', type=click.STRING, required=False) +@click.option('--fs_results_path', 'fs_results_path', envvar="FS_RESULTS_PATH", type=click.STRING, required=False) +def flepimop_push(aws: bool, + flepi_run_index: str, + flepi_prefix: str, + flepi_slot_index: str, + flepi_block_index:str, + s3_results_path:str = "", + fs_results_path:str = "") -> None: + file_name_list = create_file_name_for_push(flepi_run_index=flepi_run_index, + prefix=flepi_prefix, + flepi_slot_index=flepi_slot_index, + flepi_block_index=flepi_block_index) + exist_files = [f for f in file_name_list if os.path.exists(f) is True] if aws: - push_to_aws(input_files) + try: + import boto3 + from botocore.exceptions import ClientError + except ModuleNotFoundError: + raise ModuleNotFoundError(( + "No module named 'boto3', which is required for " + "gempyor.utils.download_file_from_s3. Please install the aws target." + )) + if s3_results_path == "": + raise ValueError("argument aws is setted to True, you must use --s3_results_path too or environment variable S3_RESULTS_PATH.") + s3 = boto3.client("s3") + for file in exist_files: + s3_path = os.path.join(s3_results_path, file) + bucket = s3_path.split("/")[2] + object_name = s3_path[len(bucket) + 6: ] + s3.upload_file(file, bucket, object_name) else: - move_local(input_files) \ No newline at end of file + if fs_results_path == "": + raise ValueError("argument aws is setted to False, you must use --fs_results_path or environment Variable FS_RESULTS_PATH.") + for file in exist_files: + dst = os.path.join(fs_results_path, file) + os.path.makedirs(os.path.dirname(dst), exist_ok=True) + shutil.copy(file, dst) \ No newline at end of file From e8c1c42bd31d53022137923bc3f3c522a436a74e Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 8 Aug 2024 15:10:23 -0400 Subject: [PATCH 10/33] add command flepimop-push --- flepimop/gempyor_pkg/setup.cfg | 1 + flepimop/gempyor_pkg/src/gempyor/fepimop_push.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/flepimop/gempyor_pkg/setup.cfg b/flepimop/gempyor_pkg/setup.cfg index ee21301c7..abb72a716 100644 --- a/flepimop/gempyor_pkg/setup.cfg +++ b/flepimop/gempyor_pkg/setup.cfg @@ -54,6 +54,7 @@ console_scripts = gempyor-simulate = gempyor.simulate:simulate flepimop-calibrate = gempyor.calibrate:calibrate flepimop-pull = gempyor.resume_pull:fetching_resume_files + flepimop-push = gempyor.flepimop_push:flepimop_push [options.packages.find] where = src diff --git a/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py index f68e17a9f..cb78f3348 100644 --- a/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py +++ b/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py @@ -47,4 +47,8 @@ def flepimop_push(aws: bool, for file in exist_files: dst = os.path.join(fs_results_path, file) os.path.makedirs(os.path.dirname(dst), exist_ok=True) - shutil.copy(file, dst) \ No newline at end of file + shutil.copy(file, dst) + + +if __name__ =="__main__": + flepimop_push() \ No newline at end of file From cde74d45e7700963982b9ee086fa636a321c09b9 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 8 Aug 2024 15:14:47 -0400 Subject: [PATCH 11/33] change error message --- flepimop/gempyor_pkg/src/gempyor/fepimop_push.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py index cb78f3348..1a669788f 100644 --- a/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py +++ b/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py @@ -31,10 +31,10 @@ def flepimop_push(aws: bool, except ModuleNotFoundError: raise ModuleNotFoundError(( "No module named 'boto3', which is required for " - "gempyor.utils.download_file_from_s3. Please install the aws target." + "gempyor.flepimop_push.flepimop_push. Please install the aws target." )) if s3_results_path == "": - raise ValueError("argument aws is setted to True, you must use --s3_results_path too or environment variable S3_RESULTS_PATH.") + raise ValueError("argument aws is setted to True, you must use --s3_results_path or environment variable S3_RESULTS_PATH.") s3 = boto3.client("s3") for file in exist_files: s3_path = os.path.join(s3_results_path, file) From f1a57fbe4e1d9d7fe44b375c6f4bbe9d64d167f8 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Mon, 12 Aug 2024 14:14:28 -0400 Subject: [PATCH 12/33] fix wrong parameter --- flepimop/gempyor_pkg/src/gempyor/file_paths.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/file_paths.py b/flepimop/gempyor_pkg/src/gempyor/file_paths.py index 3775ae80e..105bb4127 100644 --- a/flepimop/gempyor_pkg/src/gempyor/file_paths.py +++ b/flepimop/gempyor_pkg/src/gempyor/file_paths.py @@ -251,9 +251,9 @@ def create_file_name_for_push(flepi_run_index: str, file_name = create_file_name(run_id=flepi_run_index, prefix=prefix, inference_filename_prefix="{:09d}.".format(int(flepi_slot_index)), - inference_filename_suffix='chimeric/intermediate', + inference_filepath_suffix='chimeric/intermediate', index=flepi_block_index, - type=type_name, + ftype=type_name, extension=extension) name_list.append(file_name) return name_list From 8c6b65f8e41da31867da3f1b894150df5b347407 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Mon, 12 Aug 2024 14:14:34 -0400 Subject: [PATCH 13/33] rename file --- .../gempyor_pkg/src/gempyor/flepimop_push.py | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 flepimop/gempyor_pkg/src/gempyor/flepimop_push.py diff --git a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py new file mode 100644 index 000000000..1d1443e0d --- /dev/null +++ b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py @@ -0,0 +1,96 @@ +import os +import click +import shutil +from gempyor.file_paths import create_file_name_for_push + + +@click.command() +@click.option('--aws', is_flag=True, help='push files to aws', required=True) +@click.option('--flepi_run_index', 'flepi_run_index', envvar='FLEPI_RUN_INDEX', type=click.STRING, required=True) +@click.option('--flepi_prefix', 'flepi_prefix', envvar='FLEPI_PREFIX', type=click.STRING, required=True) +@click.option('--flepi_block_index', 'flepi_block_index', envvar='FLEPI_BLOCK_INDEX', type=click.STRING, required=True) +@click.option('--flepi_slot_index', 'flepi_slot_index', envvar='FLEPI_SLOT_INDEX', type=click.STRING, required=True) +@click.option('--s3_results_path', 's3_results_path', envvar='S3_RESULTS_PATH', type=click.STRING, required=False) +@click.option('--fs_results_path', 'fs_results_path', envvar="FS_RESULTS_PATH", type=click.STRING, required=False) +def flepimop_push(aws: bool, + flepi_run_index: str, + flepi_prefix: str, + flepi_slot_index: str, + flepi_block_index:str, + s3_results_path:str = "", + fs_results_path:str = "") -> None: + """ + Push files to either AWS S3 or the local filesystem. + + This function generates a list of file names based on the provided parameters and checks which files + exist locally. It then uploads these files to either AWS S3 or the local filesystem based on the + specified options. + + Parameters: + ---------- + aws : bool + Flag indicating whether to push files to AWS S3. If set to True, files will be uploaded to S3. + + flepi_run_index : str + The index of the FLEPI run. This is used to uniquely identify the run. + + flepi_prefix : str + A prefix string to be included in the file names. This is typically used to categorize or + identify the files. + + flepi_slot_index : str + The slot index used in the filename. This is formatted as a zero-padded nine-digit number. + + flepi_block_index : str + The block index used in the filename. This typically indicates a specific block or segment + of the data being processed. + + s3_results_path : str, optional + The S3 path where the results should be uploaded. This parameter is required if `aws` is set to True. + + fs_results_path : str, optional + The local filesystem path where the results should be copied. This parameter is required if `aws` is set to False. + + Raises: + ------ + ValueError + If `aws` is set to True and `s3_results_path` is not provided. + If `aws` is set to False and `fs_results_path` is not provided. + + ModuleNotFoundError + If `boto3` is not installed when `aws` is set to True. + """ + file_name_list = create_file_name_for_push(flepi_run_index=flepi_run_index, + prefix=flepi_prefix, + flepi_slot_index=flepi_slot_index, + flepi_block_index=flepi_block_index) + exist_files = [f for f in file_name_list if os.path.exists(f) is True] + if aws: + try: + import boto3 + from botocore.exceptions import ClientError + except ModuleNotFoundError: + raise ModuleNotFoundError(( + "No module named 'boto3', which is required for " + "gempyor.flepimop_push.flepimop_push. Please install the aws target." + )) + if s3_results_path == "": + raise ValueError("argument aws is setted to True, you must use --s3_results_path or environment variable S3_RESULTS_PATH.") + s3 = boto3.client("s3") + for file in exist_files: + s3_path = os.path.join(s3_results_path, file) + bucket = s3_path.split("/")[2] + object_name = s3_path[len(bucket) + 6: ] + s3.upload_file(file, bucket, object_name) + else: + if fs_results_path == "": + raise ValueError("argument aws is setted to False, you must use --fs_results_path or environment Variable FS_RESULTS_PATH.") + for file in exist_files: + dst = os.path.join(fs_results_path, file) + os.path.makedirs(os.path.dirname(dst), exist_ok=True) + shutil.copy(file, dst) + print("flepimop-push successfully push all existing files.") + + +if __name__ =="__main__": + flepimop_push() \ No newline at end of file From b0d889528f3dae51eac599bf0647f20839b54974 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Mon, 12 Aug 2024 14:15:06 -0400 Subject: [PATCH 14/33] wrong file name --- .../gempyor_pkg/src/gempyor/fepimop_push.py | 54 ------------------- 1 file changed, 54 deletions(-) delete mode 100644 flepimop/gempyor_pkg/src/gempyor/fepimop_push.py diff --git a/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py deleted file mode 100644 index 1a669788f..000000000 --- a/flepimop/gempyor_pkg/src/gempyor/fepimop_push.py +++ /dev/null @@ -1,54 +0,0 @@ -import os -import click -import shutil -from file_paths import create_file_name_for_push - - -@click.command() -@click.option('--aws', is_flag=True, help='push files to aws', required=True) -@click.option('--flepi_run_index', 'flepi_run_index', envvar='FLEPI_RUN_INDEX', type=click.STRING, required=True) -@click.option('--flepi_prefix', 'flepi_prefix', envvar='FLEPI_PREFIX', type=click.STRING, required=True) -@click.option('--flepi_block_index', 'flepi_block_index', envvar='FLEPI_BLOCK_INDEX', type=click.STRING, required=True) -@click.option('--flepi_slot_index', 'flepi_slot_index', envvar='FLEPI_SLOT_INDEX', type=click.STRING, required=True) -@click.option('--s3_results_path', 's3_results_path', envvar='S3_RESULTS_PATH', type=click.STRING, required=False) -@click.option('--fs_results_path', 'fs_results_path', envvar="FS_RESULTS_PATH", type=click.STRING, required=False) -def flepimop_push(aws: bool, - flepi_run_index: str, - flepi_prefix: str, - flepi_slot_index: str, - flepi_block_index:str, - s3_results_path:str = "", - fs_results_path:str = "") -> None: - file_name_list = create_file_name_for_push(flepi_run_index=flepi_run_index, - prefix=flepi_prefix, - flepi_slot_index=flepi_slot_index, - flepi_block_index=flepi_block_index) - exist_files = [f for f in file_name_list if os.path.exists(f) is True] - if aws: - try: - import boto3 - from botocore.exceptions import ClientError - except ModuleNotFoundError: - raise ModuleNotFoundError(( - "No module named 'boto3', which is required for " - "gempyor.flepimop_push.flepimop_push. Please install the aws target." - )) - if s3_results_path == "": - raise ValueError("argument aws is setted to True, you must use --s3_results_path or environment variable S3_RESULTS_PATH.") - s3 = boto3.client("s3") - for file in exist_files: - s3_path = os.path.join(s3_results_path, file) - bucket = s3_path.split("/")[2] - object_name = s3_path[len(bucket) + 6: ] - s3.upload_file(file, bucket, object_name) - else: - if fs_results_path == "": - raise ValueError("argument aws is setted to False, you must use --fs_results_path or environment Variable FS_RESULTS_PATH.") - for file in exist_files: - dst = os.path.join(fs_results_path, file) - os.path.makedirs(os.path.dirname(dst), exist_ok=True) - shutil.copy(file, dst) - - -if __name__ =="__main__": - flepimop_push() \ No newline at end of file From fc8b4fa6216ed6cb1bb021b917119690f2a7a6f0 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Mon, 12 Aug 2024 15:40:03 -0400 Subject: [PATCH 15/33] update doc and fix format --- .../gempyor_pkg/src/gempyor/flepimop_push.py | 141 +++++++++++------- 1 file changed, 91 insertions(+), 50 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py index 1d1443e0d..adfb2295e 100644 --- a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py +++ b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py @@ -5,92 +5,133 @@ @click.command() -@click.option('--aws', is_flag=True, help='push files to aws', required=True) -@click.option('--flepi_run_index', 'flepi_run_index', envvar='FLEPI_RUN_INDEX', type=click.STRING, required=True) -@click.option('--flepi_prefix', 'flepi_prefix', envvar='FLEPI_PREFIX', type=click.STRING, required=True) -@click.option('--flepi_block_index', 'flepi_block_index', envvar='FLEPI_BLOCK_INDEX', type=click.STRING, required=True) -@click.option('--flepi_slot_index', 'flepi_slot_index', envvar='FLEPI_SLOT_INDEX', type=click.STRING, required=True) -@click.option('--s3_results_path', 's3_results_path', envvar='S3_RESULTS_PATH', type=click.STRING, required=False) -@click.option('--fs_results_path', 'fs_results_path', envvar="FS_RESULTS_PATH", type=click.STRING, required=False) -def flepimop_push(aws: bool, - flepi_run_index: str, - flepi_prefix: str, - flepi_slot_index: str, - flepi_block_index:str, - s3_results_path:str = "", - fs_results_path:str = "") -> None: +@click.option("--s3_upload", "s3_upload", envvar="S3_UPLOAD", help="push files to aws", required=True) +@click.option("--data-path", "data_path", envvar="PROJECT_PATH", type=click.Path(exists=True), required=True) +@click.option("--flepi_run_index", "flepi_run_index", envvar="FLEPI_RUN_INDEX", type=click.STRING, required=True) +@click.option("--flepi_prefix", "flepi_prefix", envvar="FLEPI_PREFIX", type=click.STRING, required=True) +@click.option("--flepi_block_index", "flepi_block_index", envvar="FLEPI_BLOCK_INDEX", type=click.STRING, required=True) +@click.option("--flepi_slot_index", "flepi_slot_index", envvar="FLEPI_SLOT_INDEX", type=click.STRING, required=True) +@click.option( + "--s3_results_path", "s3_results_path", envvar="S3_RESULTS_PATH", type=click.STRING, default="", required=False +) +@click.option( + "--fs_results_path", "fs_results_path", envvar="FS_RESULTS_PATH", type=click.STRING, default="", required=False +) +def flepimop_push( + s3_upload: bool, + data_path: str, + flepi_run_index: str, + flepi_prefix: str, + flepi_slot_index: str, + flepi_block_index: str, + s3_results_path: str = "", + fs_results_path: str = "", +) -> None: """ Push files to either AWS S3 or the local filesystem. - This function generates a list of file names based on the provided parameters and checks which files - exist locally. It then uploads these files to either AWS S3 or the local filesystem based on the - specified options. + This function generates a list of file names based on the provided parameters, checks which files + exist locally, and uploads or copies these files to either AWS S3 or the local filesystem based on + the specified options. Parameters: ---------- - aws : bool - Flag indicating whether to push files to AWS S3. If set to True, files will be uploaded to S3. + s3_upload : str + String indicating whether to push files to AWS S3. If set to true, files will be uploaded to S3. + If set to False, files will be copied to the local filesystem as specified by `fs_results_path`. + + data_path : str + The local directory path where the data files are stored. flepi_run_index : str - The index of the FLEPI run. This is used to uniquely identify the run. + The index of the FLEPI run. This is used to uniquely identify the run and generate the corresponding file names. flepi_prefix : str - A prefix string to be included in the file names. This is typically used to categorize or - identify the files. + A prefix string to be included in the file names. This is typically used to categorize or identify the files. flepi_slot_index : str - The slot index used in the filename. This is formatted as a zero-padded nine-digit number. + The slot index used in the filename. This is formatted as a zero-padded nine-digit number, which helps in + distinguishing different slots of data processing. flepi_block_index : str - The block index used in the filename. This typically indicates a specific block or segment - of the data being processed. + The block index used in the filename. This typically indicates a specific block or segment of the data being processed. s3_results_path : str, optional - The S3 path where the results should be uploaded. This parameter is required if `aws` is set to True. + The S3 path where the results should be uploaded. This parameter is required if `s3_upload` is set to true. + Default is an empty string, which will raise an error if `s3_upload` is True. fs_results_path : str, optional - The local filesystem path where the results should be copied. This parameter is required if `aws` is set to False. + The local filesystem path where the results should be copied. + Default is an empty string, which means no files will be copied locally unless specified. Raises: ------ ValueError - If `aws` is set to True and `s3_results_path` is not provided. - If `aws` is set to False and `fs_results_path` is not provided. + If `s3_upload` is set to True and `s3_results_path` is not provided. + If `s3_upload` is set to False and `fs_results_path` is not provided. ModuleNotFoundError - If `boto3` is not installed when `aws` is set to True. + If `boto3` is not installed when `s3_upload` is set to True. + + Notes: + ----- + - This function first checks for the existence of the files generated by `create_file_name_for_push` + in the `data_path` directory. Only the files that exist will be pushed to AWS S3 or copied to the local filesystem. + + - When uploading to AWS S3, the function attempts to create the specified path in the S3 bucket if it does not exist. + + - Local directories specified by `fs_results_path` are created if they do not already exist. + + Example Usage: + -------------- + ```bash + flepimop-push --s3_upload true --data-path /path/to/data --flepi_run_index run_01 --flepi_prefix prefix_01 \ + --flepi_slot_index 1 --flepi_block_index 1 --s3_results_path s3://my-bucket/results/ + ``` + + This would push the existing files generated by the `create_file_name_for_push` function to the specified S3 bucket. """ - file_name_list = create_file_name_for_push(flepi_run_index=flepi_run_index, - prefix=flepi_prefix, - flepi_slot_index=flepi_slot_index, - flepi_block_index=flepi_block_index) - exist_files = [f for f in file_name_list if os.path.exists(f) is True] - if aws: + print("data-path: " + data_path) + file_name_list = create_file_name_for_push( + flepi_run_index=flepi_run_index, + prefix=flepi_prefix, + flepi_slot_index=flepi_slot_index, + flepi_block_index=flepi_block_index, + ) + exist_files = [] + for file_name in file_name_list: + file_path = os.path.join(data_path, file_name) + if os.path.exists(file_path): + exist_files.append(file_name) + print("Exist files: " + str(exist_files)) + if s3_upload == "true": try: import boto3 from botocore.exceptions import ClientError except ModuleNotFoundError: - raise ModuleNotFoundError(( - "No module named 'boto3', which is required for " - "gempyor.flepimop_push.flepimop_push. Please install the aws target." - )) + raise ModuleNotFoundError( + ( + "No module named 'boto3', which is required for " + "gempyor.flepimop_push.flepimop_push. Please install the aws target." + ) + ) if s3_results_path == "": - raise ValueError("argument aws is setted to True, you must use --s3_results_path or environment variable S3_RESULTS_PATH.") + raise ValueError( + "argument aws is setted to True, you must use --s3_results_path or environment variable S3_RESULTS_PATH." + ) s3 = boto3.client("s3") for file in exist_files: s3_path = os.path.join(s3_results_path, file) bucket = s3_path.split("/")[2] - object_name = s3_path[len(bucket) + 6: ] - s3.upload_file(file, bucket, object_name) - else: - if fs_results_path == "": - raise ValueError("argument aws is setted to False, you must use --fs_results_path or environment Variable FS_RESULTS_PATH.") + object_name = s3_path[len(bucket) + 6 :] + s3.upload_file(os.path.join(data_path, file), bucket, object_name) + if fs_results_path != "": for file in exist_files: dst = os.path.join(fs_results_path, file) os.path.makedirs(os.path.dirname(dst), exist_ok=True) - shutil.copy(file, dst) + shutil.copy(os.path.join(fs_results_path, file), dst) print("flepimop-push successfully push all existing files.") - -if __name__ =="__main__": - flepimop_push() \ No newline at end of file + +if __name__ == "__main__": + flepimop_push() From 5fce4d43e988a97fc018b30f4c4cef86ca0ae6fb Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Mon, 12 Aug 2024 15:42:12 -0400 Subject: [PATCH 16/33] fix --- flepimop/gempyor_pkg/src/gempyor/flepimop_push.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py index adfb2295e..8a8b47507 100644 --- a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py +++ b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py @@ -18,7 +18,7 @@ "--fs_results_path", "fs_results_path", envvar="FS_RESULTS_PATH", type=click.STRING, default="", required=False ) def flepimop_push( - s3_upload: bool, + s3_upload: str, data_path: str, flepi_run_index: str, flepi_prefix: str, @@ -129,7 +129,7 @@ def flepimop_push( for file in exist_files: dst = os.path.join(fs_results_path, file) os.path.makedirs(os.path.dirname(dst), exist_ok=True) - shutil.copy(os.path.join(fs_results_path, file), dst) + shutil.copy(os.path.join(data_path, file), dst) print("flepimop-push successfully push all existing files.") From ce734c45120c1926d60d86c1b3df4fe1112b046f Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Mon, 12 Aug 2024 15:59:41 -0400 Subject: [PATCH 17/33] black fix format --- .../gempyor_pkg/src/gempyor/file_paths.py | 55 ++++++------------- 1 file changed, 17 insertions(+), 38 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/file_paths.py b/flepimop/gempyor_pkg/src/gempyor/file_paths.py index 105bb4127..d231c2609 100644 --- a/flepimop/gempyor_pkg/src/gempyor/file_paths.py +++ b/flepimop/gempyor_pkg/src/gempyor/file_paths.py @@ -59,13 +59,7 @@ def create_file_name( """ if create_directory: os.makedirs( - create_dir_name( - run_id, - prefix, - ftype, - inference_filepath_suffix, - inference_filename_prefix, - ), + create_dir_name(run_id, prefix, ftype, inference_filepath_suffix, inference_filename_prefix,), exist_ok=True, ) @@ -124,13 +118,7 @@ def create_file_name_without_extension( """ if create_directory: os.makedirs( - create_dir_name( - run_id, - prefix, - ftype, - inference_filepath_suffix, - inference_filename_prefix, - ), + create_dir_name(run_id, prefix, ftype, inference_filepath_suffix, inference_filename_prefix,), exist_ok=True, ) filename = Path( @@ -170,11 +158,7 @@ def run_id(timestamp: None | datetime = None) -> str: def create_dir_name( - run_id: str, - prefix: str, - ftype: str, - inference_filepath_suffix: str, - inference_filename_prefix: str, + run_id: str, prefix: str, ftype: str, inference_filepath_suffix: str, inference_filename_prefix: str, ) -> str: """ Generate a directory name based on the given parameters. @@ -199,20 +183,14 @@ def create_dir_name( """ return os.path.dirname( create_file_name_without_extension( - run_id, - prefix, - 1, - ftype, - inference_filepath_suffix, - inference_filename_prefix, - create_directory=False, + run_id, prefix, 1, ftype, inference_filepath_suffix, inference_filename_prefix, create_directory=False, ) ) -def create_file_name_for_push(flepi_run_index: str, - prefix: str, - flepi_slot_index: str, - flepi_block_index: str) -> List[str]: + +def create_file_name_for_push( + flepi_run_index: str, prefix: str, flepi_slot_index: str, flepi_block_index: str +) -> List[str]: """ Generate a list of file names for different types of inference results. @@ -248,13 +226,14 @@ def create_file_name_for_push(flepi_run_index: str, name_list = [] for type_name in type_list: extension = "csv" if type_name == "seed" else "parquet" - file_name = create_file_name(run_id=flepi_run_index, - prefix=prefix, - inference_filename_prefix="{:09d}.".format(int(flepi_slot_index)), - inference_filepath_suffix='chimeric/intermediate', - index=flepi_block_index, - ftype=type_name, - extension=extension) + file_name = create_file_name( + run_id=flepi_run_index, + prefix=prefix, + inference_filename_prefix="{:09d}.".format(int(flepi_slot_index)), + inference_filepath_suffix="chimeric/intermediate", + index=flepi_block_index, + ftype=type_name, + extension=extension, + ) name_list.append(file_name) return name_list - \ No newline at end of file From 6cacb69342913e7c1da57d0ab90d8964ea163832 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Tue, 13 Aug 2024 13:10:40 -0400 Subject: [PATCH 18/33] print message --- .../gempyor_pkg/src/gempyor/flepimop_push.py | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py index 8a8b47507..da59e6ca1 100644 --- a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py +++ b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py @@ -68,7 +68,6 @@ def flepimop_push( ------ ValueError If `s3_upload` is set to True and `s3_results_path` is not provided. - If `s3_upload` is set to False and `fs_results_path` is not provided. ModuleNotFoundError If `boto3` is not installed when `s3_upload` is set to True. @@ -103,7 +102,10 @@ def flepimop_push( file_path = os.path.join(data_path, file_name) if os.path.exists(file_path): exist_files.append(file_name) - print("Exist files: " + str(exist_files)) + print("flepimos-push find these existing files: " + " ".join(exist_files)) + # Track failed uploads/copies separately + failed_s3_uploads = [] + failed_fs_copies = [] if s3_upload == "true": try: import boto3 @@ -124,13 +126,39 @@ def flepimop_push( s3_path = os.path.join(s3_results_path, file) bucket = s3_path.split("/")[2] object_name = s3_path[len(bucket) + 6 :] - s3.upload_file(os.path.join(data_path, file), bucket, object_name) + try: + s3.upload_file(os.path.join(data_path, file), bucket, object_name) + print(f"Uploaded {file} to S3 successfully.") + except ClientError as e: + print(f"Failed to upload {file} to S3: {e}") + failed_s3_uploads.append(file) + if fs_results_path != "": for file in exist_files: dst = os.path.join(fs_results_path, file) - os.path.makedirs(os.path.dirname(dst), exist_ok=True) - shutil.copy(os.path.join(data_path, file), dst) - print("flepimop-push successfully push all existing files.") + os.makedirs(os.path.dirname(dst), exist_ok=True) + try: + shutil.copy(os.path.join(data_path, file), dst) + print(f"Copied {file} to local filesystem successfully.") + except IOError as e: + print(f"Failed to copy {file} to local filesystem: {e}") + failed_fs_copies.append(file) + + # Print failed files for S3 uploads + if failed_s3_uploads: + print("The following files failed to upload to S3:") + for file in failed_s3_uploads: + print(file) + + # Print failed files for local filesystem copies + if failed_fs_copies: + print("The following files failed to copy to the local filesystem:") + for file in failed_fs_copies: + print(file) + + # Success message if no failures + if not failed_s3_uploads and not failed_fs_copies: + print("flepimop-push successfully pushed all existing files.") if __name__ == "__main__": From 34b18cf13c3fd09d4e339f930ee68a8b96639b87 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Tue, 13 Aug 2024 13:12:23 -0400 Subject: [PATCH 19/33] clean --- flepimop/gempyor_pkg/src/gempyor/flepimop_push.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py index da59e6ca1..b9bfb1b34 100644 --- a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py +++ b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py @@ -90,7 +90,6 @@ def flepimop_push( This would push the existing files generated by the `create_file_name_for_push` function to the specified S3 bucket. """ - print("data-path: " + data_path) file_name_list = create_file_name_for_push( flepi_run_index=flepi_run_index, prefix=flepi_prefix, From 534d932b2f066d0bda67ff430b0e18d3aaa219b8 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Tue, 13 Aug 2024 13:21:18 -0400 Subject: [PATCH 20/33] correct variable name --- flepimop/gempyor_pkg/src/gempyor/resume_pull.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/resume_pull.py b/flepimop/gempyor_pkg/src/gempyor/resume_pull.py index a5c70e8de..df83d44ef 100644 --- a/flepimop/gempyor_pkg/src/gempyor/resume_pull.py +++ b/flepimop/gempyor_pkg/src/gempyor/resume_pull.py @@ -113,7 +113,7 @@ def fetching_resume_files( resume_location, discard_seeding, flepi_block_index, resume_run_index, flepi_run_index, flepi_prefix ): - flep_slot_index = os.environ["SLURM_ARRAY_TASK_ID"] + flepi_slot_index = os.environ["SLURM_ARRAY_TASK_ID"] if discard_seeding is True: discard_seeding = "true" @@ -122,7 +122,7 @@ def fetching_resume_files( flepi_block_index=str(flepi_block_index), resume_run_index=resume_run_index, flepi_prefix=flepi_prefix, - flepi_slot_index=flep_slot_index, + flepi_slot_index=flepi_slot_index, flepi_run_index=flepi_run_index, last_job_output=resume_location, ) From 72ef61b6a346476931ed292db44147b04f127553 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Tue, 13 Aug 2024 13:59:41 -0400 Subject: [PATCH 21/33] correct tests --- flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py b/flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py index 50d2d6049..f27d5ef15 100644 --- a/flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py +++ b/flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py @@ -17,9 +17,9 @@ def set_env(self): yield def test_s3_resume_location(self, runner): - with patch( - "gempyor.resume_pull.create_resume_file_names_map", return_value="dummy_map" - ) as mock_create_map, patch("gempyor.resume_pull.download_file_from_s3") as mock_download: + with patch("gempyor.resume_pull.download_file_from_s3") as mock_download, patch( + "gempyor.resume_pull.pull_check_for_s3" + ) as mock_pull_check_for_s3: result = runner.invoke( fetching_resume_files, [ @@ -38,8 +38,8 @@ def test_s3_resume_location(self, runner): ], ) assert result.exit_code == 0 - mock_create_map.assert_called_once() mock_download.assert_called_once() + mock_pull_check_for_s3.assert_called_once() def test_local_resume_location(self, runner): with patch("gempyor.resume_pull.move_file_at_local") as mock_move: From da0b989fcb3a5e7add7a0cb56451c3890280cb1f Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Wed, 23 Oct 2024 13:32:53 -0400 Subject: [PATCH 22/33] address comments --- .../gempyor_pkg/src/gempyor/file_paths.py | 41 ++++++----- .../tests/resume_pull/test_flepimop_pull.py | 68 +++++++++++++++++++ 2 files changed, 87 insertions(+), 22 deletions(-) create mode 100644 flepimop/gempyor_pkg/tests/resume_pull/test_flepimop_pull.py diff --git a/flepimop/gempyor_pkg/src/gempyor/file_paths.py b/flepimop/gempyor_pkg/src/gempyor/file_paths.py index d231c2609..04acae6c0 100644 --- a/flepimop/gempyor_pkg/src/gempyor/file_paths.py +++ b/flepimop/gempyor_pkg/src/gempyor/file_paths.py @@ -13,7 +13,6 @@ from datetime import datetime import os from pathlib import Path -from typing import List def create_file_name( @@ -190,7 +189,7 @@ def create_dir_name( def create_file_name_for_push( flepi_run_index: str, prefix: str, flepi_slot_index: str, flepi_block_index: str -) -> List[str]: +) -> list[str]: """ Generate a list of file names for different types of inference results. @@ -199,28 +198,26 @@ def create_file_name_for_push( "seir", "hosp", "llik", etc. The file names are generated using the `create_file_name` function, with specific extensions based on the type: "csv" for "seed" and "parquet" for all other types. - Parameters: - ---------- - flepi_run_index : str - The index of the run. This is used to uniquely identify the run. - - prefix : str - A prefix string to be included in the file names. This is typically used to categorize or - identify the files. - - flepi_slot_index : str - The slot index used in the filename. This is formatted as a zero-padded nine-digit number. - - flepi_block_index : str - The block index used in the filename. This typically indicates a specific block or segment - of the data being processed. + Args: + flepi_run_index : + The index of the run. This is used to uniquely identify the run. + + prefix : + A prefix string to be included in the file names. This is typically used to categorize or + identify the files. + + flepi_slot_index : + The slot index used in the filename. This is formatted as a zero-padded nine-digit number. + + flepi_block_index : + The block index used in the filename. This typically indicates a specific block or segment + of the data being processed. Returns: - ------- - List[str] - A list of generated file names, each corresponding to a different type of inference result. - The file names include the provided prefix, run index, slot index, block index, type, and - the appropriate file extension (either "csv" or "parquet"). + list[str] + A list of generated file names, each corresponding to a different type of inference result. + The file names include the provided prefix, run index, slot index, block index, type, and + the appropriate file extension (either "csv" or "parquet"). """ type_list = ["seir", "hosp", "llik", "spar", "snpi", "hnpi", "hpar", "init", "seed"] name_list = [] diff --git a/flepimop/gempyor_pkg/tests/resume_pull/test_flepimop_pull.py b/flepimop/gempyor_pkg/tests/resume_pull/test_flepimop_pull.py new file mode 100644 index 000000000..f27d5ef15 --- /dev/null +++ b/flepimop/gempyor_pkg/tests/resume_pull/test_flepimop_pull.py @@ -0,0 +1,68 @@ +import os +import pytest +from click.testing import CliRunner +from unittest.mock import patch +from gempyor.resume_pull import fetching_resume_files + + +@pytest.fixture +def runner(): + return CliRunner() + + +class TestFetchingResumeFiles: + @pytest.fixture(autouse=True) + def set_env(self): + with patch.dict(os.environ, {"SLURM_ARRAY_TASK_ID": "1"}): + yield + + def test_s3_resume_location(self, runner): + with patch("gempyor.resume_pull.download_file_from_s3") as mock_download, patch( + "gempyor.resume_pull.pull_check_for_s3" + ) as mock_pull_check_for_s3: + result = runner.invoke( + fetching_resume_files, + [ + "--resume_location", + "s3://some/location", + "--discard_seeding", + "true", + "--block_index", + 1, + "--resume_run_index", + "1", + "--flepi_run_index", + "1", + "--flepi_prefix", + "prefix123", + ], + ) + assert result.exit_code == 0 + mock_download.assert_called_once() + mock_pull_check_for_s3.assert_called_once() + + def test_local_resume_location(self, runner): + with patch("gempyor.resume_pull.move_file_at_local") as mock_move: + result = runner.invoke( + fetching_resume_files, + [ + "--resume_location", + "local/path", + "--discard_seeding", + "true", + "--block_index", + 1, + "--resume_run_index", + "run123", + "--flepi_run_index", + "run123", + "--flepi_prefix", + "prefix123", + ], + ) + assert result.exit_code == 0 + mock_move.assert_called_once() + + +if __name__ == "__main__": + pytest.main() From 3c21a8276b5044968c5d8eb9d92485a3bd8f4c04 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Wed, 23 Oct 2024 13:33:07 -0400 Subject: [PATCH 23/33] address comments 2 --- .../tests/utils/test_flepimop_pull.py | 68 ------------------- 1 file changed, 68 deletions(-) delete mode 100644 flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py diff --git a/flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py b/flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py deleted file mode 100644 index f27d5ef15..000000000 --- a/flepimop/gempyor_pkg/tests/utils/test_flepimop_pull.py +++ /dev/null @@ -1,68 +0,0 @@ -import os -import pytest -from click.testing import CliRunner -from unittest.mock import patch -from gempyor.resume_pull import fetching_resume_files - - -@pytest.fixture -def runner(): - return CliRunner() - - -class TestFetchingResumeFiles: - @pytest.fixture(autouse=True) - def set_env(self): - with patch.dict(os.environ, {"SLURM_ARRAY_TASK_ID": "1"}): - yield - - def test_s3_resume_location(self, runner): - with patch("gempyor.resume_pull.download_file_from_s3") as mock_download, patch( - "gempyor.resume_pull.pull_check_for_s3" - ) as mock_pull_check_for_s3: - result = runner.invoke( - fetching_resume_files, - [ - "--resume_location", - "s3://some/location", - "--discard_seeding", - "true", - "--block_index", - 1, - "--resume_run_index", - "1", - "--flepi_run_index", - "1", - "--flepi_prefix", - "prefix123", - ], - ) - assert result.exit_code == 0 - mock_download.assert_called_once() - mock_pull_check_for_s3.assert_called_once() - - def test_local_resume_location(self, runner): - with patch("gempyor.resume_pull.move_file_at_local") as mock_move: - result = runner.invoke( - fetching_resume_files, - [ - "--resume_location", - "local/path", - "--discard_seeding", - "true", - "--block_index", - 1, - "--resume_run_index", - "run123", - "--flepi_run_index", - "run123", - "--flepi_prefix", - "prefix123", - ], - ) - assert result.exit_code == 0 - mock_move.assert_called_once() - - -if __name__ == "__main__": - pytest.main() From 29cf95d41eb99e909706e8d7abd570f36cba496f Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 14 Nov 2024 14:18:49 -0500 Subject: [PATCH 24/33] change doc string of file_paths --- flepimop/gempyor_pkg/src/gempyor/file_paths.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/file_paths.py b/flepimop/gempyor_pkg/src/gempyor/file_paths.py index 04acae6c0..cb4e3370d 100644 --- a/flepimop/gempyor_pkg/src/gempyor/file_paths.py +++ b/flepimop/gempyor_pkg/src/gempyor/file_paths.py @@ -201,20 +201,17 @@ def create_file_name_for_push( Args: flepi_run_index : The index of the run. This is used to uniquely identify the run. - prefix : A prefix string to be included in the file names. This is typically used to categorize or identify the files. - flepi_slot_index : The slot index used in the filename. This is formatted as a zero-padded nine-digit number. - flepi_block_index : The block index used in the filename. This typically indicates a specific block or segment of the data being processed. Returns: - list[str] + list A list of generated file names, each corresponding to a different type of inference result. The file names include the provided prefix, run index, slot index, block index, type, and the appropriate file extension (either "csv" or "parquet"). From 62c56f06fba030379b4c855b4c95abb09a1035bd Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 14 Nov 2024 14:20:25 -0500 Subject: [PATCH 25/33] remove main --- flepimop/gempyor_pkg/src/gempyor/flepimop_push.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py index b9bfb1b34..f5ea949eb 100644 --- a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py +++ b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py @@ -1,7 +1,7 @@ import os import click import shutil -from gempyor.file_paths import create_file_name_for_push +from .file_paths import create_file_name_for_push @click.command() @@ -158,7 +158,3 @@ def flepimop_push( # Success message if no failures if not failed_s3_uploads and not failed_fs_copies: print("flepimop-push successfully pushed all existing files.") - - -if __name__ == "__main__": - flepimop_push() From 8194214b804fa42bc9c2793b37ed47306c3330f2 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Thu, 14 Nov 2024 14:20:51 -0500 Subject: [PATCH 26/33] remove main and relocate import --- .../gempyor_pkg/src/gempyor/resume_pull.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/resume_pull.py b/flepimop/gempyor_pkg/src/gempyor/resume_pull.py index df83d44ef..2118d9db8 100644 --- a/flepimop/gempyor_pkg/src/gempyor/resume_pull.py +++ b/flepimop/gempyor_pkg/src/gempyor/resume_pull.py @@ -81,10 +81,7 @@ """ import click import os -import boto3 -import botocore -from typing import Dict -from gempyor.utils import create_resume_file_names_map, download_file_from_s3, move_file_at_local +from .utils import create_resume_file_names_map, download_file_from_s3, move_file_at_local @click.command() @@ -135,13 +132,13 @@ def fetching_resume_files( # Todo: Unit test -def pull_check_for_s3(file_name_map: Dict[str, str]) -> None: +def pull_check_for_s3(file_name_map: dict[str, str]) -> None: """ Verifies the existence of specified files in an S3 bucket and checks if corresponding local files are present. If a file in the S3 bucket does not exist or the local file is missing, it raises appropriate errors or prints a message. Parameters: - file_name_map (Dict[str, str]): A dictionary where the keys are S3 URIs (Uniform Resource Identifiers) and the values are the corresponding local file paths. + file_name_map (dict[str, str]): A dictionary where the keys are S3 URIs (Uniform Resource Identifiers) and the values are the corresponding local file paths. Dependencies: - boto3: The AWS SDK for Python, used to interact with AWS services such as S3. @@ -174,6 +171,14 @@ def pull_check_for_s3(file_name_map: Dict[str, str]) -> None: - Ensure that AWS credentials are configured properly for boto3 to access the S3 bucket. - This function assumes that the S3 URIs provided are in the format `s3://bucket-name/path/to/object`. """ + try: + import boto3 + from botocore.exceptions import ClientError + except ModuleNotFoundError: + raise ModuleNotFoundError(( + "No module named 'boto3', which is required for " + "gempyor.utils.download_file_from_s3. Please install the aws target." + )) s3 = boto3.client("s3") for s3_uri in file_name_map: bucket = s3_uri.split("/")[2] @@ -190,13 +195,13 @@ def pull_check_for_s3(file_name_map: Dict[str, str]) -> None: # Todo: Unit Test -def pull_check(file_name_map: Dict[str, str]) -> None: +def pull_check(file_name_map: dict[str, str]) -> None: """ Verifies the existence of specified source files and checks if corresponding destination files are present. If a source file does not exist or the destination file is missing, it raises appropriate errors or prints a message. Parameters: - file_name_map (Dict[str, str]): A dictionary where the keys are source file paths and the values are the corresponding destination file paths. + file_name_map (dict[str, str]): A dictionary where the keys are source file paths and the values are the corresponding destination file paths. Dependencies: - os: The standard library module for interacting with the operating system, used here to check for file existence. @@ -229,7 +234,3 @@ def pull_check(file_name_map: Dict[str, str]) -> None: raise FileExistsError(f"For {src_file}, it is not copied to {file_name_map[src_file]}.") else: print(f"Input {src_file} does not exist.") - - -if __name__ == "__main__": - fetching_resume_files() From 14193ff6dd96fc04aaf418a1cf5fb1b6bcb971d1 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Tue, 19 Nov 2024 13:42:56 -0500 Subject: [PATCH 27/33] add test file --- .../tests/file_paths/test_create_file_name_for_push.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 flepimop/gempyor_pkg/tests/file_paths/test_create_file_name_for_push.py diff --git a/flepimop/gempyor_pkg/tests/file_paths/test_create_file_name_for_push.py b/flepimop/gempyor_pkg/tests/file_paths/test_create_file_name_for_push.py new file mode 100644 index 000000000..e69de29bb From 22f5188936d6af9ed725dd01f15be32b2fe715b1 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Wed, 20 Nov 2024 11:07:21 -0500 Subject: [PATCH 28/33] change --- flepimop/gempyor_pkg/src/gempyor/file_paths.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/file_paths.py b/flepimop/gempyor_pkg/src/gempyor/file_paths.py index cb4e3370d..08e420051 100644 --- a/flepimop/gempyor_pkg/src/gempyor/file_paths.py +++ b/flepimop/gempyor_pkg/src/gempyor/file_paths.py @@ -217,9 +217,9 @@ def create_file_name_for_push( the appropriate file extension (either "csv" or "parquet"). """ type_list = ["seir", "hosp", "llik", "spar", "snpi", "hnpi", "hpar", "init", "seed"] + extension_map = {type_name: "csv" if type_name == "seed" else "parquet" for type_name in type_list} name_list = [] - for type_name in type_list: - extension = "csv" if type_name == "seed" else "parquet" + for type_name, extension in extension_map.items(): file_name = create_file_name( run_id=flepi_run_index, prefix=prefix, From a417e3837eea6f2deb276cb0fe2cebe71677a6a8 Mon Sep 17 00:00:00 2001 From: fang19911030 Date: Wed, 20 Nov 2024 11:07:33 -0500 Subject: [PATCH 29/33] new unit test --- .../test_create_file_name_for_push.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/flepimop/gempyor_pkg/tests/file_paths/test_create_file_name_for_push.py b/flepimop/gempyor_pkg/tests/file_paths/test_create_file_name_for_push.py index e69de29bb..68d915ef7 100644 --- a/flepimop/gempyor_pkg/tests/file_paths/test_create_file_name_for_push.py +++ b/flepimop/gempyor_pkg/tests/file_paths/test_create_file_name_for_push.py @@ -0,0 +1,42 @@ +from unittest.mock import patch +from gempyor.file_paths import create_file_name_for_push + +class TestCreateFileNameForPush: + # Mock implementation of create_file_name for testing + def mocked_create_file_name(self, run_id, prefix, inference_filename_prefix, inference_filepath_suffix, index, ftype, extension): + return f"{prefix}_{run_id}_{inference_filename_prefix}_{inference_filepath_suffix}_{index}_{ftype}.{extension}" + + # Test method for create_file_name_for_push + @patch("__main__.create_file_name") + def test_create_file_name_for_push(self, mock_create_file_name): + mock_create_file_name.side_effect = self.mocked_create_file_name + + flepi_run_index = "run123" + prefix = "testprefix" + flepi_slot_index = "42" + flepi_block_index = "3" + + expected_file_names = [ + f"testprefix_run123_000000042._chimeric/intermediate_3_seir.parquet", + f"testprefix_run123_000000042._chimeric/intermediate_3_hosp.parquet", + f"testprefix_run123_000000042._chimeric/intermediate_3_llik.parquet", + f"testprefix_run123_000000042._chimeric/intermediate_3_spar.parquet", + f"testprefix_run123_000000042._chimeric/intermediate_3_snpi.parquet", + f"testprefix_run123_000000042._chimeric/intermediate_3_hnpi.parquet", + f"testprefix_run123_000000042._chimeric/intermediate_3_hpar.parquet", + f"testprefix_run123_000000042._chimeric/intermediate_3_init.parquet", + f"testprefix_run123_000000042._chimeric/intermediate_3_seed.csv", + ] + + result = create_file_name_for_push( + flepi_run_index=flepi_run_index, + prefix=prefix, + flepi_slot_index=flepi_slot_index, + flepi_block_index=flepi_block_index, + ) + + # Assert the result matches the expected file names + assert result == expected_file_names + + # Assert that create_file_name was called the expected number of times + assert mock_create_file_name.call_count == 9 \ No newline at end of file From f796ce258ae44bc37d5cff1e6348cea6a9eeb5d4 Mon Sep 17 00:00:00 2001 From: PC Date: Wed, 20 Nov 2024 13:30:38 -0500 Subject: [PATCH 30/33] add test --- .../file_paths/test_create_file_name_for_push.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/file_paths/test_create_file_name_for_push.py b/flepimop/gempyor_pkg/tests/file_paths/test_create_file_name_for_push.py index 68d915ef7..42b73f68f 100644 --- a/flepimop/gempyor_pkg/tests/file_paths/test_create_file_name_for_push.py +++ b/flepimop/gempyor_pkg/tests/file_paths/test_create_file_name_for_push.py @@ -1,13 +1,23 @@ from unittest.mock import patch from gempyor.file_paths import create_file_name_for_push + class TestCreateFileNameForPush: # Mock implementation of create_file_name for testing - def mocked_create_file_name(self, run_id, prefix, inference_filename_prefix, inference_filepath_suffix, index, ftype, extension): + def mocked_create_file_name( + self, + run_id, + prefix, + inference_filename_prefix, + inference_filepath_suffix, + index, + ftype, + extension, + ): return f"{prefix}_{run_id}_{inference_filename_prefix}_{inference_filepath_suffix}_{index}_{ftype}.{extension}" # Test method for create_file_name_for_push - @patch("__main__.create_file_name") + @patch("gempyor.file_paths.create_file_name") def test_create_file_name_for_push(self, mock_create_file_name): mock_create_file_name.side_effect = self.mocked_create_file_name @@ -39,4 +49,4 @@ def test_create_file_name_for_push(self, mock_create_file_name): assert result == expected_file_names # Assert that create_file_name was called the expected number of times - assert mock_create_file_name.call_count == 9 \ No newline at end of file + assert mock_create_file_name.call_count == 9 From 25d79d316f5e258b9488cfc7f60174caa2e80e65 Mon Sep 17 00:00:00 2001 From: PC Date: Wed, 20 Nov 2024 13:49:19 -0500 Subject: [PATCH 31/33] change click type --- flepimop/gempyor_pkg/src/gempyor/flepimop_push.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py index f5ea949eb..f6f92b12d 100644 --- a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py +++ b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py @@ -15,7 +15,7 @@ "--s3_results_path", "s3_results_path", envvar="S3_RESULTS_PATH", type=click.STRING, default="", required=False ) @click.option( - "--fs_results_path", "fs_results_path", envvar="FS_RESULTS_PATH", type=click.STRING, default="", required=False + "--fs_results_path", "fs_results_path", envvar="FS_RESULTS_PATH", type=click.Path, default="", required=False ) def flepimop_push( s3_upload: str, From fea94493243f9d4b79133b6633dce09dbb27307d Mon Sep 17 00:00:00 2001 From: PC Date: Thu, 5 Dec 2024 14:05:10 -0500 Subject: [PATCH 32/33] change click string to path --- flepimop/gempyor_pkg/src/gempyor/flepimop_push.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py index f6f92b12d..19f1159ba 100644 --- a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py +++ b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py @@ -15,7 +15,7 @@ "--s3_results_path", "s3_results_path", envvar="S3_RESULTS_PATH", type=click.STRING, default="", required=False ) @click.option( - "--fs_results_path", "fs_results_path", envvar="FS_RESULTS_PATH", type=click.Path, default="", required=False + "--fs_results_path", "fs_results_path", envvar="FS_RESULTS_PATH", type=click.Path(), default="", required=False ) def flepimop_push( s3_upload: str, From 1522e8fbfd68552f571092d595bc8164b64ae4f4 Mon Sep 17 00:00:00 2001 From: PC Date: Thu, 5 Dec 2024 14:08:23 -0500 Subject: [PATCH 33/33] format change --- .../gempyor_pkg/src/gempyor/flepimop_push.py | 62 ++++++++++++++++--- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py index 19f1159ba..600bc757f 100644 --- a/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py +++ b/flepimop/gempyor_pkg/src/gempyor/flepimop_push.py @@ -5,17 +5,63 @@ @click.command() -@click.option("--s3_upload", "s3_upload", envvar="S3_UPLOAD", help="push files to aws", required=True) -@click.option("--data-path", "data_path", envvar="PROJECT_PATH", type=click.Path(exists=True), required=True) -@click.option("--flepi_run_index", "flepi_run_index", envvar="FLEPI_RUN_INDEX", type=click.STRING, required=True) -@click.option("--flepi_prefix", "flepi_prefix", envvar="FLEPI_PREFIX", type=click.STRING, required=True) -@click.option("--flepi_block_index", "flepi_block_index", envvar="FLEPI_BLOCK_INDEX", type=click.STRING, required=True) -@click.option("--flepi_slot_index", "flepi_slot_index", envvar="FLEPI_SLOT_INDEX", type=click.STRING, required=True) @click.option( - "--s3_results_path", "s3_results_path", envvar="S3_RESULTS_PATH", type=click.STRING, default="", required=False + "--s3_upload", + "s3_upload", + envvar="S3_UPLOAD", + help="push files to aws", + required=True, ) @click.option( - "--fs_results_path", "fs_results_path", envvar="FS_RESULTS_PATH", type=click.Path(), default="", required=False + "--data-path", + "data_path", + envvar="PROJECT_PATH", + type=click.Path(exists=True), + required=True, +) +@click.option( + "--flepi_run_index", + "flepi_run_index", + envvar="FLEPI_RUN_INDEX", + type=click.STRING, + required=True, +) +@click.option( + "--flepi_prefix", + "flepi_prefix", + envvar="FLEPI_PREFIX", + type=click.STRING, + required=True, +) +@click.option( + "--flepi_block_index", + "flepi_block_index", + envvar="FLEPI_BLOCK_INDEX", + type=click.STRING, + required=True, +) +@click.option( + "--flepi_slot_index", + "flepi_slot_index", + envvar="FLEPI_SLOT_INDEX", + type=click.STRING, + required=True, +) +@click.option( + "--s3_results_path", + "s3_results_path", + envvar="S3_RESULTS_PATH", + type=click.STRING, + default="", + required=False, +) +@click.option( + "--fs_results_path", + "fs_results_path", + envvar="FS_RESULTS_PATH", + type=click.Path(), + default="", + required=False, ) def flepimop_push( s3_upload: str,