From 566e963ab5aed2aeb1f65523c4886ba7baf842f1 Mon Sep 17 00:00:00 2001 From: Arthur Prevot Date: Sun, 21 Jul 2024 21:18:11 +0200 Subject: [PATCH] update file copy post pypify.sh --- yaetos/libs/generic_jobs/copy_raw_job.py | 7 +++++-- yaetos/libs/generic_jobs/list_files_job.py | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/yaetos/libs/generic_jobs/copy_raw_job.py b/yaetos/libs/generic_jobs/copy_raw_job.py index 6e42f6e3..8173afa7 100644 --- a/yaetos/libs/generic_jobs/copy_raw_job.py +++ b/yaetos/libs/generic_jobs/copy_raw_job.py @@ -1,7 +1,7 @@ """ Job meant to run locally to get data from AWS S3 to local. Updates required to run in cluster. """ -from yaetos.etl_utils import ETL_Base, Commandliner, get_aws_setup +from yaetos.etl_utils import ETL_Base, Commandliner, get_aws_setup # FS_Ops_Dispatcher import os from cloudpathlib import CloudPath as CPt import fnmatch @@ -27,6 +27,9 @@ def transform(self, files_to_copy): pattern = '*' pattern_type = 'glob' + # TODO: replace code below (and all functions) with the commented code + # FS_Ops_Dispatcher().copy_file(path_in, path_out) + session = get_aws_setup(self.jargs.merged_args) s3 = session.client('s3') @@ -34,7 +37,7 @@ def transform(self, files_to_copy): self.logger.info(f"Number of files to be downloaded {file_number}") self.download_files(s3, path_raw_in.bucket, path_raw_in.key, pattern, pattern_type, path_raw_out) - self.logger.info("Finished downloading all files") + self.logger.info("Finished copying all files") return None def download_files(self, s3, bucket_name, prefix, pattern, pattern_type, path_raw_out): diff --git a/yaetos/libs/generic_jobs/list_files_job.py b/yaetos/libs/generic_jobs/list_files_job.py index 656574c1..a106d8ac 100755 --- a/yaetos/libs/generic_jobs/list_files_job.py +++ b/yaetos/libs/generic_jobs/list_files_job.py @@ -1,4 +1,4 @@ -from yaetos.etl_utils import ETL_Base, Commandliner, get_aws_setup +from yaetos.etl_utils import ETL_Base, Commandliner, get_aws_setup # FS_Ops_Dispatcher from cloudpathlib import CloudPath as CPt import fnmatch import re @@ -26,6 +26,9 @@ def transform(self, files): pattern = '*' pattern_type = 'glob' + # TODO: replace code below (and all functions) with the commented code + # files = FS_Ops_Dispatcher().list_files(path, regex=None, globy=None) + session = get_aws_setup(self.jargs.merged_args) s3 = session.client('s3')