Skip to content

Commit

Permalink
update file copy post pypify.sh
Browse files Browse the repository at this point in the history
  • Loading branch information
arthurprevot committed Jul 21, 2024
1 parent 9def33f commit 566e963
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
7 changes: 5 additions & 2 deletions yaetos/libs/generic_jobs/copy_raw_job.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Job meant to run locally to copy data from AWS S3 to the local filesystem. Updates are required to run it in a cluster.
"""
from yaetos.etl_utils import ETL_Base, Commandliner, get_aws_setup
from yaetos.etl_utils import ETL_Base, Commandliner, get_aws_setup # FS_Ops_Dispatcher
import os
from cloudpathlib import CloudPath as CPt
import fnmatch
Expand All @@ -27,14 +27,17 @@ def transform(self, files_to_copy):
pattern = '*'
pattern_type = 'glob'

# TODO: replace the code below (and all functions) with the commented-out call on the next line
# FS_Ops_Dispatcher().copy_file(path_in, path_out)

session = get_aws_setup(self.jargs.merged_args)
s3 = session.client('s3')

file_number = self.get_size(s3, path_raw_in.bucket, path_raw_in.key, pattern, pattern_type)
self.logger.info(f"Number of files to be downloaded {file_number}")

self.download_files(s3, path_raw_in.bucket, path_raw_in.key, pattern, pattern_type, path_raw_out)
self.logger.info("Finished downloading all files")
self.logger.info("Finished copying all files")
return None

def download_files(self, s3, bucket_name, prefix, pattern, pattern_type, path_raw_out):
Expand Down
5 changes: 4 additions & 1 deletion yaetos/libs/generic_jobs/list_files_job.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from yaetos.etl_utils import ETL_Base, Commandliner, get_aws_setup
from yaetos.etl_utils import ETL_Base, Commandliner, get_aws_setup # FS_Ops_Dispatcher
from cloudpathlib import CloudPath as CPt
import fnmatch
import re
Expand Down Expand Up @@ -26,6 +26,9 @@ def transform(self, files):
pattern = '*'
pattern_type = 'glob'

# TODO: replace the code below (and all functions) with the commented-out call on the next line
# files = FS_Ops_Dispatcher().list_files(path, regex=None, globy=None)

session = get_aws_setup(self.jargs.merged_args)

s3 = session.client('s3')
Expand Down

0 comments on commit 566e963

Please sign in to comment.