Skip to content

Commit

Permalink
cleaning
Browse files (browse the repository at this point in the history)
  • Loading branch information
arthurprevot committed Jun 8, 2024
1 parent 5bfe9c9 commit 489192c
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 6 deletions.
1 change: 0 additions & 1 deletion tests/yaetos/etl_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ def test_create_spark_submit_python_job_with_launcher(self):
'spark-submit',
'--py-files=some/files.zip',
'jobs/examples/ex7_pandas_job.py', # launcher.py not carried over. may want to change behavior.
# '--job_name=some_job',
]
assert cmd_lst_real == cmd_lst_expected

Expand Down
9 changes: 4 additions & 5 deletions yaetos/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,24 +50,23 @@ def __init__(self, deploy_args, app_args):
self.app_args = app_args
self.app_file = app_args['py_job'] # TODO: remove all refs to app_file to be consistent.
self.aws_setup = aws_setup
# From aws_config.cfg:
self.ec2_key_name = config.get(aws_setup, 'ec2_key_name')
self.s3_region = config.get(aws_setup, 's3_region')
self.user = config.get(aws_setup, 'user')
self.profile_name = config.get(aws_setup, 'profile_name')
self.ec2_subnet_id = config.get(aws_setup, 'ec2_subnet_id')
self.extra_security_gp = config.get(aws_setup, 'extra_security_gp')
self.extra_security_gp = config.get(aws_setup, 'extra_security_gp', fallback=None)
self.emr_ec2_role = config.get(aws_setup, 'emr_ec2_role', fallback='EMR_EC2_DefaultRole')
self.emr_role = config.get(aws_setup, 'emr_role', fallback='EMR_DefaultRole')
# From jobs_metadata.yml:
self.emr_core_instances = int(app_args.get('emr_core_instances', 1)) # TODO: make this update EMR_Scheduled mode too.
self.deploy_args = deploy_args
self.ec2_instance_master = app_args.get('ec2_instance_master', 'm5.xlarge') # 'm5.12xlarge', # used m3.2xlarge (8 vCPU, 30 Gib RAM), and earlier m3.xlarge (4 vCPU, 15 Gib RAM)
self.ec2_instance_slaves = app_args.get('ec2_instance_slaves', 'm5.xlarge')
# Paths
# self.s3_logs = CPt(app_args.get('s3_logs', 's3://').replace('{root_path}', self.app_args.get('root_path', '')))
# Computed params:
s3_logs = app_args.get('s3_logs', 's3://').replace('{root_path}', self.app_args.get('root_path', ''))
print('#### ------', s3_logs)
self.s3_logs = CPt(s3_logs)

self.s3_bucket_logs = self.s3_logs.bucket
self.metadata_folder = 'pipelines_metadata' # TODO remove
self.pipeline_name = self.generate_pipeline_name(self.deploy_args['mode'], self.app_args['job_name'], self.user) # format: some_job.some_user.20181204.153429
Expand Down

0 comments on commit 489192c

Please sign in to comment.