diff --git a/tests/yaetos/etl_utils_test.py b/tests/yaetos/etl_utils_test.py
index 3720a9e4..d4b66c5a 100644
--- a/tests/yaetos/etl_utils_test.py
+++ b/tests/yaetos/etl_utils_test.py
@@ -186,7 +186,6 @@ def test_create_spark_submit_python_job_with_launcher(self):
             'spark-submit',
             '--py-files=some/files.zip',
             'jobs/examples/ex7_pandas_job.py',  # launcher.py not carried over. may want to change behavior.
-            # '--job_name=some_job',
         ]
         assert cmd_lst_real == cmd_lst_expected
diff --git a/yaetos/deploy.py b/yaetos/deploy.py
index e464f390..4a1e85dc 100644
--- a/yaetos/deploy.py
+++ b/yaetos/deploy.py
@@ -50,24 +50,23 @@ def __init__(self, deploy_args, app_args):
         self.app_args = app_args
         self.app_file = app_args['py_job']  # TODO: remove all refs to app_file to be consistent.
         self.aws_setup = aws_setup
+        # From aws_config.cfg:
         self.ec2_key_name = config.get(aws_setup, 'ec2_key_name')
         self.s3_region = config.get(aws_setup, 's3_region')
         self.user = config.get(aws_setup, 'user')
         self.profile_name = config.get(aws_setup, 'profile_name')
         self.ec2_subnet_id = config.get(aws_setup, 'ec2_subnet_id')
-        self.extra_security_gp = config.get(aws_setup, 'extra_security_gp')
+        self.extra_security_gp = config.get(aws_setup, 'extra_security_gp', fallback=None)
         self.emr_ec2_role = config.get(aws_setup, 'emr_ec2_role', fallback='EMR_EC2_DefaultRole')
         self.emr_role = config.get(aws_setup, 'emr_role', fallback='EMR_DefaultRole')
+        # From jobs_metadata.yml:
         self.emr_core_instances = int(app_args.get('emr_core_instances', 1))  # TODO: make this update EMR_Scheduled mode too.
         self.deploy_args = deploy_args
         self.ec2_instance_master = app_args.get('ec2_instance_master', 'm5.xlarge')  # 'm5.12xlarge', # used m3.2xlarge (8 vCPU, 30 Gib RAM), and earlier m3.xlarge (4 vCPU, 15 Gib RAM)
         self.ec2_instance_slaves = app_args.get('ec2_instance_slaves', 'm5.xlarge')
-        # Paths
-        # self.s3_logs = CPt(app_args.get('s3_logs', 's3://').replace('{root_path}', self.app_args.get('root_path', '')))
+        # Computed params:
         s3_logs = app_args.get('s3_logs', 's3://').replace('{root_path}', self.app_args.get('root_path', ''))
-        print('#### ------', s3_logs)
         self.s3_logs = CPt(s3_logs)
-        self.s3_bucket_logs = self.s3_logs.bucket
         self.metadata_folder = 'pipelines_metadata'  # TODO remove
         self.pipeline_name = self.generate_pipeline_name(self.deploy_args['mode'], self.app_args['job_name'], self.user)  # format: some_job.some_user.20181204.153429
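
Note on the `extra_security_gp` change: adding `fallback=None` makes that key optional in aws_config.cfg, since Python's configparser raises NoOptionError for a missing option unless a fallback is supplied. A minimal sketch of that behavior follows (illustrative only, not part of the patch; the '[dev]' section name is a hypothetical stand-in for an aws_config.cfg section):

    # Sketch of configparser's fallback behavior; section/key names are stand-ins.
    from configparser import ConfigParser, NoOptionError

    config = ConfigParser()
    config.read_string("""
    [dev]
    ec2_key_name = my_key
    s3_region = us-east-1
    """)

    try:
        config.get('dev', 'extra_security_gp')  # old call: missing key raises
    except NoOptionError:
        print('extra_security_gp missing -> NoOptionError')

    print(config.get('dev', 'extra_security_gp', fallback=None))  # new call -> None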