-
Notifications
You must be signed in to change notification settings - Fork 0
/
template.yml
118 lines (104 loc) · 3.77 KB
/
template.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
AWSTemplateFormatVersion: "2010-09-09"
Transform: "AWS::Serverless-2016-10-31"

# Deployment-time inputs. Every parameter has a default, so the stack can be
# deployed without overrides.
Parameters:
  # IAM role ARN used by the schedule rule, the state machine, and the
  # EMR Serverless job run.
  ExecutionRoleArn:
    Type: String
    Default: "arn:aws:iam::117819748843:role/service-role"

  # S3 bucket name that holds the Spark entry point script.
  DeploymentZone:
    Type: String
    Default: "deployment-zone-117819748843-us-east-1"

  # S3 bucket name that receives the EMR Serverless job logs.
  EmrZone:
    Type: String
    Default: "log-zone-117819748843-us-east-1"

  # Container image the EMR Serverless application runs.
  DockerImageUri:
    Type: String
    Default: "117819748843.dkr.ecr.us-east-1.amazonaws.com/emr-serverless:satellite-image-etl"

  # Security groups attached to the EMR Serverless application.
  SecurityGroupIds:
    Type: CommaDelimitedList
    Default: "sg-002c9a50cf295d5ad"

  # Subnets the EMR Serverless application is placed in.
  SubnetIds:
    Type: CommaDelimitedList
    Default: "subnet-07fa6b708d8df66b4"
Resources:
  # EventBridge schedule rule that starts the ETL state machine.
  ETLJobTrigger:
    Type: AWS::Events::Rule
    Properties:
      Name: satellite-image-etl-trigger
      RoleArn: !Ref ExecutionRoleArn
      # Six-field EventBridge cron: minute 0 of hour 6, every day
      # (EventBridge rule schedules are evaluated in UTC).
      ScheduleExpression: "cron(0 6 * * ? *)"
      State: ENABLED
      Targets:
        - Id: ETLStepFunction
          Arn: !GetAtt StepFunctionStateMachine.Arn
          # JSON text passed to the state machine as its execution input.
          Input: "{\n \"input\": \"{}\"\n}"
          # Give up on delivery after 3 retries or once the event is
          # 300 seconds old.
          RetryPolicy:
            MaximumEventAgeInSeconds: 300
            MaximumRetryAttempts: 3
          RoleArn: !Ref ExecutionRoleArn

  # EMR Serverless Spark application that runs the ETL container image.
  EMRServerlessApplication:
    Type: AWS::EMRServerless::Application
    Properties:
      Name: SatelliteImageETLApplication
      Type: Spark
      ReleaseLabel: "emr-7.2.0"
      Architecture: X86_64
      # Start on job submission; stop again after one idle minute.
      AutoStartConfiguration:
        Enabled: true
      AutoStopConfiguration:
        Enabled: true
        IdleTimeoutMinutes: 1
      # Upper bound on the resources the application may use.
      MaximumCapacity:
        Cpu: 16vCPU
        Disk: 100GB
        Memory: 64GB
      ImageConfiguration:
        ImageUri: !Ref DockerImageUri
      NetworkConfiguration:
        SecurityGroupIds: !Ref SecurityGroupIds
        SubnetIds: !Ref SubnetIds

  # Single-state Step Functions workflow that submits the Spark job to the
  # EMR Serverless application above.
  StepFunctionStateMachine:
    Type: AWS::StepFunctions::StateMachine
    Properties:
      StateMachineName: satellite-image-etl
      RoleArn: !Ref ExecutionRoleArn
      Definition:
        Comment: "Runs EMR Serverless job for Satellite Image ETL"
        StartAt: StartJobRun
        States:
          StartJobRun:
            Type: Task
            # NOTE(review): this is the request-response AWS SDK integration,
            # so the execution ends once StartJobRun is accepted rather than
            # when the Spark job finishes. Confirm that fire-and-forget is
            # intended.
            Resource: "arn:aws:states:::aws-sdk:emrserverless:startJobRun"
            Parameters:
              Name: "Satellite Image ETL"
              # Fresh idempotency token for every execution.
              "ClientToken.$": "States.UUID()"
              ApplicationId: !Ref EMRServerlessApplication
              ExecutionRoleArn: !Ref ExecutionRoleArn
              JobDriver:
                SparkSubmit:
                  EntryPoint: !Sub "s3://${DeploymentZone}/satellite_image_etl/spark_entrypoint.py"
                  # Folded scalar: the lines below are joined with single
                  # spaces into one argument string with no leading or
                  # trailing whitespace. No !Sub is needed because the value
                  # contains no ${...} variables.
                  SparkSubmitParameters: >-
                    --driver-memory 14G
                    --driver-cores 4
                    --num-executors 1
                    --executor-memory 2G
                    --executor-cores 2
                    --conf spark.dynamicAllocation.enabled=false
                    --conf spark.sql.execution.arrow.pyspark.enabled=true
                    --jars https://repo1.maven.org/maven2/org/postgresql/postgresql/42.6.0/postgresql-42.6.0.jar
              ConfigurationOverrides:
                MonitoringConfiguration:
                  S3MonitoringConfiguration:
                    LogUri: !Sub "s3://${EmrZone}/logging/"
              ExecutionTimeoutMinutes: 600
            End: true