-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild.sbt
98 lines (91 loc) · 3.53 KB
/
build.sbt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
// Project identity and Scala toolchain.
name := "geotrellis-hillshade"
organization := "workshop"
version := "0.1.0"
scalaVersion := "2.12.12"

// Centralize repeated dependency versions so an upgrade touches one line
// (previously "3.5.1" and "2.4.4" were each duplicated three times).
val geotrellisVersion = "3.5.1"
val sparkVersion = "2.4.4"

libraryDependencies ++= Seq(
  // Command-line argument parsing.
  "com.monovore" %% "decline" % "1.2.0",
  // GeoTrellis raster processing, S3 I/O, and GDAL-backed raster reads.
  "org.locationtech.geotrellis" %% "geotrellis-spark" % geotrellisVersion,
  "org.locationtech.geotrellis" %% "geotrellis-s3" % geotrellisVersion,
  "org.locationtech.geotrellis" %% "geotrellis-gdal" % geotrellisVersion,
  // Spark is Provided: supplied at runtime by the cluster / spark-submit,
  // so it must not be bundled into the assembly jar.
  "org.apache.spark" %% "spark-core" % sparkVersion % Provided,
  "org.apache.spark" %% "spark-sql" % sparkVersion % Provided,
  "org.apache.spark" %% "spark-hive" % sparkVersion % Provided
)
// Preload common imports when entering the sbt `console` REPL.
// Uses sbt 1.x slash syntax for consistency with the other settings in this
// file, and adds `|` margin markers — without them the original `.stripMargin`
// was a no-op and the preamble began with a stray blank line.
console / initialCommands :=
  """|import java.net._
     |import geotrellis.layer._
     |import geotrellis.vector._
     |import geotrellis.raster._
     |import geotrellis.raster.gdal._
     |import geotrellis.spark._
     |""".stripMargin
// Fork JVM for test context to avoid memory leaks in Metaspace
Test / fork := true
// Route the forked test JVM's output straight to stdout so test logging
// is visible in the sbt session instead of being buffered by the logger.
Test / outputStrategy := Some(StdoutOutput)
// Settings for sbt-assembly plugin which builds fat jars for spark-submit.
// Migrated from the deprecated `assemblyMergeStrategy in assembly` form to
// sbt 1.x slash syntax, matching the `Test / fork` settings above.
// The merge rules themselves are unchanged.
assembly / assemblyMergeStrategy := {
  // Typesafe/Lightbend config files must be concatenated, not first-wins,
  // or reference defaults contributed by multiple jars are silently lost.
  case "reference.conf" => MergeStrategy.concat
  case "application.conf" => MergeStrategy.concat
  case PathList("META-INF", xs @ _*) =>
    xs match {
      // Per-jar manifests conflict and carry no value in a fat jar.
      case ("MANIFEST.MF" :: Nil) =>
        MergeStrategy.discard
      // ServiceLoader registrations (META-INF/services/<interface>) from
      // every jar must be concatenated so all providers stay discoverable.
      case ("services" :: _ :: Nil) =>
        MergeStrategy.concat
      // JAI operation-registry files: concatenate so image operations
      // registered by different jars all remain available.
      case ("javax.media.jai.registryFile.jai" :: Nil) | ("registryFile.jai" :: Nil) | ("registryFile.jaiext" :: Nil) =>
        MergeStrategy.concat
      // Jar signature files become invalid once contents are repacked.
      case (name :: Nil) if name.endsWith(".RSA") || name.endsWith(".DSA") || name.endsWith(".SF") =>
        MergeStrategy.discard
      case _ =>
        MergeStrategy.first
    }
  case _ => MergeStrategy.first
}
// Settings from sbt-lighter plugin that will automate creating and submitting this job to EMR
import sbtlighter._
// EMR release label and AWS region for the cluster.
sparkEmrRelease := "emr-6.0.0"
sparkAwsRegion := "us-east-1"
sparkClusterName := "geotrellis-hillshade"
// Applications provisioned on the cluster alongside core EMR.
sparkEmrApplications := Seq("Spark", "Zeppelin", "Ganglia")
// Attach an EC2 key pair so the master node is reachable over SSH.
sparkJobFlowInstancesConfig := sparkJobFlowInstancesConfig.value.withEc2KeyName("geotrellis-emr")
// S3 destinations for the uploaded assembly jar and for cluster logs.
sparkS3JarFolder := "s3://geotrellis-test/jobs/jars"
sparkS3LogUri := Some("s3://geotrellis-test/jobs/logs")
// Cluster shape: m4.xlarge master and core nodes, 3 instances total.
sparkMasterType := "m4.xlarge"
sparkCoreType := "m4.xlarge"
sparkInstanceCount := 3
// Prices in USD/hour — presumably spot-instance bid prices; confirm against
// the sbt-lighter documentation for the plugin version in use.
sparkMasterPrice := Some(0.5)
sparkCorePrice := Some(0.5)
// IAM roles for the EMR service and the EC2 instances it launches
// (the AWS-default role names).
sparkEmrServiceRole := "EMR_DefaultRole"
sparkInstanceRole := "EMR_EC2_DefaultRole"
// EBS volume size (GB) for master and core nodes.
sparkMasterEbsSize := Some(64)
sparkCoreEbsSize := Some(64)
// Bootstrap action run on each node before the cluster starts: installs
// GDAL 3.1.2 (version passed as the script argument) via the conda script.
sparkEmrBootstrap := List(
  BootstrapAction(
    "Install GDAL",
    "s3://geotrellis-demo/emr/bootstrap/conda-gdal.sh",
    "3.1.2"
  )
)
// EMR configuration classifications applied when the cluster is created.
sparkEmrConfigs := List(
  // Let EMR size driver/executor memory and cores to the instance type.
  EmrConfig("spark").withProperties(
    "maximizeResourceAllocation" -> "true"
  ),
  EmrConfig("spark-defaults").withProperties(
    "spark.driver.maxResultSize" -> "3G",
    // Dynamic executor allocation; requires the external shuffle service,
    // enabled on the next line.
    "spark.dynamicAllocation.enabled" -> "true",
    "spark.shuffle.service.enabled" -> "true",
    // Compress shuffle output, spill files, and serialized RDD partitions,
    // trading CPU for reduced disk and network I/O.
    "spark.shuffle.compress" -> "true",
    "spark.shuffle.spill.compress" -> "true",
    "spark.rdd.compress" -> "true",
    // Kill the JVM outright on OOM instead of limping on in a bad state.
    "spark.driver.extraJavaOptions" ->"-XX:+UseParallelGC -XX:+UseParallelOldGC -XX:OnOutOfMemoryError='kill -9 %p'",
    "spark.executor.extraJavaOptions" -> "-XX:+UseParallelGC -XX:+UseParallelOldGC -XX:OnOutOfMemoryError='kill -9 %p'",
    // Expose the conda-installed native libraries (GDAL from the bootstrap
    // script) to the application master and executor JVMs.
    "spark.yarn.appMasterEnv.LD_LIBRARY_PATH" -> "/usr/local/miniconda/lib/:/usr/local/lib",
    "spark.executorEnv.LD_LIBRARY_PATH" -> "/usr/local/miniconda/lib/:/usr/local/lib"
  ),
  EmrConfig("yarn-site").withProperties(
    // Fail fast: no application-master retries on failure.
    "yarn.resourcemanager.am.max-attempts" -> "1",
    // Disable YARN's virtual/physical memory checks so containers using
    // off-heap native memory are not killed by the node manager.
    "yarn.nodemanager.vmem-check-enabled" -> "false",
    "yarn.nodemanager.pmem-check-enabled" -> "false"
  )
)