Skip to content

Commit

Permalink
chore: added s3 support (#18)
Browse files Browse the repository at this point in the history
- add matching spark version with hadoop version
- add dependencies to support S3
  • Loading branch information
dmrmlvv authored Dec 8, 2023
1 parent 205cbc8 commit d4cf5e9
Showing 1 changed file with 19 additions and 1 deletion.
20 changes: 19 additions & 1 deletion project/Utils.scala
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import scala.util.matching.Regex

import sbt._
import src.main.scala.BuildAssyModePlugin.autoImport.AssyMode
import src.main.scala.BuildPackageTypePlugin.autoImport.PackageType
Expand All @@ -9,6 +11,17 @@ object Utils {
// Transitive artifacts excluded from the Kafka connector dependencies (see sparkKafkaDeps).
// NOTE(review): net.jpountz.lz4 is presumably excluded in favour of the relocated
// org.lz4:lz4-java artifact brought in transitively elsewhere — confirm via dependency tree.
lazy val jpountz = ExclusionRule(organization = "net.jpountz.lz4", name = "lz4")
// NOTE(review): hadoop-client-runtime likely clashes with the full Hadoop client jars
// already on the classpath — confirm.
lazy val hadoop = ExclusionRule(organization = "org.apache.hadoop", name = "hadoop-client-runtime")


// Maps a Spark release line (regex probed against `sparkVersion` via
// `findFirstMatchIn`) to the Hadoop version that Spark line is built
// against, so hadoop-aws can be pinned compatibly for S3 support.
// Because matching is regex-based, literal dots MUST be escaped — an
// unescaped "." matches any character.
val hadoopSparkMatching: Map[Regex, String] = Map(
  "2\\.4\\.[0-8]".r -> "2.6.5",
  "3\\.0\\.[0-3]".r -> "2.7.4",
  "3\\.1\\.[0-3]".r -> "3.2.0",
  "3\\.2\\.[0-4]".r -> "3.3.1",
  "3\\.3\\.[0-3]".r -> "3.3.2",
  "3\\.4\\.[0-2]".r -> "3.3.4",
  // was "3.5.0".r — unescaped dots accepted any characters (e.g. "3x5y0")
  "3\\.5\\.0".r -> "3.3.4"
)

val sparkDeps: Map[String, ModuleID] = Map(
"sparkCore" -> "org.apache.spark" %% "spark-core" % sparkVersion,
"sparkSql" -> "org.apache.spark" %% "spark-sql" % sparkVersion,
Expand All @@ -17,12 +30,17 @@ object Utils {
"sparkCatalyst" -> "org.apache.spark" %% "spark-catalyst" % sparkVersion
).mapValues(m => if (assyMode == AssyMode.WithSpark) m else m % "provided")

// Hadoop-side dependencies required for S3 (s3a) support. The hadoop-aws
// artifact version must match the Hadoop version bundled by the current
// Spark release, looked up from `hadoopSparkMatching`.
val hadoopDeps: Map[String, ModuleID] = Map(
  "hadoopAws" -> "org.apache.hadoop" % "hadoop-aws" % hadoopSparkMatching.collectFirst {
    case (sparkPattern, hadoopVersion) if sparkPattern.findFirstMatchIn(sparkVersion).isDefined => hadoopVersion
  }.getOrElse(
    // was `.get`, which aborted the build with a bare NoSuchElementException
    // for an unmapped Spark version; fail with an actionable message instead.
    sys.error(s"No Hadoop version mapped for Spark $sparkVersion; extend hadoopSparkMatching in project/Utils.scala")
  )
)
// Kafka connector dependencies for Spark streaming/SQL. Each module drops
// the lz4 and hadoop-client-runtime transitives (see the exclusion rules
// above) to avoid classpath conflicts.
val sparkKafkaDeps: Map[String, ModuleID] = {
  // Apply the shared exclusions to a single Kafka connector module.
  def withoutConflicts(module: ModuleID): ModuleID = module.excludeAll(jpountz, hadoop)
  Map(
    "sparkKafkaStreaming" -> withoutConflicts("org.apache.spark" %% "spark-streaming-kafka-0-10" % sparkVersion),
    "sparkKafkaSql"       -> withoutConflicts("org.apache.spark" %% "spark-sql-kafka-0-10" % sparkVersion)
  )
}

sparkDeps ++ sparkKafkaDeps + ("sparkAvro" -> "org.apache.spark" %% "spark-avro" % sparkVersion)
sparkDeps ++ hadoopDeps ++ sparkKafkaDeps + ("sparkAvro" -> "org.apache.spark" %% "spark-avro" % sparkVersion)
}

def overrideFasterXml(sparkVersion: String): Seq[ModuleID] = {
Expand Down

0 comments on commit d4cf5e9

Please sign in to comment.