This repository has been archived by the owner on Nov 22, 2024. It is now read-only.

Spark 3.5.1 (#174)
* fix build-and-test by upgrading spark to 3.5.1 and akka to 2.8.5
fieldera-da authored Feb 27, 2024
1 parent 050a837 commit 9ed89d0
Showing 4 changed files with 17 additions and 17 deletions.
8 changes: 4 additions & 4 deletions cloudflow-it/Makefile
@@ -29,10 +29,10 @@ prepare-swiss-knife:
 prepare-clis:
 	@echo '****** Prepare the runtimes Clis'
 	rm -rf spark
-	wget https://downloads.apache.org/spark/spark-3.3.4/spark-3.3.4-bin-hadoop2.tgz
-	tar -xf spark-3.3.4-bin-hadoop2.tgz
-	mv spark-3.3.4-bin-hadoop2 spark
-	rm spark-3.3.4-bin-hadoop2.tgz
+	wget https://downloads.apache.org/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz
+	tar -xf spark-3.5.1-bin-hadoop3.tgz
+	mv spark-3.5.1-bin-hadoop3 spark
+	rm spark-3.5.1-bin-hadoop3.tgz
 	rm -rf flink
 	wget https://archive.apache.org/dist/flink/flink-1.18.1/flink-1.18.1-bin-scala_2.12.tgz
 	tar -xf flink-1.18.1-bin-scala_2.12.tgz
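Note: downloads.apache.org serves only the latest release on each branch; superseded tarballs such as the 3.3.4 one removed above get pruned, which is a common reason pinned wget lines like these start failing. A minimal fallback sketch, assuming the archive mirror keeps the same path layout (archive.apache.org/dist is already used for the Flink download above):

    # Fall back to the Apache archive if the pinned release has been pruned
    # from downloads.apache.org; archive.apache.org retains every past release.
    SPARK_VERSION=3.5.1
    wget "https://downloads.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz" \
      || wget "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz"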
@@ -96,10 +96,10 @@ object CloudflowNativeSparkPlugin extends AutoPlugin {
     IO.write(sparkEntrypointSh, sparkEntrypointShContent)
 
     val scalaVersion = (ThisProject / scalaBinaryVersion).value
-    val sparkVersion = "3.3.4"
+    val sparkVersion = "3.5.1"
     val sparkHome = "/opt/spark"
 
-    val sparkTgz = s"spark-${sparkVersion}-bin-hadoop2.tgz"
+    val sparkTgz = s"spark-${sparkVersion}-bin-hadoop3.tgz"
     val sparkTgzUrl = s"https://downloads.apache.org/spark/spark-${sparkVersion}/${sparkTgz}"
 
     val tiniVersion = "v0.18.0"
@@ -118,12 +118,12 @@ object CloudflowNativeSparkPlugin extends AutoPlugin {
Seq("wget", sparkTgzUrl),
Seq("tar", "-xvzf", sparkTgz),
Seq("mkdir", "-p", sparkHome),
Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/jars", s"${sparkHome}/jars"),
Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/bin", s"${sparkHome}/bin"),
Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/sbin", s"${sparkHome}/sbin"),
Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/examples", s"${sparkHome}/examples"),
Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/data", s"${sparkHome}/data"),
Seq("cp", s"spark-${sparkVersion}-bin-hadoop2/kubernetes/dockerfiles/spark/entrypoint.sh", "/opt/"),
Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop3/jars", s"${sparkHome}/jars"),
Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop3/bin", s"${sparkHome}/bin"),
Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop3/sbin", s"${sparkHome}/sbin"),
Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop3/examples", s"${sparkHome}/examples"),
Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop3/data", s"${sparkHome}/data"),
Seq("cp", s"spark-${sparkVersion}-bin-hadoop3/kubernetes/dockerfiles/spark/entrypoint.sh", "/opt/"),
Seq("mkdir", "-p", s"${sparkHome}/conf"),
Seq("cp", "/tmp/log4j.properties", s"${sparkHome}/conf/log4j.properties"),
Seq("rm", sparkTgz),
@@ -132,7 +132,7 @@ object CloudflowNativeSparkPlugin extends AutoPlugin {
       // logback is provided by the streamlet
       //Seq("rm", s"${sparkHome}/jars/slf4j-log4j12-1.7.30.jar"),
       //Seq("rm", s"${sparkHome}/jars/log4j-1.2.17.jar"),
-      Seq("rm", "-rf", s"spark-${sparkVersion}-bin-hadoop2"),
+      Seq("rm", "-rf", s"spark-${sparkVersion}-bin-hadoop3"),
       Seq("chmod", "a+x", "/opt/spark-entrypoint.sh"),
       Seq("ln", "-s", "/lib", "/lib64"),
       Seq("apk", "add", "bash", "curl"),
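Note: every changed line in the two hunks above swaps the same directory prefix, because the hadoop2 and hadoop3 distributions share one top-level layout. A quick hedged check of that layout against a freshly downloaded tarball, before trusting the six cp targets:

    # List the top-level entries of the Spark 3.5.1 / hadoop3 tarball to confirm
    # jars/, bin/, sbin/, examples/, data/ and kubernetes/ are all still present.
    tar -tzf spark-3.5.1-bin-hadoop3.tgz | cut -d/ -f2 | sort -u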
2 changes: 1 addition & 1 deletion docs/docs-source/docs/modules/get-started/pages/index.adoc
@@ -26,7 +26,7 @@ helm upgrade -i cloudflow cloudflow-helm-charts/cloudflow \

 **In any Cloudflow application using Spark or Flink**, the Kubernetes cluster will need to have a storage class of the `ReadWriteMany` type installed.
 
-NOTE: The NFS Server Provisioner is an excellent and easy to setup storage in the development environment, for production use the suggested and supported Cloud integrations for https://ci.apache.org/projects/flink/flink-docs-master/docs/deployment/filesystems/overview/#pluggable-file-systems[Flink] and for https://spark.apache.org/docs/3.3.4/cloud-integration.html#important-cloud-object-stores-are-not-real-filesystems[Spark]
+NOTE: The NFS Server Provisioner is an excellent and easy to setup storage in the development environment, for production use the suggested and supported Cloud integrations for https://ci.apache.org/projects/flink/flink-docs-master/docs/deployment/filesystems/overview/#pluggable-file-systems[Flink] and for https://spark.apache.org/docs/3.5.1/cloud-integration.html#important-cloud-object-stores-are-not-real-filesystems[Spark]
 
 For testing purposes, we suggest using the NFS Server Provisioner, which can be found here: https://github.com/helm/charts/tree/master/stable/nfs-server-provisioner[NFS Server Provisioner Helm chart]

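For a development cluster, a minimal install sketch for that chart; the repo URL points at the read-only archive of the stable charts, and the value name is an assumption to verify against the chart's values.yaml:

    # Dev/test only: the stable charts repo is archived but still servable.
    helm repo add stable https://charts.helm.sh/stable
    helm install nfs-server-provisioner stable/nfs-server-provisioner \
      --set storageClass.name=nfs   # assumed value name; check values.yaml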
6 changes: 3 additions & 3 deletions project/Dependencies.scala
@@ -11,8 +11,8 @@ object Dependencies {

    val flinkVersion = "1.18.1"
    val flinkKafkaVersion = "3.1.0-1.18"
-   val sparkVersion = "3.3.4"
-   val akka = "2.6.19"
+   val sparkVersion = "3.5.1"
+   val akka = "2.8.5"
    val jackson = "2.12.6"
    val fabric8 = "5.0.0"
    val scalaTest = "3.2.11"
@@ -24,7 +24,7 @@ object Dependencies {
    val cloudflowAvro = "com.lightbend.cloudflow" %% "cloudflow-avro" % Versions.cloudflowVersion
    val akkaActor = "com.typesafe.akka" %% "akka-actor" % Versions.akka
    val akkaStream = "com.typesafe.akka" %% "akka-stream" % Versions.akka
-   val akkaProtobuf = "com.typesafe.akka" %% "akka-protobuf" % Versions.akka
+   val akkaProtobuf = "com.typesafe.akka" %% "akka-protobuf-v3" % Versions.akka
    val akkaDiscovery = "com.typesafe.akka" %% "akka-discovery" % Versions.akka
 
    val flink = "org.apache.flink" %% "flink-scala" % Versions.flinkVersion
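Note: besides the version bumps, akkaProtobuf now resolves akka-protobuf-v3, the artifact that has carried Akka's shaded Protobuf 3 runtime since Akka 2.6. A hedged way to confirm the renamed artifact resolves at 2.8.5, assuming the coursier CLI (cs) is installed; the _2.13 suffix is illustrative, a 2.12 build is also published:

    # Resolve the renamed Akka protobuf artifact from Maven Central.
    cs resolve com.typesafe.akka:akka-protobuf-v3_2.13:2.8.5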
