From 3098d2fa66ccd498733559ed231d8a7fb4a07b39 Mon Sep 17 00:00:00 2001
From: fieldera-da
Date: Mon, 26 Feb 2024 11:22:33 +0000
Subject: [PATCH 1/2] fix build-and-test by upgrading spark to 3.5.1 and akka
 to 2.8.5

---
 .../contrib/CloudflowNativeSparkPlugin.scala  | 18 +++++++++---------
 .../docs/modules/get-started/pages/index.adoc |  2 +-
 project/Dependencies.scala                    |  6 +++---
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/cloudflow-sbt-spark/src/main/scala/cloudflow/contrib/CloudflowNativeSparkPlugin.scala b/cloudflow-sbt-spark/src/main/scala/cloudflow/contrib/CloudflowNativeSparkPlugin.scala
index 37d80d9..bd14c5b 100644
--- a/cloudflow-sbt-spark/src/main/scala/cloudflow/contrib/CloudflowNativeSparkPlugin.scala
+++ b/cloudflow-sbt-spark/src/main/scala/cloudflow/contrib/CloudflowNativeSparkPlugin.scala
@@ -96,10 +96,10 @@ object CloudflowNativeSparkPlugin extends AutoPlugin {
         IO.write(sparkEntrypointSh, sparkEntrypointShContent)
 
         val scalaVersion = (ThisProject / scalaBinaryVersion).value
-        val sparkVersion = "3.3.4"
+        val sparkVersion = "3.5.1"
         val sparkHome = "/opt/spark"
 
-        val sparkTgz = s"spark-${sparkVersion}-bin-hadoop2.tgz"
+        val sparkTgz = s"spark-${sparkVersion}-bin-hadoop3.tgz"
         val sparkTgzUrl = s"https://downloads.apache.org/spark/spark-${sparkVersion}/${sparkTgz}"
 
         val tiniVersion = "v0.18.0"
@@ -118,12 +118,12 @@
           Seq("wget", sparkTgzUrl),
           Seq("tar", "-xvzf", sparkTgz),
           Seq("mkdir", "-p", sparkHome),
-          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/jars", s"${sparkHome}/jars"),
-          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/bin", s"${sparkHome}/bin"),
-          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/sbin", s"${sparkHome}/sbin"),
-          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/examples", s"${sparkHome}/examples"),
-          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/data", s"${sparkHome}/data"),
-          Seq("cp", s"spark-${sparkVersion}-bin-hadoop2/kubernetes/dockerfiles/spark/entrypoint.sh", "/opt/"),
+          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop3/jars", s"${sparkHome}/jars"),
+          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop3/bin", s"${sparkHome}/bin"),
+          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop3/sbin", s"${sparkHome}/sbin"),
+          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop3/examples", s"${sparkHome}/examples"),
+          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop3/data", s"${sparkHome}/data"),
+          Seq("cp", s"spark-${sparkVersion}-bin-hadoop3/kubernetes/dockerfiles/spark/entrypoint.sh", "/opt/"),
           Seq("mkdir", "-p", s"${sparkHome}/conf"),
           Seq("cp", "/tmp/log4j.properties", s"${sparkHome}/conf/log4j.properties"),
           Seq("rm", sparkTgz),
@@ -132,7 +132,7 @@
           // logback is provided by the streamlet
           //Seq("rm", s"${sparkHome}/jars/slf4j-log4j12-1.7.30.jar"),
           //Seq("rm", s"${sparkHome}/jars/log4j-1.2.17.jar"),
-          Seq("rm", "-rf", s"spark-${sparkVersion}-bin-hadoop2"),
+          Seq("rm", "-rf", s"spark-${sparkVersion}-bin-hadoop3"),
           Seq("chmod", "a+x", "/opt/spark-entrypoint.sh"),
           Seq("ln", "-s", "/lib", "/lib64"),
           Seq("apk", "add", "bash", "curl"),
diff --git a/docs/docs-source/docs/modules/get-started/pages/index.adoc b/docs/docs-source/docs/modules/get-started/pages/index.adoc
index 5ed641c..b78b2c8 100644
--- a/docs/docs-source/docs/modules/get-started/pages/index.adoc
+++ b/docs/docs-source/docs/modules/get-started/pages/index.adoc
@@ -26,7 +26,7 @@ helm upgrade -i cloudflow cloudflow-helm-charts/cloudflow \
 
 **In any Cloudflow application using Spark or Flink**, the Kubernetes cluster will need to have a storage class of the `ReadWriteMany` type installed.
 
-NOTE: The NFS Server Provisioner is an excellent and easy to setup storage in the development environment, for production use the suggested and supported Cloud integrations for https://ci.apache.org/projects/flink/flink-docs-master/docs/deployment/filesystems/overview/#pluggable-file-systems[Flink] and for https://spark.apache.org/docs/3.3.4/cloud-integration.html#important-cloud-object-stores-are-not-real-filesystems[Spark]
+NOTE: The NFS Server Provisioner is an excellent and easy to setup storage in the development environment, for production use the suggested and supported Cloud integrations for https://ci.apache.org/projects/flink/flink-docs-master/docs/deployment/filesystems/overview/#pluggable-file-systems[Flink] and for https://spark.apache.org/docs/3.5.1/cloud-integration.html#important-cloud-object-stores-are-not-real-filesystems[Spark]
 
 For testing purposes, we suggest using the NFS Server Provisioner, which can be found here: https://github.com/helm/charts/tree/master/stable/nfs-server-provisioner[NFS Server Provisioner Helm chart]
 
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index b55089d..4322795 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -11,8 +11,8 @@ object Dependencies {
     val flinkVersion = "1.18.1"
     val flinkKafkaVersion = "3.1.0-1.18"
 
-    val sparkVersion = "3.3.4"
-    val akka = "2.6.19"
+    val sparkVersion = "3.5.1"
+    val akka = "2.8.5"
     val jackson = "2.12.6"
     val fabric8 = "5.0.0"
     val scalaTest = "3.2.11"
@@ -24,7 +24,7 @@ object Dependencies {
     val cloudflowAvro = "com.lightbend.cloudflow" %% "cloudflow-avro" % Versions.cloudflowVersion
     val akkaActor = "com.typesafe.akka" %% "akka-actor" % Versions.akka
     val akkaStream = "com.typesafe.akka" %% "akka-stream" % Versions.akka
-    val akkaProtobuf = "com.typesafe.akka" %% "akka-protobuf" % Versions.akka
+    val akkaProtobuf = "com.typesafe.akka" %% "akka-protobuf-v3" % Versions.akka
     val akkaDiscovery = "com.typesafe.akka" %% "akka-discovery" % Versions.akka
 
     val flink = "org.apache.flink" %% "flink-scala" % Versions.flinkVersion

From cad253e31a7e3b1228da6a3560076f5a1783786a Mon Sep 17 00:00:00 2001
From: fieldera-da
Date: Mon, 26 Feb 2024 11:24:13 +0000
Subject: [PATCH 2/2] same spark upgrade in makefile

---
 cloudflow-it/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cloudflow-it/Makefile b/cloudflow-it/Makefile
index fa8d3d5..d89dd44 100644
--- a/cloudflow-it/Makefile
+++ b/cloudflow-it/Makefile
@@ -29,10 +29,10 @@ prepare-swiss-knife:
 prepare-clis:
 	@echo '****** Prepare the runtimes Clis'
 	rm -rf spark
-	wget https://downloads.apache.org/spark/spark-3.3.4/spark-3.3.4-bin-hadoop2.tgz
-	tar -xf spark-3.3.4-bin-hadoop2.tgz
-	mv spark-3.3.4-bin-hadoop2 spark
-	rm spark-3.3.4-bin-hadoop2.tgz
+	wget https://downloads.apache.org/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz
+	tar -xf spark-3.5.1-bin-hadoop3.tgz
+	mv spark-3.5.1-bin-hadoop3 spark
+	rm spark-3.5.1-bin-hadoop3.tgz
	rm -rf flink
 	wget https://archive.apache.org/dist/flink/flink-1.18.1/flink-1.18.1-bin-scala_2.12.tgz
 	tar -xf flink-1.18.1-bin-scala_2.12.tgz
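
A note on the distribution naming both patches depend on (not part of the patches themselves): Spark stopped publishing prebuilt Hadoop 2 binaries after the 3.3.x line, so every 3.4+ tarball carries the -bin-hadoop3 suffix, and downloads.apache.org generally serves only current releases, with superseded versions moving to archive.apache.org (the Flink 1.18.1 URL above already points at the archive for that reason). Below is a minimal, self-contained sketch of the version-to-URL derivation CloudflowNativeSparkPlugin uses; the object name SparkDist is illustrative only:

object SparkDist {
  // Bump the version in one place; the tarball name and URL follow from it.
  val sparkVersion = "3.5.1"

  // Spark 3.4+ is only prebuilt against Hadoop 3, hence the fixed suffix.
  val sparkDist = s"spark-${sparkVersion}-bin-hadoop3"
  val sparkTgz  = s"${sparkDist}.tgz"

  // Valid while 3.5.1 is a current release; once superseded, the same file
  // lives under https://archive.apache.org/dist/spark/ instead.
  val sparkTgzUrl = s"https://downloads.apache.org/spark/spark-${sparkVersion}/${sparkTgz}"

  def main(args: Array[String]): Unit = println(sparkTgzUrl)
}

If a pinned version later disappears from downloads.apache.org, switching the host to archive.apache.org/dist in both the plugin and the Makefile is usually the whole fix.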