diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index adebf39..6dcc8f6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,7 +6,7 @@ jobs: strategy: fail-fast: false matrix: - spark: ["2.4.8","3.0.3","3.1.3","3.2.1"] + spark: ["3.1.3","3.2.1", "3.3.2", "3.4.3", "3.5.1"] runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 diff --git a/README.md b/README.md index 55ba2e4..e1b63a7 100644 --- a/README.md +++ b/README.md @@ -15,10 +15,8 @@ Fetch the JAR file from Maven. ```scala // for Spark 3 libraryDependencies += "com.github.mrpowers" %% "spark-fast-tests" % "1.1.0" % "test" - -// for Spark 2 -libraryDependencies += "com.github.mrpowers" %% "spark-fast-tests" % "0.23.0" % "test" ``` +**Important: Future versions of spark-fast-tests will no longer support Spark 2.x. We recommend upgrading to Spark 3.x to ensure compatibility with upcoming releases.** Here's a link to the releases for different Scala versions: diff --git a/build.sbt b/build.sbt index cd410d5..105cd6a 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,5 @@ enablePlugins(GitVersioning) - -scalafmtOnCompile in Compile := true +Compile / scalafmtOnCompile:= true organization := "com.github.mrpowers" name := "spark-fast-tests" @@ -11,26 +10,26 @@ val versionRegex = """^(.*)\.(.*)\.(.*)$""".r val sparkVersion = settingKey[String]("Spark version") -val scala2_13= "2.13.8" +val scala2_13= "2.13.13" val scala2_12= "2.12.15" -val scala2_11= "2.11.12" - -sparkVersion := System.getProperty("spark.testVersion", "3.2.1") -crossScalaVersions := {sparkVersion.value match { - case versionRegex("3", m, _) if m.toInt >= 2 => Seq(scala2_12, scala2_13) - case versionRegex("3", _ , _) => Seq(scala2_12) - case versionRegex("2", _ , _) => Seq(scala2_11) -} +val scala2_11= "2.11.17" + +sparkVersion := System.getProperty("spark.testVersion", "3.5.1") +crossScalaVersions := { + sparkVersion.value match { + case versionRegex("3", m, _) if m.toInt >= 2 => Seq(scala2_12, scala2_13) + case 
versionRegex("3", _ , _) => Seq(scala2_12) + } } scalaVersion := crossScalaVersions.value.head libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided" -libraryDependencies += "org.scalatest" %% "scalatest" % "3.1.0" % "test" +libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.18" % "test" credentials += Credentials(Path.userHome / ".sbt" / "sonatype_credentials") -fork in Test := true +Test / fork := true javaOptions ++= Seq("-Xms512M", "-Xmx2048M", "-XX:+CMSClassUnloadingEnabled", "-Duser.timezone=GMT") licenses := Seq("MIT" -> url("http://opensource.org/licenses/MIT")) @@ -46,4 +45,4 @@ publishMavenStyle := true publishTo := sonatypePublishToBundle.value -Global/useGpgPinentry := true +Global / useGpgPinentry := true \ No newline at end of file diff --git a/project/plugins.sbt b/project/plugins.sbt index fc3a0ff..9921a29 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -6,6 +6,6 @@ resolvers += Resolver.typesafeRepo("releases") addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2") -addSbtPlugin("com.geirsson" % "sbt-ci-release" % "1.5.3") +addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.5.12") -addSbtPlugin("com.jsuereth" % "sbt-pgp" % "2.0.1") +addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.2.1") \ No newline at end of file diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/ArrayUtilTest.scala b/src/test/scala/com/github/mrpowers/spark/fast/tests/ArrayUtilTest.scala index c8c6af4..3443ede 100644 --- a/src/test/scala/com/github/mrpowers/spark/fast/tests/ArrayUtilTest.scala +++ b/src/test/scala/com/github/mrpowers/spark/fast/tests/ArrayUtilTest.scala @@ -2,9 +2,9 @@ package com.github.mrpowers.spark.fast.tests import java.sql.Date import java.time.LocalDate -import org.scalatest.FreeSpec +import org.scalatest.freespec.AnyFreeSpec -class ArrayUtilTest extends FreeSpec { +class ArrayUtilTest extends AnyFreeSpec { "blah" in { val arr: Array[(Any, Any)] = Array(("hi", "there"), ("fun", 
"train")) diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/ColumnComparerTest.scala b/src/test/scala/com/github/mrpowers/spark/fast/tests/ColumnComparerTest.scala index d615c75..6920d3a 100644 --- a/src/test/scala/com/github/mrpowers/spark/fast/tests/ColumnComparerTest.scala +++ b/src/test/scala/com/github/mrpowers/spark/fast/tests/ColumnComparerTest.scala @@ -6,9 +6,9 @@ import org.apache.spark.sql.types._ import java.sql.Date import java.sql.Timestamp -import org.scalatest.FreeSpec +import org.scalatest.freespec.AnyFreeSpec -class ColumnComparerTest extends FreeSpec with ColumnComparer with SparkSessionTestWrapper { +class ColumnComparerTest extends AnyFreeSpec with ColumnComparer with SparkSessionTestWrapper { "assertColumnEquality" - { @@ -176,15 +176,18 @@ class ColumnComparerTest extends FreeSpec with ColumnComparer with SparkSessionT ) val actualDF = sourceDF.withColumn( "colors", - split( - concat_ws( - ",", - when(col("words").contains("blue"), "blue"), - when(col("words").contains("red"), "red"), - when(col("words").contains("pink"), "pink"), - when(col("words").contains("cyan"), "cyan") + coalesce( + split( + concat_ws( + ",", + when(col("words").contains("blue"), "blue"), + when(col("words").contains("red"), "red"), + when(col("words").contains("pink"), "pink"), + when(col("words").contains("cyan"), "cyan") + ), + "," ), - "," + typedLit(Array()) ) ) assertColumnEquality(actualDF, "colors", "expected_colors") diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala b/src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala index ee9310c..aeec74d 100644 --- a/src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala +++ b/src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala @@ -2,10 +2,9 @@ package com.github.mrpowers.spark.fast.tests import org.apache.spark.sql.types.{IntegerType, StringType} import SparkSessionExt._ +import 
org.scalatest.freespec.AnyFreeSpec -import org.scalatest.FreeSpec - -class DataFrameComparerTest extends FreeSpec with DataFrameComparer with SparkSessionTestWrapper { +class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with SparkSessionTestWrapper { "prints a descriptive error message if it bugs out" in { val sourceDF = spark.createDF( @@ -64,7 +63,7 @@ class DataFrameComparerTest extends FreeSpec with DataFrameComparer with SparkSe ) ) - val e = intercept[DatasetContentMismatch] { + intercept[DatasetContentMismatch] { assertSmallDataFrameEquality(sourceDF, expectedDF) } } @@ -178,10 +177,10 @@ class DataFrameComparerTest extends FreeSpec with DataFrameComparer with SparkSe ) ) - val e = intercept[DatasetSchemaMismatch] { + intercept[DatasetSchemaMismatch] { assertLargeDataFrameEquality(sourceDF, expectedDF) } - val e2 = intercept[DatasetSchemaMismatch] { + intercept[DatasetSchemaMismatch] { assertSmallDataFrameEquality(sourceDF, expectedDF) } } @@ -203,10 +202,10 @@ class DataFrameComparerTest extends FreeSpec with DataFrameComparer with SparkSe List(("number", IntegerType, true)) ) - val e = intercept[DatasetContentMismatch] { + intercept[DatasetContentMismatch] { assertLargeDataFrameEquality(sourceDF, expectedDF) } - val e2 = intercept[DatasetContentMismatch] { + intercept[DatasetContentMismatch] { assertSmallDataFrameEquality(sourceDF, expectedDF) } } @@ -249,7 +248,7 @@ class DataFrameComparerTest extends FreeSpec with DataFrameComparer with SparkSe ), List(("number", IntegerType, true)) ) - val e = intercept[DatasetContentMismatch] { + intercept[DatasetContentMismatch] { assertSmallDataFrameEquality(sourceDF, expectedDF, orderedComparison = false) } } diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala b/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala index 1a91bbb..07841cd 100644 --- a/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala +++ 
b/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala @@ -2,8 +2,7 @@ package com.github.mrpowers.spark.fast.tests import org.apache.spark.sql.types._ import SparkSessionExt._ - -import org.scalatest.FreeSpec +import org.scalatest.freespec.AnyFreeSpec object Person { @@ -14,10 +13,9 @@ object Person { case class Person(name: String, age: Int) case class PrecisePerson(name: String, age: Double) -class DatasetComparerTest extends FreeSpec with DatasetComparer with SparkSessionTestWrapper { +class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSessionTestWrapper { "checkDatasetEquality" - { - import spark.implicits._ "provides a good README example" in { @@ -276,7 +274,6 @@ class DatasetComparerTest extends FreeSpec with DatasetComparer with SparkSessio ) assertLargeDatasetEquality(sourceDF, expectedDF, orderedComparison = false) - assertSmallDatasetEquality(sourceDF, expectedDF, orderedComparison = false) } "throws an error for unordered Dataset comparisons that don't match" in { @@ -343,7 +340,6 @@ class DatasetComparerTest extends FreeSpec with DatasetComparer with SparkSessio assertLargeDatasetEquality(sourceDF, expectedDF) } } - } "assertSmallDatasetEquality" - { @@ -384,7 +380,6 @@ class DatasetComparerTest extends FreeSpec with DatasetComparer with SparkSessio ), List(("number", IntegerType, true)) ) - assertLargeDatasetEquality(sourceDF, expectedDF, orderedComparison = false) assertSmallDatasetEquality(sourceDF, expectedDF, orderedComparison = false) } @@ -401,7 +396,6 @@ class DatasetComparerTest extends FreeSpec with DatasetComparer with SparkSessio Person("bob", 1) ) ) - assertLargeDatasetEquality(sourceDS, expectedDS, orderedComparison = false) assertSmallDatasetEquality(sourceDS, expectedDS, orderedComparison = false) } @@ -465,7 +459,6 @@ class DatasetComparerTest extends FreeSpec with DatasetComparer with SparkSessio assertSmallDatasetEquality(sourceDF, expectedDF) } } - } "defaultSortDataset" - { @@ 
-629,7 +622,6 @@ class DatasetComparerTest extends FreeSpec with DatasetComparer with SparkSessio assertApproximateDataFrameEquality(ds1, ds2, precision = 0.0000001, orderedComparison = false) } - } // "works with FloatType columns" - { diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/RDDComparerTest.scala b/src/test/scala/com/github/mrpowers/spark/fast/tests/RDDComparerTest.scala index 1c73d8f..5a6cd33 100644 --- a/src/test/scala/com/github/mrpowers/spark/fast/tests/RDDComparerTest.scala +++ b/src/test/scala/com/github/mrpowers/spark/fast/tests/RDDComparerTest.scala @@ -1,8 +1,8 @@ package com.github.mrpowers.spark.fast.tests -import org.scalatest.FreeSpec +import org.scalatest.freespec.AnyFreeSpec -class RDDComparerTest extends FreeSpec with RDDComparer with SparkSessionTestWrapper { +class RDDComparerTest extends AnyFreeSpec with RDDComparer with SparkSessionTestWrapper { "contentMismatchMessage" - { diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/RowComparerTest.scala b/src/test/scala/com/github/mrpowers/spark/fast/tests/RowComparerTest.scala index de20262..ba94e88 100644 --- a/src/test/scala/com/github/mrpowers/spark/fast/tests/RowComparerTest.scala +++ b/src/test/scala/com/github/mrpowers/spark/fast/tests/RowComparerTest.scala @@ -1,10 +1,10 @@ package com.github.mrpowers.spark.fast.tests -import org.scalatest.FreeSpec +import org.scalatest.freespec.AnyFreeSpec import org.apache.spark.sql.Row -class RowComparerTest extends FreeSpec { +class RowComparerTest extends AnyFreeSpec { "areRowsEqual" - { @@ -12,7 +12,7 @@ class RowComparerTest extends FreeSpec { val r1 = Row("a", "b") val r2 = Row("a", "b") assert( - RowComparer.areRowsEqual(r1, r2, 0.0) == true + RowComparer.areRowsEqual(r1, r2, 0.0) ) } @@ -20,7 +20,7 @@ class RowComparerTest extends FreeSpec { val r1 = Row("a", 3) val r2 = Row("a", 4) assert( - RowComparer.areRowsEqual(r1, r2, 0.0) == false + !RowComparer.areRowsEqual(r1, r2, 0.0) ) } diff --git 
a/src/test/scala/com/github/mrpowers/spark/fast/tests/SchemaComparerTest.scala b/src/test/scala/com/github/mrpowers/spark/fast/tests/SchemaComparerTest.scala index d1d898b..a71c0c8 100644 --- a/src/test/scala/com/github/mrpowers/spark/fast/tests/SchemaComparerTest.scala +++ b/src/test/scala/com/github/mrpowers/spark/fast/tests/SchemaComparerTest.scala @@ -1,9 +1,9 @@ package com.github.mrpowers.spark.fast.tests import org.apache.spark.sql.types._ -import org.scalatest.FreeSpec +import org.scalatest.freespec.AnyFreeSpec -class SchemaComparerTest extends FreeSpec { +class SchemaComparerTest extends AnyFreeSpec { "equals" - { @@ -125,7 +125,5 @@ class SchemaComparerTest extends FreeSpec { ) assert(SchemaComparer.equals(s1, s2, ignoreColumnNames = true)) } - } - } diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionTestWrapper.scala b/src/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionTestWrapper.scala index 67f4944..6ad7348 100644 --- a/src/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionTestWrapper.scala +++ b/src/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionTestWrapper.scala @@ -5,12 +5,14 @@ import org.apache.spark.sql.SparkSession trait SparkSessionTestWrapper { lazy val spark: SparkSession = { - SparkSession + val session = SparkSession .builder() .master("local") .appName("spark session") .config("spark.sql.shuffle.partitions", "1") .getOrCreate() + session.sparkContext.setLogLevel("ERROR") + session } }