From 686a855a3632f7b8d31579578f8bd2eecd186396 Mon Sep 17 00:00:00 2001 From: Tuan Pham Date: Sun, 8 Sep 2024 10:44:45 +1000 Subject: [PATCH] Init Benchmarks project --- .github/workflows/ci.yml | 2 + .../spark/fast/tests/MyBenchmark.scala | 41 +++++++++++++++++ build.sbt | 44 +++++++++++++------ .../mrpowers/spark/fast/tests/ArrayUtil.scala | 0 .../spark/fast/tests/ColumnComparer.scala | 0 .../spark/fast/tests/DataFrameComparer.scala | 0 .../fast/tests/DataFramePrettyPrint.scala | 0 .../spark/fast/tests/DataframeUtil.scala | 0 .../spark/fast/tests/DatasetComparer.scala | 0 .../spark/fast/tests/RDDComparer.scala | 0 .../spark/fast/tests/RddHelpers.scala | 0 .../spark/fast/tests/RowComparer.scala | 0 .../spark/fast/tests/SchemaComparer.scala | 0 .../spark/fast/tests/SeqLikesExtensions.scala | 0 .../spark/fast/tests/ufansi/Fansi.scala | 0 .../fast/tests/ufansi/FansiExtensions.scala | 0 .../src}/test/resources/log4j.properties | 0 .../spark/fast/tests/ArrayUtilTest.scala | 0 .../spark/fast/tests/ColumnComparerTest.scala | 0 .../fast/tests/DataFrameComparerTest.scala | 0 .../fast/tests/DatasetComparerTest.scala | 0 .../spark/fast/tests/ExamplesTest.scala | 0 .../spark/fast/tests/RDDComparerTest.scala | 0 .../spark/fast/tests/RowComparerTest.scala | 0 .../spark/fast/tests/SchemaComparerTest.scala | 0 .../fast/tests/SeqLikesExtensionsTest.scala | 0 .../spark/fast/tests/SparkSessionExt.scala | 0 .../fast/tests/SparkSessionTestWrapper.scala | 0 project/plugins.sbt | 2 + 29 files changed, 76 insertions(+), 13 deletions(-) create mode 100644 benchmarks/src/main/scala/com/github/mrpowers/spark/fast/tests/MyBenchmark.scala rename {src => core/src}/main/scala/com/github/mrpowers/spark/fast/tests/ArrayUtil.scala (100%) rename {src => core/src}/main/scala/com/github/mrpowers/spark/fast/tests/ColumnComparer.scala (100%) rename {src => core/src}/main/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparer.scala (100%) rename {src => 
core/src}/main/scala/com/github/mrpowers/spark/fast/tests/DataFramePrettyPrint.scala (100%) rename {src => core/src}/main/scala/com/github/mrpowers/spark/fast/tests/DataframeUtil.scala (100%) rename {src => core/src}/main/scala/com/github/mrpowers/spark/fast/tests/DatasetComparer.scala (100%) rename {src => core/src}/main/scala/com/github/mrpowers/spark/fast/tests/RDDComparer.scala (100%) rename {src => core/src}/main/scala/com/github/mrpowers/spark/fast/tests/RddHelpers.scala (100%) rename {src => core/src}/main/scala/com/github/mrpowers/spark/fast/tests/RowComparer.scala (100%) rename {src => core/src}/main/scala/com/github/mrpowers/spark/fast/tests/SchemaComparer.scala (100%) rename {src => core/src}/main/scala/com/github/mrpowers/spark/fast/tests/SeqLikesExtensions.scala (100%) rename {src => core/src}/main/scala/com/github/mrpowers/spark/fast/tests/ufansi/Fansi.scala (100%) rename {src => core/src}/main/scala/com/github/mrpowers/spark/fast/tests/ufansi/FansiExtensions.scala (100%) rename {src => core/src}/test/resources/log4j.properties (100%) rename {src => core/src}/test/scala/com/github/mrpowers/spark/fast/tests/ArrayUtilTest.scala (100%) rename {src => core/src}/test/scala/com/github/mrpowers/spark/fast/tests/ColumnComparerTest.scala (100%) rename {src => core/src}/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala (100%) rename {src => core/src}/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala (100%) rename {src => core/src}/test/scala/com/github/mrpowers/spark/fast/tests/ExamplesTest.scala (100%) rename {src => core/src}/test/scala/com/github/mrpowers/spark/fast/tests/RDDComparerTest.scala (100%) rename {src => core/src}/test/scala/com/github/mrpowers/spark/fast/tests/RowComparerTest.scala (100%) rename {src => core/src}/test/scala/com/github/mrpowers/spark/fast/tests/SchemaComparerTest.scala (100%) rename {src => core/src}/test/scala/com/github/mrpowers/spark/fast/tests/SeqLikesExtensionsTest.scala 
(100%)
 rename {src => core/src}/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionExt.scala (100%)
 rename {src => core/src}/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionTestWrapper.scala (100%)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2ddcb9d..744b8c6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,3 +17,5 @@ jobs:
       - uses: olafurpg/setup-scala@v10
       - name: Test
         run: sbt -Dspark.testVersion=${{ matrix.spark }} +test
+      - name: Benchmark
+        run: sbt -Dspark.testVersion=${{ matrix.spark }} +benchmarks/Jmh/run
diff --git a/benchmarks/src/main/scala/com/github/mrpowers/spark/fast/tests/MyBenchmark.scala b/benchmarks/src/main/scala/com/github/mrpowers/spark/fast/tests/MyBenchmark.scala
new file mode 100644
index 0000000..4da7450
--- /dev/null
+++ b/benchmarks/src/main/scala/com/github/mrpowers/spark/fast/tests/MyBenchmark.scala
@@ -0,0 +1,69 @@
+package com.github.mrpowers.spark.fast.tests
+
+import org.apache.spark.sql.{DataFrame, SparkSession}
+import org.openjdk.jmh.annotations._
+import org.openjdk.jmh.infra.Blackhole
+
+import java.util.concurrent.TimeUnit
+import scala.util.Try
+
+/**
+ * JMH benchmark for `assertApproximateDataFrameEquality` on two small in-memory DataFrames.
+ *
+ * The SparkSession and both DataFrames are benchmark state created once per trial in `setup`,
+ * so the measured method times the comparison only, not session start-up or DataFrame construction.
+ */
+@State(Scope.Benchmark)
+class MyBenchmark extends DataFrameComparer {
+  private var spark: SparkSession = _
+  private var ds1: DataFrame      = _
+  private var ds2: DataFrame      = _
+
+  /** Starts a local session and builds two DataFrames whose doubles differ by less than the compared precision. */
+  @Setup(Level.Trial)
+  def setup(): Unit = {
+    val session = SparkSession
+      .builder()
+      .master("local")
+      .appName("spark session")
+      .config("spark.sql.shuffle.partitions", "1")
+      .getOrCreate()
+    spark = session
+    session.sparkContext.setLogLevel("ERROR")
+
+    // `import x.implicits._` needs a stable identifier, hence the local val rather than the `spark` field.
+    import session.implicits._
+    ds1 = Seq(
+      ("1", "10/01/2019", 26.762499999999996),
+      ("1", "11/01/2019", 26.762499999999996)
+    ).toDF("col_B", "col_C", "col_A")
+
+    ds2 = Seq(
+      ("1", "10/01/2019", 26.762499999999946),
+      ("1", "11/01/2019", 26.76249999999991)
+    ).toDF("col_B", "col_C", "col_A")
+  }
+
+  /** Stops the session once the trial has finished. */
+  @TearDown(Level.Trial)
+  def tearDown(): Unit = spark.stop()
+
+  /**
+   * Times one approximate comparison of the two prepared DataFrames.
+   *
+   * @param blackHole JMH sink that keeps the comparison result from being optimised away
+   * @return true when the comparison raised no exception
+   */
+  @Benchmark
+  @BenchmarkMode(Array(Mode.AverageTime, Mode.SingleShotTime))
+  @Fork(value = 2)
+  @Warmup(iterations = 10)
+  @Measurement(iterations = 10)
+  @OutputTimeUnit(TimeUnit.NANOSECONDS)
+  def assertApproximateDataFrameEqualityWithPrecision(blackHole: Blackhole): Boolean = {
+    val result = Try(assertApproximateDataFrameEquality(ds1, ds2, precision = 0.0000001, orderedComparison = false))
+
+    blackHole.consume(result)
+    result.isSuccess
+  }
+}
diff --git a/build.sbt b/build.sbt
index 866dfdb..5601492 100644
--- a/build.sbt
+++ b/build.sbt
@@ -8,14 +8,12 @@ version := "1.10.1"
 
 val versionRegex = """^(.*)\.(.*)\.(.*)$""".r
 
-val sparkVersion = settingKey[String]("Spark version")
-
 val scala2_13 = "2.13.14"
 val scala2_12 = "2.12.20"
 
-sparkVersion := System.getProperty("spark.testVersion", "3.5.1")
+val sparkVersion = System.getProperty("spark.testVersion", "3.5.1")
 crossScalaVersions := {
-  sparkVersion.value match {
+  sparkVersion match {
     case versionRegex("3", m, _) if m.toInt >= 2 => Seq(scala2_12, scala2_13)
     case versionRegex("3", _, _)                 => Seq(scala2_12)
   }
@@ -23,17 +21,46 @@ crossScalaVersions := {
 
 scalaVersion := crossScalaVersions.value.head
 
-libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided"
-libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.18" % "test"
+// Settings written at the top level of build.sbt apply to the root project only, so
+// everything the sub-projects rely on is collected here and applied to each of them.
+lazy val commonSettings = Seq(
+  scalaVersion       := (LocalRootProject / scalaVersion).value,
+  crossScalaVersions := (LocalRootProject / crossScalaVersions).value,
+  // javaOptions only reach the test JVM when tests are forked.
+  Test / fork := true,
+  javaOptions ++= {
+    Seq("-Xms512M", "-Xmx2048M", "-Duser.timezone=GMT") ++ (if (System.getProperty("java.version").startsWith("1.8.0"))
+      Seq("-XX:+CMSClassUnloadingEnabled")
+    else Seq.empty)
+  },
+  libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.18" % "test"
+)
 
-credentials += Credentials(Path.userHome / ".sbt" / "sonatype_credentials")
+lazy val core = (project in file("core"))
+  .settings(
+    commonSettings,
+    name := "core",
+    // Spark is supplied by the user's own build; keep it out of the published dependencies.
+    libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion % "provided"
+  )
 
-Test / fork := true
-javaOptions ++= {
-  Seq("-Xms512M", "-Xmx2048M", "-Duser.timezone=GMT") ++ (if (System.getProperty("java.version").startsWith("1.8.0"))
-    Seq("-XX:+CMSClassUnloadingEnabled")
-  else Seq.empty)
-}
+lazy val benchmarks = (project in file("benchmarks"))
+  .dependsOn(core)
+  .enablePlugins(JmhPlugin)
+  .settings(
+    commonSettings,
+    name := "benchmarks",
+    // Benchmarks are never published.
+    publish / skip := true,
+    libraryDependencies ++= Seq(
+      // "provided" dependencies of core are not inherited, and the JMH run needs Spark on its classpath.
+      "org.apache.spark" %% "spark-sql" % sparkVersion,
+      // Required for the JMH IDEA plugin. Keep in sync with the JMH version bundled with sbt-jmh.
+      "org.openjdk.jmh" % "jmh-generator-annprocess" % "1.37"
+    )
+  )
+
+credentials += Credentials(Path.userHome / ".sbt" / "sonatype_credentials")
 
 licenses := Seq("MIT" -> url("http://opensource.org/licenses/MIT"))
 homepage := Some(url("https://github.com/mrpowers-io/spark-fast-tests"))
diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/ArrayUtil.scala b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/ArrayUtil.scala
similarity index 100%
rename from src/main/scala/com/github/mrpowers/spark/fast/tests/ArrayUtil.scala
rename to core/src/main/scala/com/github/mrpowers/spark/fast/tests/ArrayUtil.scala
diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/ColumnComparer.scala b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/ColumnComparer.scala
similarity index 100%
rename from src/main/scala/com/github/mrpowers/spark/fast/tests/ColumnComparer.scala
rename to core/src/main/scala/com/github/mrpowers/spark/fast/tests/ColumnComparer.scala
diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparer.scala b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparer.scala
similarity index 100%
rename from src/main/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparer.scala
rename to core/src/main/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparer.scala
diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/DataFramePrettyPrint.scala b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/DataFramePrettyPrint.scala
similarity index 100%
rename from src/main/scala/com/github/mrpowers/spark/fast/tests/DataFramePrettyPrint.scala
rename to core/src/main/scala/com/github/mrpowers/spark/fast/tests/DataFramePrettyPrint.scala
diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/DataframeUtil.scala
b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/DataframeUtil.scala similarity index 100% rename from src/main/scala/com/github/mrpowers/spark/fast/tests/DataframeUtil.scala rename to core/src/main/scala/com/github/mrpowers/spark/fast/tests/DataframeUtil.scala diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/DatasetComparer.scala b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/DatasetComparer.scala similarity index 100% rename from src/main/scala/com/github/mrpowers/spark/fast/tests/DatasetComparer.scala rename to core/src/main/scala/com/github/mrpowers/spark/fast/tests/DatasetComparer.scala diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/RDDComparer.scala b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/RDDComparer.scala similarity index 100% rename from src/main/scala/com/github/mrpowers/spark/fast/tests/RDDComparer.scala rename to core/src/main/scala/com/github/mrpowers/spark/fast/tests/RDDComparer.scala diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/RddHelpers.scala b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/RddHelpers.scala similarity index 100% rename from src/main/scala/com/github/mrpowers/spark/fast/tests/RddHelpers.scala rename to core/src/main/scala/com/github/mrpowers/spark/fast/tests/RddHelpers.scala diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/RowComparer.scala b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/RowComparer.scala similarity index 100% rename from src/main/scala/com/github/mrpowers/spark/fast/tests/RowComparer.scala rename to core/src/main/scala/com/github/mrpowers/spark/fast/tests/RowComparer.scala diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/SchemaComparer.scala b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/SchemaComparer.scala similarity index 100% rename from src/main/scala/com/github/mrpowers/spark/fast/tests/SchemaComparer.scala rename to 
core/src/main/scala/com/github/mrpowers/spark/fast/tests/SchemaComparer.scala diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/SeqLikesExtensions.scala b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/SeqLikesExtensions.scala similarity index 100% rename from src/main/scala/com/github/mrpowers/spark/fast/tests/SeqLikesExtensions.scala rename to core/src/main/scala/com/github/mrpowers/spark/fast/tests/SeqLikesExtensions.scala diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/ufansi/Fansi.scala b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/ufansi/Fansi.scala similarity index 100% rename from src/main/scala/com/github/mrpowers/spark/fast/tests/ufansi/Fansi.scala rename to core/src/main/scala/com/github/mrpowers/spark/fast/tests/ufansi/Fansi.scala diff --git a/src/main/scala/com/github/mrpowers/spark/fast/tests/ufansi/FansiExtensions.scala b/core/src/main/scala/com/github/mrpowers/spark/fast/tests/ufansi/FansiExtensions.scala similarity index 100% rename from src/main/scala/com/github/mrpowers/spark/fast/tests/ufansi/FansiExtensions.scala rename to core/src/main/scala/com/github/mrpowers/spark/fast/tests/ufansi/FansiExtensions.scala diff --git a/src/test/resources/log4j.properties b/core/src/test/resources/log4j.properties similarity index 100% rename from src/test/resources/log4j.properties rename to core/src/test/resources/log4j.properties diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/ArrayUtilTest.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/ArrayUtilTest.scala similarity index 100% rename from src/test/scala/com/github/mrpowers/spark/fast/tests/ArrayUtilTest.scala rename to core/src/test/scala/com/github/mrpowers/spark/fast/tests/ArrayUtilTest.scala diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/ColumnComparerTest.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/ColumnComparerTest.scala similarity index 100% rename from 
src/test/scala/com/github/mrpowers/spark/fast/tests/ColumnComparerTest.scala rename to core/src/test/scala/com/github/mrpowers/spark/fast/tests/ColumnComparerTest.scala diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala similarity index 100% rename from src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala rename to core/src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala similarity index 100% rename from src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala rename to core/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/ExamplesTest.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/ExamplesTest.scala similarity index 100% rename from src/test/scala/com/github/mrpowers/spark/fast/tests/ExamplesTest.scala rename to core/src/test/scala/com/github/mrpowers/spark/fast/tests/ExamplesTest.scala diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/RDDComparerTest.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/RDDComparerTest.scala similarity index 100% rename from src/test/scala/com/github/mrpowers/spark/fast/tests/RDDComparerTest.scala rename to core/src/test/scala/com/github/mrpowers/spark/fast/tests/RDDComparerTest.scala diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/RowComparerTest.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/RowComparerTest.scala similarity index 100% rename from src/test/scala/com/github/mrpowers/spark/fast/tests/RowComparerTest.scala rename to 
core/src/test/scala/com/github/mrpowers/spark/fast/tests/RowComparerTest.scala
diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/SchemaComparerTest.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/SchemaComparerTest.scala
similarity index 100%
rename from src/test/scala/com/github/mrpowers/spark/fast/tests/SchemaComparerTest.scala
rename to core/src/test/scala/com/github/mrpowers/spark/fast/tests/SchemaComparerTest.scala
diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/SeqLikesExtensionsTest.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/SeqLikesExtensionsTest.scala
similarity index 100%
rename from src/test/scala/com/github/mrpowers/spark/fast/tests/SeqLikesExtensionsTest.scala
rename to core/src/test/scala/com/github/mrpowers/spark/fast/tests/SeqLikesExtensionsTest.scala
diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionExt.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionExt.scala
similarity index 100%
rename from src/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionExt.scala
rename to core/src/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionExt.scala
diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionTestWrapper.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionTestWrapper.scala
similarity index 100%
rename from src/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionTestWrapper.scala
rename to core/src/test/scala/com/github/mrpowers/spark/fast/tests/SparkSessionTestWrapper.scala
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 5bf82d0..a8847c0 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -9,3 +9,6 @@ addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")
 addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.6.1")
 
 addSbtPlugin("org.typelevel" % "laika-sbt" % "1.2.0")
+
+// sbt-jmh 0.4.7 bundles JMH 1.37, matching the jmh-generator-annprocess version pinned in build.sbt.
+addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.7")