Skip to content

Commit

Permalink
Merge pull request #147 from zeotuan/benchmarks
Browse files Browse the repository at this point in the history
Init benchmarks module with multi-projects structure
  • Loading branch information
SemyonSinchenko authored Sep 8, 2024
2 parents dc600c6 + 686a855 commit a59f877
Show file tree
Hide file tree
Showing 29 changed files with 76 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ jobs:
- uses: olafurpg/setup-scala@v10
- name: Test
run: sbt -Dspark.testVersion=${{ matrix.spark }} +test
- name: Benchmark
run: sbt -Dspark.testVersion=${{ matrix.spark }} +benchmarks/Jmh/run
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.github.mrpowers.spark.fast.tests

import org.apache.spark.sql.SparkSession
import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole

import java.util.concurrent.TimeUnit
import scala.util.Try

/**
 * JMH benchmark for `assertApproximateDataFrameEquality` (inherited from `DataFrameComparer`).
 *
 * The Spark session and the two input DataFrames are built once per trial in [[setup]] and
 * kept as benchmark state, so the measured method times only the comparison itself rather
 * than session lookup, log-level configuration and DataFrame construction.
 */
@State(Scope.Benchmark)
private class MyBenchmark extends DataFrameComparer {

  // Fixtures populated by `setup` before any warmup/measurement iteration runs.
  // Fully-qualified type is used so that no additional file-level import is required.
  private var ds1: org.apache.spark.sql.DataFrame = _
  private var ds2: org.apache.spark.sql.DataFrame = _

  /**
   * Creates (or reuses) the local Spark session and builds the two DataFrames under
   * comparison. Runs once per trial, i.e. once per forked JVM, outside the measured region.
   */
  @Setup(Level.Trial)
  def setup(): Unit = {
    val spark = SparkSession
      .builder()
      .master("local")
      .appName("spark session")
      .config("spark.sql.shuffle.partitions", "1")
      .getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")

    import spark.implicits._
    ds1 = Seq(
      ("1", "10/01/2019", 26.762499999999996),
      ("1", "11/01/2019", 26.762499999999996)
    ).toDF("col_B", "col_C", "col_A")

    // Same rows as ds1, with col_A differing only far below the precision used in the benchmark.
    ds2 = Seq(
      ("1", "10/01/2019", 26.762499999999946),
      ("1", "11/01/2019", 26.76249999999991)
    ).toDF("col_B", "col_C", "col_A")
  }

  /**
   * Measures an unordered approximate comparison of the two fixture DataFrames.
   *
   * @param blackHole JMH sink that receives the comparison outcome
   * @return `true` when the assertion completed without throwing, `false` otherwise
   */
  @Benchmark
  @BenchmarkMode(Array(Mode.AverageTime, Mode.SingleShotTime))
  @Fork(value = 2)
  @Warmup(iterations = 10)
  @Measurement(iterations = 10)
  @OutputTimeUnit(TimeUnit.NANOSECONDS)
  def assertApproximateDataFrameEqualityWithPrecision(blackHole: Blackhole): Boolean = {
    // Try captures an assertion failure as a value so a mismatch does not abort the benchmark run.
    val result = Try(assertApproximateDataFrameEquality(ds1, ds2, precision = 0.0000001, orderedComparison = false))

    blackHole.consume(result)
    result.isSuccess
  }
}
44 changes: 31 additions & 13 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,50 @@ version := "1.10.1"

val versionRegex = """^(.*)\.(.*)\.(.*)$""".r

val sparkVersion = settingKey[String]("Spark version")

val scala2_13 = "2.13.14"
val scala2_12 = "2.12.20"

sparkVersion := System.getProperty("spark.testVersion", "3.5.1")
val sparkVersion = System.getProperty("spark.testVersion", "3.5.1")
crossScalaVersions := {
sparkVersion.value match {
sparkVersion match {
case versionRegex("3", m, _) if m.toInt >= 2 => Seq(scala2_12, scala2_13)
case versionRegex("3", _, _) => Seq(scala2_12)
}
}

scalaVersion := crossScalaVersions.value.head

libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided"
libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.18" % "test"
Test / fork := true

credentials += Credentials(Path.userHome / ".sbt" / "sonatype_credentials")
// Settings meant to be shared between sub-projects via `.settings(commonSettings)`:
// JVM options plus the spark-sql and scalatest library dependencies.
lazy val commonSettings = Seq(
// Heap bounds and a fixed GMT timezone; the CMS class-unloading flag is appended only
// when the running JVM reports a java.version starting with "1.8.0".
// NOTE(review): sbt applies javaOptions only to forked JVMs - confirm `fork` is enabled
// for every sub-project that relies on these options.
javaOptions ++= {
Seq("-Xms512M", "-Xmx2048M", "-Duser.timezone=GMT") ++ (if (System.getProperty("java.version").startsWith("1.8.0"))
Seq("-XX:+CMSClassUnloadingEnabled")
else Seq.empty)
},
// spark-sql is resolved at `sparkVersion` in the "compile" configuration; scalatest is test-only.
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-sql" % sparkVersion % "compile",
"org.scalatest" %% "scalatest" % "3.2.18" % "test"
),
)

Test / fork := true
javaOptions ++= {
Seq("-Xms512M", "-Xmx2048M", "-Duser.timezone=GMT") ++ (if (System.getProperty("java.version").startsWith("1.8.0"))
Seq("-XX:+CMSClassUnloadingEnabled")
else Seq.empty)
}
// Sub-project rooted at the `core` directory; it applies commonSettings and is named "core".
lazy val core = (project in file("core"))
.settings(
commonSettings,
name := "core",
)

// Sub-project rooted at the `benchmarks` directory. It depends on `core`, reuses
// commonSettings, adds the JMH annotation-processor artifact and enables JmhPlugin.
lazy val benchmarks = (project in file("benchmarks"))
.dependsOn(core)
.settings(commonSettings)
.settings(
libraryDependencies ++= Seq(
"org.openjdk.jmh" % "jmh-generator-annprocess" % "1.37" //required for jmh IDEA plugin. Make sure this version matches sbt-jmh version!
),
name := "benchmarks",
).enablePlugins(JmhPlugin)

credentials += Credentials(Path.userHome / ".sbt" / "sonatype_credentials")

licenses := Seq("MIT" -> url("http://opensource.org/licenses/MIT"))
homepage := Some(url("https://github.com/mrpowers-io/spark-fast-tests"))
Expand Down
File renamed without changes.
2 changes: 2 additions & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")
addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.6.1")

addSbtPlugin("org.typelevel" % "laika-sbt" % "1.2.0")

addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.3")

0 comments on commit a59f877

Please sign in to comment.