NOTE(review): this patch was recovered from a whitespace-collapsed paste. Line breaks and
indentation are restored; the blank context lines and the padding on whitespace-only changes
in the first build.sbt hunk are assumed (scalafmt-style alignment) -- confirm against the
real commit. The `index` hashes for docs.yml and build.sbt predate the review fixes below.

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..f090925
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,24 @@
+# Builds the Laika documentation site and publishes it to GitHub Pages.
+# Triggered manually only (workflow_dispatch).
+name: Docs
+on:
+  workflow_dispatch:
+
+# The deploy step publishes with GITHUB_TOKEN, which needs write access to
+# repository contents; the default token may be read-only.
+permissions:
+  contents: write
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: olafurpg/setup-scala@v10
+      - name: Build docs
+        run: sbt laikaSite
+      - name: Deploy to GH Pages
+        uses: peaceiris/actions-gh-pages@v4
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: ./target/docs/site
diff --git a/README.md b/README.md
index e1b63a7..543ba23 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# spark-fast-tests
+# Spark Fast Tests
 
 [![CI](https://github.com/MrPowers/spark-fast-tests/actions/workflows/ci.yml/badge.svg)](https://github.com/MrPowers/spark-fast-tests/actions/workflows/ci.yml)
 
@@ -64,7 +64,7 @@ val expectedDS = Seq(
 ).toDS
 ```
 
-![assert_small_dataset_equality_error_message](https://github.com/MrPowers/spark-fast-tests/blob/master/images/assertSmallDatasetEquality_error_message.png)
+![assert_small_dataset_equality_error_message](https://raw.githubusercontent.com/mrpowers-io/spark-fast-tests/main/images/assertSmallDatasetEquality_error_message.png)
 
 The colors in the error message make it easy to identify the rows that aren't equal.
 
@@ -89,12 +89,14 @@ def myLowerClean(col: Column): Column = {
 
 Here's how long the tests take to execute:
 
-|test method|runtime|
-|-------|--------------------|
-|`assertLargeDataFrameEquality`|709 milliseconds|
-|`assertSmallDataFrameEquality`|166 milliseconds|
-|`assertColumnEquality`|108 milliseconds|
-|`evalString`|26 milliseconds|
+
+| test method                    | runtime          |
+|--------------------------------|------------------|
+| `assertLargeDataFrameEquality` | 709 milliseconds |
+| `assertSmallDataFrameEquality` | 166 milliseconds |
+| `assertColumnEquality`         | 108 milliseconds |
+| `evalString`                   | 26 milliseconds  |
+
 
 `evalString` isn't as robust, but is the fastest.
 `assertColumnEquality` is robust and saves a lot of time.
@@ -192,7 +194,7 @@ The following code will throw a `ColumnMismatch` error message:
 assertColumnEquality(df, "name", "expected_name")
 ```
 
-![assert_column_equality_error_message](https://github.com/MrPowers/spark-fast-tests/blob/master/images/assertColumnEquality_error_message.png)
+![assert_column_equality_error_message](https://raw.githubusercontent.com/mrpowers-io/spark-fast-tests/main/images/assertColumnEquality_error_message.png)
 
 Mix in the `ColumnComparer` trait to your test class to access the `assertColumnEquality` method:
 
diff --git a/build.sbt b/build.sbt
index 105cd6a..f1b5a13 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,31 +1,31 @@
 enablePlugins(GitVersioning)
-Compile / scalafmtOnCompile:= true
+Compile / scalafmtOnCompile := true
 
 organization := "com.github.mrpowers"
-name := "spark-fast-tests"
+name         := "spark-fast-tests"
 
 version := "1.10.1"
 
-val versionRegex      = """^(.*)\.(.*)\.(.*)$""".r
+val versionRegex = """^(.*)\.(.*)\.(.*)$""".r
 val sparkVersion = settingKey[String]("Spark version")
 
-val scala2_13= "2.13.13"
-val scala2_12= "2.12.15"
-val scala2_11= "2.11.17"
+val scala2_13 = "2.13.13"
+val scala2_12 = "2.12.15"
+val scala2_11 = "2.11.17"
 
 sparkVersion := System.getProperty("spark.testVersion", "3.5.1")
 
 crossScalaVersions := {
   sparkVersion.value match {
     case versionRegex("3", m, _) if m.toInt >= 2 => Seq(scala2_12, scala2_13)
-    case versionRegex("3", _ , _) => Seq(scala2_12)
+    case versionRegex("3", _, _)                 => Seq(scala2_12)
   }
 }
 
 scalaVersion := crossScalaVersions.value.head
 
 libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided"
-libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.18" % "test"
+libraryDependencies += "org.scalatest"    %% "scalatest" % "3.2.18"           % "test"
 
 credentials += Credentials(Path.userHome / ".sbt" / "sonatype_credentials")
 
@@ -45,4 +45,54 @@ publishMavenStyle := true
 
 publishTo := sonatypePublishToBundle.value
 
-Global / useGpgPinentry := true
\ No newline at end of file
+Global / useGpgPinentry := true
+
+enablePlugins(LaikaPlugin)
+
+import laika.format.Markdown
+import laika.config.SyntaxHighlighting
+import laika.ast.Path.Root
+import laika.ast.{Image, ExternalTarget}
+import laika.helium.config._
+import laika.helium.Helium
+
+// Helium theme for the generated documentation site: landing-page content and links.
+laikaTheme := Helium.defaults.site
+  .landingPage(
+    title = Some("Spark Fast Tests"),
+    subtitle = Some("Unit testing your Apache Spark application"),
+    latestReleases = Seq(
+      // Must match `version` (1.10.1) declared at the top of this file.
+      ReleaseInfo("Latest Stable Release", "1.10.1")
+    ),
+    // NOTE(review): confirm this label against the repository's LICENSE file.
+    license = Some("Apache 2-0"),
+    titleLinks = Seq(
+      VersionMenu.create(unversionedLabel = "Getting Started"),
+      LinkGroup.create(
+        IconLink.external("https://github.com/mrpowers-io/spark-fast-tests", HeliumIcon.github)
+      )
+    ),
+    linkPanel = Some(
+      LinkPanel(
+        "Documentation",
+        TextLink.internal(Root / "about" / "README.md", "Spark Fast Tests")
+      )
+    ),
+    projectLinks = Seq(
+      LinkGroup.create(
+        TextLink.internal(Root / "api" / "com" / "github" / "mrpowers" / "spark" / "fast" / "tests" / "index.html", "API (Scaladoc)")
+      )
+    ),
+    teasers = Seq(
+      Teaser("Fast", "Handle small dataframes effectively and provide column assertions"),
+      Teaser("Flexible", "Works fine with scalatest, uTest, munit")
+    )
+  )
+  .build
+
+// Include the API docs in the site, enable GitHub-flavoured Markdown and syntax
+// highlighting, and read the site sources from the top-level docs/ directory.
+laikaIncludeAPI := true
+laikaExtensions ++= Seq(Markdown.GitHubFlavor, SyntaxHighlighting)
+Laika / sourceDirectories := Seq((ThisBuild / baseDirectory).value / "docs")
diff --git a/docs/about/README.md b/docs/about/README.md
new file mode 120000
index 0000000..fe84005
--- /dev/null
+++ b/docs/about/README.md
@@ -0,0 +1 @@
+../../README.md
\ No newline at end of file
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 9921a29..fc8bdb9 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -8,4 +8,6 @@ addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")
 
 addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.5.12")
 
-addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.2.1")
\ No newline at end of file
+addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.2.1")
+
+addSbtPlugin("org.typelevel" % "laika-sbt" % "1.2.0")