Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/yaml perftest #34

Merged
merged 17 commits into from
Jul 12, 2024
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,9 @@ Another feature is that some parses need to fail for others to succeed. For exam
identifier. However, `missing` is a keyword and is matched first. To ensure correct parsing, negative lookahead is used
for keywords.

To improve parsing performance, the parsing results for some sub-expressions are memoized.
This is implemented via an add-on library `fastparse-memoize`.

#### Limitations

So far, there are some issues with the Unicode characters:
Expand All @@ -238,8 +241,15 @@ So far, there are some issues with the Unicode characters:

# Release version history


## 0.2.1

- Implemented `fastparse-memoize` to speed up parsing (by 10x and more in some cases).
- Upgraded to fastparse 3.1.x

## 0.2.0

- First version published on Sonatype
- Fixed the regression described in https://github.com/dhall-lang/dhall-haskell/issues/2597
- Support for Yaml and JSON export
- Standalone JAR executable `dhall.jar` with command-line options similar to `dhall-haskell`
Expand Down
102 changes: 77 additions & 25 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import sbt.Keys.{developers, homepage, scmInfo}
import sbt.Keys.homepage
import sbt.url
import sbtassembly.AssemblyKeys.assembly
import xerial.sbt.Sonatype.{GitHubHosting, sonatypeCentralHost}
import xerial.sbt.Sonatype.GitHubHosting

import scala.collection.immutable.List

val thisReleaseVersion = "0.2.0"
val thisReleaseVersion = "0.2.1"

val scala2V = "2.13.13"
val scala212V = "2.12.19"
Expand All @@ -18,7 +16,7 @@ def munitFramework = new TestFramework("munit.Framework")
val munitTest = "org.scalameta" %% "munit" % "0.7.29" % Test
val assertVerboseTest = "com.eed3si9n.expecty" %% "expecty" % "0.16.0" % Test

val fastparse = "com.lihaoyi" %% "fastparse" % "3.0.2"
val fastparse = "com.lihaoyi" %% "fastparse" % "3.1.1"
val antlr4 = "org.antlr" % "antlr4-runtime" % "4.13.1"
val anltr4_formatter = "com.khubla.antlr4formatter" % "antlr4-formatter-standalone" % "1.2.1" % Provided

Expand All @@ -34,6 +32,7 @@ val cbor1 = "co.nstant.in" % "cbor" % "0.9"
val cbor2 = "com.upokecenter" % "cbor" % "4.5.3"
val reflections = "org.reflections" % "reflections" % "0.10.2"
val mainargs = "com.lihaoyi" %% "mainargs" % "0.7.0"
val sourcecode = "com.lihaoyi" %% "sourcecode" % "0.4.2"

// Not used now:
val flatlaf = "com.formdev" % "flatlaf" % "3.2.2"
Expand All @@ -53,9 +52,6 @@ lazy val publishingOptions = Seq(
description := "Implementation of the Dhall language in Scala, with Scala language bindings",
publishTo := sonatypePublishToBundle.value,
sonatypeProjectHosting := Some(GitHubHosting("winitzki", "scall", "[email protected]")),
// homepage := Some(url("https://github.com/winitzki/scall")),
// scmInfo := Some(ScmInfo(url("https://github.com/winitzki/scall"), "scm:[email protected]:winitzki/scall.git")),
// developers := List(Developer(id = "winitzki", name = "Sergei Winitzki", email = "[email protected]", url = url("https://sites.google.com/site/winitzki"))),
)

lazy val noPublishing =
Expand All @@ -71,11 +67,72 @@ lazy val jdkModuleOptions: Seq[String] = {
lazy val root = (project in file("."))
.settings(noPublishing)
.settings(scalaVersion := scalaV, crossScalaVersions := Seq(scalaV), name := "scall-root")
.aggregate(scall_core, scall_testutils, dhall_codec, abnf, scall_macros, scall_typeclasses, scall_cli)
.aggregate(scall_core, scall_testutils, dhall_codec, abnf, scall_macros, scall_typeclasses, scall_cli, nano_dhall, fastparse_memoize)

// Proof-of-concept sub-project living in `nano-dhall/`. It is never published (see `noPublishing`).
lazy val nano_dhall = (project in file("nano-dhall")) // This is a POC project.
  .settings(noPublishing)
  .settings(
    name                     := "nano-dhall",
    scalaVersion             := scalaV,
    crossScalaVersions       := supportedScalaVersions,
    Test / parallelExecution := true,
    Test / fork              := true,
    coverageEnabled          := false,
    scalafmtFailOnErrors     := false, // Cannot disable the unicode surrogate pair error in Parser.scala?
    testFrameworks += munitFramework,
    Test / javaOptions ++= jdkModuleOptions,
    Compile / scalacOptions ++= {
      CrossVersion.partialVersion(scalaVersion.value) match {
        case Some((3, _))       => Seq("-Ydebug")
        case Some((2, 12 | 13)) => Seq("-Ypatmat-exhaust-depth", "10") // Cannot make it smaller than 10. Want to speed up compilation.
        case _                  => Seq.empty // Unlisted Scala version: add no extra flags instead of failing with a MatchError.
      }
    },
    // NOTE(review): this key is scoped to `ThisBuild` although it is declared inside one project's settings,
    // so it is not limited to `nano-dhall` — confirm that this is intended.
    ThisBuild / scalacOptions ++= {
      CrossVersion.partialVersion(scalaVersion.value) match {
        case Some((3, _))       => Seq("-Ykind-projector") // Seq("-Ykind-projector:underscores")
        case Some((2, 12 | 13)) => Seq() // Seq("-Xsource:3", "-P:kind-projector:underscore-placeholders")
        case _                  => Seq.empty // Unlisted Scala version: add no extra flags instead of failing with a MatchError.
      }
    },
    // We need to run tests in forked JVM starting with the current directory set to the base resource directory.
    // That base directory should contain `./dhall-lang` and all files below that.
    Test / baseDirectory     := (Test / resourceDirectory).value,
    // addCompilerPlugin is a shortcut for libraryDependencies += compilerPlugin(dependency)
    // See https://stackoverflow.com/questions/67579041
    libraryDependencies ++=
      (CrossVersion.partialVersion(scalaVersion.value) match {
        case Some((2, _)) => Seq(scala_reflect(scalaVersion.value), kindProjectorPlugin)
        case Some((3, _)) => Seq.empty // No need for scala-reflect with Scala 3.
        case _            => Seq.empty // Unknown or unparsable Scala version: add nothing instead of failing with a MatchError.
      }),
    libraryDependencies ++= Seq(
      fastparse,
      antlr4,
      anltr4_formatter,
      munitTest,
      assertVerboseTest,
      enumeratum,
      cbor2,
      // scalahashing,
      // cbor3,
      httpRequest,
      os_lib % Test,
    ),
  ).dependsOn(scall_testutils % "test->compile", scall_typeclasses, fastparse_memoize)

// The `fastparse-memoize` add-on library (sources under `fastparse-memoize/`), published with the shared `publishingOptions`.
lazy val fastparse_memoize = (project in file("fastparse-memoize"))
  .settings(publishingOptions)
  .settings(
    // Artifact identity and Scala versions.
    name               := "fastparse-memoize",
    scalaVersion       := scalaV,
    crossScalaVersions := supportedScalaVersions,
  )
  .settings(
    // Test setup and dependencies.
    testFrameworks += munitFramework,
    Test / javaOptions ++= jdkModuleOptions,
    libraryDependencies ++= Seq(fastparse, sourcecode, munitTest, assertVerboseTest),
  )
  .dependsOn(scall_testutils % "test->compile")

lazy val scall_core = (project in file("scall-core"))
.settings(publishingOptions)
.settings(
name := "dhall-scala-core",
scalaVersion := scalaV,
crossScalaVersions := supportedScalaVersions,
Test / parallelExecution := true,
Expand Down Expand Up @@ -119,11 +176,12 @@ lazy val scall_core = (project in file("scall-core"))
httpRequest,
os_lib % Test,
),
).dependsOn(scall_testutils % "test->compile", scall_typeclasses)
).dependsOn(scall_testutils % "test->compile", scall_typeclasses, fastparse_memoize)

lazy val scall_testutils = (project in file("scall-testutils"))
.settings(publishingOptions)
.settings(
name := "dhall-scala-testutils",
scalaVersion := scalaV,
crossScalaVersions := supportedScalaVersions,
Test / parallelExecution := true,
Expand All @@ -136,6 +194,7 @@ lazy val scall_testutils = (project in file("scall-testutils"))
lazy val dhall_codec = (project in file("dhall-codec"))
.settings(publishingOptions)
.settings(
name := "dhall-scala-bindings",
scalaVersion := scalaV,
crossScalaVersions := supportedScalaVersions,
Test / parallelExecution := true,
Expand All @@ -155,6 +214,7 @@ lazy val dhall_codec = (project in file("dhall-codec"))
lazy val scall_cli = (project in file("scall-cli"))
.settings(publishingOptions)
.settings(
name := "dhall-scala-cli",
scalaVersion := scalaV,
crossScalaVersions := supportedScalaVersions,
Test / parallelExecution := true,
Expand All @@ -178,7 +238,7 @@ lazy val scall_cli = (project in file("scall-cli"))
lazy val abnf = (project in file("abnf"))
.settings(noPublishing)
.settings(
name := "scall-abnf",
name := "dhall-scala-abnf",
scalaVersion := scalaV,
crossScalaVersions := supportedScalaVersions,
Test / parallelExecution := true,
Expand All @@ -189,7 +249,7 @@ lazy val abnf = (project in file("abnf"))
lazy val scall_macros = (project in file("scall-macros"))
.settings(publishingOptions)
.settings(
name := "scall-macros",
name := "dhall-scala-macros",
scalaVersion := scalaV,
crossScalaVersions := supportedScalaVersions,
Test / parallelExecution := true,
Expand All @@ -205,7 +265,7 @@ lazy val scall_macros = (project in file("scall-macros"))
lazy val scall_typeclasses = (project in file("scall-typeclasses"))
.settings(publishingOptions)
.settings(
name := "scall-typeclasses",
name := "dhall-scala-typeclasses",
scalaVersion := scalaV,
crossScalaVersions := supportedScalaVersions,
Test / parallelExecution := true,
Expand All @@ -220,18 +280,10 @@ lazy val scall_typeclasses = (project in file("scall-typeclasses"))

/////////////////////////////////////////////////////////////////////////////////////////////////////
// Publishing to Sonatype Maven repository
publishMavenStyle := true
publishTo := sonatypePublishToBundle.value
sonatypeProfileName := "io.chymyst"
publishMavenStyle := true
publishTo := sonatypePublishToBundle.value
sonatypeProfileName := "io.chymyst"
//ThisBuild / sonatypeCredentialHost := sonatypeCentralHost // Not relevant because io.chymyst was created before 2021.

/*{
val nexus = "https://oss.sonatype.org/"
if (isSnapshot.value)
Some("snapshots" at nexus + "content/repositories/snapshots")
else
Some("releases" at nexus + "service/local/staging/deploy/maven2")
}*/
//
Test / publishArtifact := false
//
Expand Down
122 changes: 122 additions & 0 deletions fastparse-memoize/src/main/scala/io/chymyst/fastparse/Memoize.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package io.chymyst.fastparse

import fastparse.{P, Parsed, ParserInput, ParserInputSource, ParsingRun}
import fastparse.internal.{Instrument, Msgs}

import scala.collection.mutable

/* See discussion in https://github.com/com-lihaoyi/fastparse/discussions/301 */

/** A snapshot of the mutable fields of a `ParsingRun`.
  *
  * Each field mirrors the `ParsingRun` member of the same name. Values are captured by
  * `PRunData.ofParsingRun` and written back by `Memoize.assignToParsingRun`.
  */
final case class PRunData(
  terminalMsgs: Msgs,
  aggregateMsgs: Msgs,
  shortMsg: Msgs,
  lastFailureMsg: Msgs,
  failureStack: List[(String, Int)],
  isSuccess: Boolean,
  logDepth: Int,
  index: Int,
  cut: Boolean,
  successValue: Any,
  verboseFailures: Boolean,
  noDropBuffer: Boolean,
  misc: mutable.Map[Any, Any],
) {

  /** Short rendering that shows only the position, the success flag and the parsed value. */
  override def toString: String =
    s"ParsingRun(index=$index, isSuccess = $isSuccess, successValue = $successValue)"
}

object PRunData {

  /** Captures the current mutable state of `pr` into a new [[PRunData]] value.
    *
    * The `misc` map is copied into a fresh map, so later changes to `pr.misc` do not show up in the snapshot.
    *
    * @param pr the parsing run whose state is recorded
    * @return a snapshot holding the field values read from `pr`
    */
  def ofParsingRun[T](pr: ParsingRun[T]): PRunData = PRunData(
    terminalMsgs = pr.terminalMsgs,
    aggregateMsgs = pr.aggregateMsgs,
    shortMsg = pr.shortMsg,
    lastFailureMsg = pr.lastFailureMsg,
    failureStack = pr.failureStack,
    isSuccess = pr.isSuccess,
    logDepth = pr.logDepth,
    index = pr.index,
    cut = pr.cut,
    successValue = pr.successValue,
    verboseFailures = pr.verboseFailures,
    noDropBuffer = pr.noDropBuffer,
    misc = mutable.Map.empty[Any, Any] ++= pr.misc,
  )
}

object Memoize {
// Global switch for memoization: when `false`, `MemoizeParser.memoize` returns the wrapped parser unchanged and no cache is used.
val enable = true

/** Writes a recorded snapshot back into a live parsing run.
  *
  * Every mutable field of `pr` is overwritten with the value stored in `data`, and all entries of
  * `data.misc` are added to `pr.misc` (existing keys are overwritten, other keys are left in place).
  *
  * @param data the snapshot to apply
  * @param pr   the parsing run to mutate
  * @return the same `pr` instance, after mutation
  */
def assignToParsingRun[T](data: PRunData, pr: ParsingRun[T]): ParsingRun[T] = {
  // Position and outcome.
  pr.index = data.index
  pr.isSuccess = data.isSuccess
  pr.successValue = data.successValue
  pr.cut = data.cut

  // Failure reporting.
  pr.terminalMsgs = data.terminalMsgs
  pr.aggregateMsgs = data.aggregateMsgs
  pr.shortMsg = data.shortMsg
  pr.lastFailureMsg = data.lastFailureMsg
  pr.failureStack = data.failureStack

  // Remaining bookkeeping fields.
  pr.logDepth = data.logDepth
  pr.verboseFailures = data.verboseFailures
  pr.noDropBuffer = data.noDropBuffer
  pr.misc ++= data.misc

  pr
}

/** Runs `parser` at the current position of `p`, or replays the outcome recorded earlier for that position.
  *
  * @param cache  recorded outcomes, keyed by the input index at which the parser was started
  * @param parser the parser to memoize; passed by name so that it is evaluated only on a cache miss
  * @param p      the current parsing run, which is mutated to hold the (possibly cached) outcome
  * @return `p` itself, cast to the requested result type
  */
@inline private def cacheGrammar[R](cache: mutable.Map[Int, PRunData], parser: => P[_])(implicit p: P[_]): P[R] = {
  // Read the start position before anything can run: evaluating `parser` mutates the parsing run.
  val startIndex = p.index
  // `parser` has not been run yet at this point. It must be evaluated at most once, which `getOrElseUpdate` guarantees.
  val outcome: PRunData = cache.getOrElseUpdate(startIndex, PRunData.ofParsingRun(parser))
  // On a cache hit the parser did not run, so its side effect on `p` is imitated by assigning the recorded state.
  // On a cache miss this re-assigns the state that was just recorded.
  assignToParsingRun(outcome, p).asInstanceOf[P[R]]
}

// One memoization table per `.memoize` call site, identified by the source file and line of that call.
private val cache = mutable.HashMap.empty[(sourcecode.File, sourcecode.Line), mutable.Map[Int, PRunData]]

/** Returns the memoization table for the given call site, creating and registering an empty one on first use. */
private def getOrCreateCache(file: sourcecode.File, line: sourcecode.Line): mutable.Map[Int, PRunData] = {
  val callSite = (file, line)
  cache.getOrElseUpdate(callSite, mutable.HashMap.empty[Int, PRunData])
}

/** Adds the `.memoize` method to parser expressions.
  *
  * The wrapped parser is taken by name, so it is not run when the wrapper is created.
  */
implicit class MemoizeParser[A](parser: => P[A]) {

  /** Memoizes the outcome of the wrapped parser per input position.
    *
    * The cache used is specific to the call site, identified by the implicit source file and line.
    * When `enable` is `false`, the wrapped parser is returned as is.
    */
  @inline def memoize(implicit file: sourcecode.File, line: sourcecode.Line, p: P[_]): P[A] =
    if (!enable) parser
    else {
      val callSiteCache: mutable.Map[Int, PRunData] = getOrCreateCache(file, line)
      cacheGrammar[A](callSiteCache, parser)
    }
}

/** Empties every per-call-site memoization table. The tables themselves stay registered. */
def clearAll(): Unit =
  for (callSiteCache <- cache.values) callSiteCache.clear()

/** Describes the current cache usage: one line per call site, in the form `<file>#<line>: <n> entries`. */
def statistics: String =
  cache.iterator
    .map { case ((file, line), callSiteCache) => s"$file#$line: ${callSiteCache.size} entries" }
    .mkString("\n")

/** Drop-in replacement for `fastparse.parse` that resets the memoization caches around the run.
  *
  * The caches are cleared before parsing, so results from an earlier input are never reused.
  * They are cleared again afterwards, even if parsing throws, so no snapshots are retained.
  *
  * All parameters are passed unchanged to `fastparse.parse`.
  *
  * @return the result of `fastparse.parse`
  */
def parse[T](
  input: ParserInputSource,
  parser: P[_] => P[T],
  verboseFailures: Boolean = false,
  startIndex: Int = 0,
  instrument: Instrument = null,
): Parsed[T] = {
  clearAll()
  // `finally` guarantees the cleanup even when the parser (or user code inside it) throws.
  try fastparse.parse(input, parser, verboseFailures, startIndex, instrument)
  finally clearAll()
}

/** Drop-in replacement for `fastparse.parseInputRaw` that resets the memoization caches around the run.
  *
  * The caches are cleared before parsing, so results from an earlier input are never reused.
  * They are cleared again afterwards, even if parsing throws, so no snapshots are retained.
  *
  * All parameters are passed unchanged to `fastparse.parseInputRaw`.
  *
  * @return the `ParsingRun` returned by `fastparse.parseInputRaw`
  */
def parseInputRaw[T](
  input: ParserInput,
  parser: P[_] => P[T],
  verboseFailures: Boolean = false,
  startIndex: Int = 0,
  traceIndex: Int = -1,
  instrument: Instrument = null,
  enableLogging: Boolean = true,
): ParsingRun[T] = {
  clearAll()
  // `finally` guarantees the cleanup even when the parser (or user code inside it) throws.
  try fastparse.parseInputRaw(input, parser, verboseFailures, startIndex, traceIndex, instrument, enableLogging)
  finally clearAll()
}

}
Loading
Loading