-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
219 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
128 changes: 128 additions & 0 deletions
128
fastparse-memoize/src/main/scala/io/chymyst/fastparse/Memoize.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
package io.chymyst.fastparse | ||
|
||
import fastparse.{P, Parsed, ParserInput, ParserInputSource, ParsingRun} | ||
import fastparse.internal.{Instrument, Msgs} | ||
|
||
import scala.collection.mutable | ||
|
||
/** Snapshot of the mutable state of a fastparse `ParsingRun`.
  *
  * Every field carries the value of the `ParsingRun` member with the same name
  * (see `PRunData.ofParsingRun`, which fills the fields, and
  * `Memoize.assignToParsingRun`, which writes them back).
  *
  * @param terminalMsgs    value of `ParsingRun.terminalMsgs`
  * @param aggregateMsgs   value of `ParsingRun.aggregateMsgs`
  * @param shortMsg        value of `ParsingRun.shortMsg`
  * @param lastFailureMsg  value of `ParsingRun.lastFailureMsg`
  * @param failureStack    value of `ParsingRun.failureStack`
  * @param isSuccess       value of `ParsingRun.isSuccess`
  * @param logDepth        value of `ParsingRun.logDepth`
  * @param index           value of `ParsingRun.index`
  * @param cut             value of `ParsingRun.cut`
  * @param successValue    value of `ParsingRun.successValue` (untyped, hence `Any`)
  * @param verboseFailures value of `ParsingRun.verboseFailures`
  * @param noDropBuffer    value of `ParsingRun.noDropBuffer`
  * @param misc            contents of `ParsingRun.misc`
  */
final case class PRunData(
  terminalMsgs: Msgs,
  aggregateMsgs: Msgs,
  shortMsg: Msgs,
  lastFailureMsg: Msgs,
  failureStack: List[(String, Int)],
  isSuccess: Boolean,
  logDepth: Int,
  index: Int,
  cut: Boolean,
  successValue: Any,
  verboseFailures: Boolean,
  noDropBuffer: Boolean,
  misc: mutable.Map[Any, Any]
) {

  /** Short rendering that shows only the position, the success flag and the parsed value. */
  override def toString: String =
    s"ParsingRun(index=$index, isSuccess = $isSuccess, successValue = $successValue)"
}
|
||
object PRunData {

  /** Captures the current mutable state of a parsing run as a [[PRunData]] value.
    *
    * The contents of `pr.misc` are copied into a new mutable map; every other
    * field is stored exactly as read from `pr`.
    *
    * @param pr the parsing run whose state is read
    * @tparam T result type of the parsing run (not used by the snapshot)
    * @return a snapshot of the state of `pr` at the time of the call
    */
  def ofParsingRun[T](pr: ParsingRun[T]): PRunData =
    PRunData(
      terminalMsgs = pr.terminalMsgs,
      aggregateMsgs = pr.aggregateMsgs,
      shortMsg = pr.shortMsg,
      lastFailureMsg = pr.lastFailureMsg,
      failureStack = pr.failureStack,
      isSuccess = pr.isSuccess,
      logDepth = pr.logDepth,
      index = pr.index,
      cut = pr.cut,
      successValue = pr.successValue,
      verboseFailures = pr.verboseFailures,
      noDropBuffer = pr.noDropBuffer,
      misc = mutable.Map.from(pr.misc)
    )
}
|
||
/** Memoization of fastparse parsers.
  *
  * A parser marked with `.memoize` stores, per start position in the input, a
  * [[PRunData]] snapshot of the parsing run taken after the parser has run.
  * When the same parser is reached again at the same position, the snapshot is
  * written back into the current parsing run instead of running the parser again.
  *
  * Each use of `.memoize` gets its own cache, selected by the implicit
  * `sourcecode.File` and `sourcecode.Line` values. All caches live in this
  * object and are emptied by [[clearAll]]; the `parse` and `parseInputRaw`
  * wrappers below do that before delegating to fastparse.
  *
  * NOTE(review): the caches are plain mutable maps held in a singleton and no
  * synchronization is visible here; confirm the intended threading model.
  */
object Memoize {

  // One position-indexed cache per (file, line) pair.
  private val cache: mutable.HashMap[(sourcecode.File, sourcecode.Line), mutable.Map[Int, PRunData]] =
    mutable.HashMap.empty

  /** Writes the state stored in `data` into the parsing run `pr` and returns `pr`.
    *
    * Entries of `data.misc` are added to `pr.misc` (overwriting equal keys);
    * keys that are only in `pr.misc` are left as they are.
    *
    * @param data snapshot to restore
    * @param pr   parsing run that is overwritten in place
    * @return the same `pr` instance, after the assignment
    */
  def assignToParsingRun[T](data: PRunData, pr: ParsingRun[T]): ParsingRun[T] = {
    pr.terminalMsgs = data.terminalMsgs
    pr.aggregateMsgs = data.aggregateMsgs
    pr.shortMsg = data.shortMsg
    pr.lastFailureMsg = data.lastFailureMsg
    pr.failureStack = data.failureStack
    pr.isSuccess = data.isSuccess
    pr.logDepth = data.logDepth
    pr.index = data.index
    pr.cut = data.cut
    pr.successValue = data.successValue
    pr.verboseFailures = data.verboseFailures
    pr.noDropBuffer = data.noDropBuffer
    for ((key, value) <- data.misc) pr.misc.update(key, value)
    pr
  }

  /** Runs `parser` at most once per start position, replaying the stored result otherwise.
    *
    * @param cache  position-indexed cache dedicated to this parser
    * @param parser the parser to memoize; passed by name so it only runs on a cache miss
    * @param p      the current parsing run
    * @return `p`, carrying either the fresh or the replayed result
    */
  private def cacheGrammar[R](cache: mutable.Map[Int, PRunData], parser: => P[_])(implicit p: P[_]): P[R] = {
    // The key is the position at which the parser is about to start, so it must
    // be read before `parser` is evaluated. `parser` is mutable: never run it twice.
    val startIndex = p.index
    val snapshot: PRunData = cache.getOrElseUpdate(startIndex, PRunData.ofParsingRun(parser))
    // Running the parser changes `p` so that it holds the parser's result. On a
    // cache hit the parser did not run, so the stored state is assigned to `p`
    // to imitate that side effect.
    val restored = assignToParsingRun(snapshot, p)
    restored.asInstanceOf[P[R]]
  }

  /** Returns the cache registered for `(file, line)`, creating an empty one on first use. */
  private def getOrCreateCache(file: sourcecode.File, line: sourcecode.Line): mutable.Map[Int, PRunData] =
    cache.getOrElseUpdate((file, line), new mutable.HashMap[Int, PRunData])

  /** Adds the `.memoize` method to parsers.
    *
    * @param parser the parser to wrap; by-name, so it is not evaluated until needed
    */
  implicit class MemoizeParser[A](parser: => P[A]) {

    /** Memoized version of the wrapped parser.
      *
      * @param file implicit source file, used together with `line` to select the cache
      * @param line implicit source line, used together with `file` to select the cache
      * @param p    the current parsing run
      */
    def memoize(implicit file: sourcecode.File, line: sourcecode.Line, p: P[_]): P[A] = {
      val positionCache: mutable.Map[Int, PRunData] = getOrCreateCache(file, line)
      cacheGrammar[A](positionCache, parser)
    }
  }

  /** Empties every registered cache; the caches themselves stay registered. */
  def clearAll(): Unit =
    for (positionCache <- cache.values) positionCache.clear()

  /** One line per registered cache, showing its file, line and number of stored entries. */
  def statistics: String = {
    val report = cache.iterator.map { case ((file, line), entries) =>
      s"$file#$line: ${entries.size} entries"
    }
    report.mkString("\n")
  }

  /** Same as `fastparse.parse`, but clears all memoization caches first.
    *
    * Parameters are passed through unchanged to `fastparse.parse`.
    */
  def parse[T](input: ParserInputSource,
               parser: P[_] => P[T],
               verboseFailures: Boolean = false,
               startIndex: Int = 0,
               instrument: Instrument = null): Parsed[T] = {
    // Cached results are keyed by position only, so they must not outlive one input.
    clearAll()
    fastparse.parse(input, parser, verboseFailures, startIndex, instrument)
  }

  /** Same as `fastparse.parseInputRaw`, but clears all memoization caches first.
    *
    * Parameters are passed through unchanged to `fastparse.parseInputRaw`.
    */
  def parseInputRaw[T](input: ParserInput,
                       parser: P[_] => P[T],
                       verboseFailures: Boolean = false,
                       startIndex: Int = 0,
                       traceIndex: Int = -1,
                       instrument: Instrument = null,
                       enableLogging: Boolean = true): ParsingRun[T] = {
    // Cached results are keyed by position only, so they must not outlive one input.
    clearAll()
    fastparse.parseInputRaw(input, parser, verboseFailures, startIndex, traceIndex, instrument, enableLogging)
  }

}
|
||
/* See discussion in https://github.com/com-lihaoyi/fastparse/discussions/301 | ||
//... other rules of the grammar as above. The changes are only for `x_times` and `x_other`: | ||
def x_times[$: P]: P[R] = P(x_other_cached ~ ("*" ~ x_other_cached).rep).map { case (i, is) => i * is.product } | ||
def x_other[$: P]: P[R] = P(number | ("(" ~ expr ~ ")")) | ||
def x_other_cached[$](implicit p: P[$]): P[R] = cachedParser(cache_other, x_other) | ||
// Need a separate cache for every memoized parser. | ||
val cache_other = mutable.Map[Int, PRunData]() | ||
// Need to do cache_other.clear() between different calls to parse an expression. | ||
val n = 500 | ||
cache_other.clear() | ||
assert(parse("(" * (n - 1) + "1" + ")" * (n - 1), program(_)).get.value == 1) | ||
cache_other.clear() | ||
assert(parse("1+" + "(1+" * (n - 1) + "1" + ")" * (n - 1), program(_)).get.value == n + 1) | ||
*/ |
60 changes: 60 additions & 0 deletions
60
fastparse-memoize/src/test/scala/io/chymyst/fastparse/unit/MemoizeTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
package io.chymyst.fastparse.unit | ||
|
||
import com.eed3si9n.expecty.Expecty.expect | ||
import fastparse.NoWhitespace._ | ||
import fastparse._ | ||
import io.chymyst.fastparse.Memoize | ||
import io.chymyst.test.TestTimings | ||
import munit.FunSuite | ||
|
||
/** Checks that `.memoize` keeps a grammar's results intact while making a
  * deliberately slow grammar much faster.
  */
class MemoizeTest extends FunSuite with TestTimings {

  test("slow grammar becomes faster after memoization") {
    // Nesting depth of the parenthesized input `(((...(1)...)))`.
    val depth = 23
    // Built at each use, so the timed expressions below still include building the input.
    def nestedInput: String = "(" * (depth - 1) + "1" + ")" * (depth - 1)

    // Inputs with their expected values, used to check both grammars.
    val sanityCases = Seq(
      "123*(1+1)" -> 246,
      "123*1+1" -> 124,
      "123*1-1" -> 122,
      "123*(1-1)" -> 0
    )

    // Integer calculator grammar: 1+2*3-(4-5)*6 and so on. No spaces, for simplicity.
    def program1[$: P]: P[Int] = P(expr1 ~ End)
    def expr1[$: P]: P[Int] = P(minus1 | plus1)
    def minus1[$: P] = P(times1 ~ "-" ~ expr1).map { case (left, right) => left - right }
    def plus1[$: P] = P(times1 ~ ("+" ~ expr1).rep).map { case (head, tail) => head + tail.sum }
    def times1[$: P] = P(other1 ~ ("*" ~ other1).rep).map { case (head, tail) => head * tail.product }
    def other1[$: P]: P[Int] = P(number | ("(" ~ expr1 ~ ")"))
    def number[$: P] = P(CharIn("0-9").rep(1)).!.map(_.toInt)

    // The plain grammar must give the expected values.
    sanityCases.foreach { case (input, expected) =>
      assert(fastparse.parse(input, program1(_)).get.value == expected)
    }

    // Time the plain grammar on the deeply nested input.
    val (plainResult, plainNanos) = elapsedNanos(fastparse.parse(nestedInput, program1(_)))
    assert(plainResult.get.value == 1)

    // The same grammar again; the only difference is `.memoize` on `other2`.
    import io.chymyst.fastparse.Memoize.MemoizeParser
    def program2[$: P]: P[Int] = P(expr2 ~ End)
    def expr2[$: P]: P[Int] = P(minus2 | plus2)
    def minus2[$: P] = P(times2 ~ "-" ~ expr2).map { case (left, right) => left - right }
    def plus2[$: P] = P(times2 ~ ("+" ~ expr2).rep).map { case (head, tail) => head + tail.sum }
    def times2[$: P] = P(other2 ~ ("*" ~ other2).rep).map { case (head, tail) => head * tail.product }
    def other2[$: P]: P[Int] = P(number | ("(" ~ expr2 ~ ")")).memoize

    // Time the memoized grammar on the same input.
    val (memoResult, memoNanos) = elapsedNanos(Memoize.parse(nestedInput, program2(_)))
    assert(memoResult.get.value == 1)

    // The memoized grammar must give the same values as the plain one.
    sanityCases.foreach { case (input, expected) =>
      assert(Memoize.parse(input, program2(_)).get.value == expected)
    }

    println(s"before memoization: ${plainNanos / 1e9}, after memoization: ${memoNanos / 1e9}, statistics: ${Memoize.statistics}")
    // Memoization should speed up at least 100 times in this example.
    expect(plainNanos > memoNanos * 100)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters