Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lexing before parsing #42

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package parseback.benchmarks

import org.openjdk.jmh.annotations._
import parseback.LexerHelper

import scala.util.parsing.{combinator => spc}

Expand Down Expand Up @@ -115,20 +116,23 @@ class ArithmeticBenchmarks {
else
""

neg + i.toString + operators(i % 4)
neg + i.toString + " " + operators(i % 4)
} drop 1 mkString
}

val sizes = List(2, 4, 8, 16, 32, 64, 128)

sizes.map({ i => i -> inner(i) })(collection.breakOut)
sizes.map({ i => i -> inner(i).dropRight(1) })(collection.breakOut)
}

@Benchmark
def parsebackRun(): Unit = {
import _root_.parseback.LineStream

import cats.Eval
val numR = """\d+""".r
val whitespace = """\s+""".r
implicit val lexer = LexerHelper.lexer(Option(whitespace), Set.empty, Set("+", "-", "*", "/", "(", ")"), Set(numR))

val stream = LineStream[Eval](sample(size))
parseback(stream).value
Expand Down
90 changes: 90 additions & 0 deletions core/src/main/scala/parseback/LexerHelper.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright 2018 Daniel Spiewak
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package parseback

import scala.util.matching.Regex

object LexerHelper {

  /**
   * Builds a lexer that turns one input string into an array of [[Token]]s.
   *
   * At every position the lexer tries, in this order:
   *
   *  1. `whitespace` -- the matched prefix is skipped and yields no token;
   *  1. `keywords`   -- each keyword is compiled as a regex with the whitespace
   *                     pattern (if any) appended, so when `whitespace` is
   *                     defined a keyword only matches when followed by whitespace;
   *  1. `symbols`    -- literal prefixes, longest one wins;
   *  1. `others`     -- arbitrary regexes.
   *
   * For regex-based matches the candidate whose whitespace-stripped text is
   * longest wins; the emitted token carries that stripped text while the input
   * position advances by the full match.
   *
   * Matches of length zero are ignored because they would not advance the
   * input position and the lexer would never terminate.
   *
   * NOTE(review): keywords are interpolated into a regex unquoted, so regex
   * metacharacters inside a keyword are interpreted as such -- confirm whether
   * that is intended.
   *
   * @param whitespace optional pattern for skippable input
   * @param keywords   keyword patterns (see note above)
   * @param symbols    literal symbols
   * @param others     further token patterns, tried last
   * @return a function from input text to its tokens; it throws an `Exception`
   *         with message `lexing fail at <remaining input>` when nothing matches
   */
  def lexer(whitespace: Option[Regex], keywords: Set[String], symbols: Set[String], others: Set[Regex]): String => Array[Token] = {
    // (token produced at this step, if any; index of the next unread character)
    type R = (Option[Token], Int)

    // Compiled at most once per lexer, on first use, instead of once per token.
    lazy val keywordRegexes: Set[Regex] =
      keywords.map(r => s"${r}${whitespace.map(_.pattern).getOrElse("")}".r)

    // Removes every whitespace match from `s` (identity when no whitespace pattern is configured).
    def trim(s: String): String =
      whitespace.fold(s)(_.replaceAllIn(s, ""))

    // Skips a leading run of whitespace.
    def lexingW(rest: String, index: Int): Option[R] =
      for {
        ws <- whitespace
        space <- ws.findPrefixOf(rest)
        if space.nonEmpty // an empty match makes no progress
      } yield (Option.empty[Token], index + space.length)

    // Longest (after trimming) non-empty prefix matched by any of the regexes.
    def lexingR(rs: Set[Regex], rest: String, index: Int): Option[R] = {
      val matches = rs.flatMap(r => r.findPrefixOf(rest)).filter(_.nonEmpty)
      if (matches.nonEmpty) {
        val m = matches.maxBy(k => trim(k).length)
        Some((Some(Token(trim(m))), index + m.length))
      } else {
        None
      }
    }

    // Longest non-empty literal that prefixes the remaining input.
    def lexingS(rs: Set[String], rest: String, index: Int): Option[R] = {
      val candidates = rs.filter(s => s.nonEmpty && rest.startsWith(s))
      if (candidates.nonEmpty) {
        val m = candidates.maxBy(_.length)
        Some((Some(Token(m)), index + m.length))
      } else {
        None
      }
    }

    // One lexing step at `index`; alternatives are evaluated lazily, in priority order.
    def step(str: String, index: Int): R = {
      val sub = str.substring(index)
      lexingW(sub, index)
        .orElse(lexingR(keywordRegexes, sub, index))
        .orElse(lexingS(symbols, sub, index))
        .orElse(lexingR(others, sub, index))
        .getOrElse(throw new Exception(s"lexing fail at $sub"))
    }

    // Tail-recursive so that stack depth does not grow with the number of tokens.
    @scala.annotation.tailrec
    def tokenize(str: String, index: Int, acc: Vector[Token]): Vector[Token] =
      if (str.length <= index) {
        acc
      } else {
        step(str, index) match {
          case (Some(token), next) => tokenize(str, next, acc :+ token)
          case (None, next)        => tokenize(str, next, acc)
        }
      }

    { str: String => tokenize(str, 0, Vector.empty).toArray }
  }

}
24 changes: 19 additions & 5 deletions core/src/main/scala/parseback/Line.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ package parseback
* @param lineNo The line offset of this line within the larger input stream (0 indexed)
* @param colNo The column offset into `base` (0 indexed)
*/
final case class Line(base: String, lineNo: Int = 0, colNo: Int = 0) {
final case class Line(base: Array[Token], lineNo: Int = 0, colNo: Int = 0) {

def head: Char = base charAt colNo
def head: Token = base(colNo)

def project: String = base substring colNo
def project: String = base.map(_.value).mkString(" ")

def isEmpty: Boolean = base.length == colNo

Expand All @@ -36,10 +36,24 @@ final case class Line(base: String, lineNo: Int = 0, colNo: Int = 0) {
this.lineNo < that.lineNo || (this.lineNo == that.lineNo && this.colNo < that.colNo)

def renderError: String =
base + s"${0 until colNo map { _ => ' ' } mkString}^"
project + s"${0 until colNo map { _ => ' ' } mkString}^"

// Arrays compare and hash by reference, so the case-class-generated members
// would treat two lines holding identical tokens as different. Both equals and
// hashCode are therefore defined over the array's contents, keeping the
// "equal objects have equal hash codes" contract intact.
override def equals(thatGeneric: scala.Any): Boolean = thatGeneric match {
  case that: Line =>
    // `deep` gives a sequence view with element-wise equality; a null base is tolerated.
    val thisBase = if (this.base == null) null else this.base.deep
    val thatBase = if (that.base == null) null else that.base.deep

    thisBase == thatBase && lineNo == that.lineNo && colNo == that.colNo

  case _ =>
    false
}

// Must agree with equals: hash the token contents, not the array identity.
override def hashCode: Int = {
  val baseHash = if (base == null) 0 else base.deep.hashCode
  (baseHash, lineNo, colNo).hashCode
}

override def toString: String = s"Line(${project}, ${lineNo}, ${colNo})"
}

object Line extends ((String, Int, Int) => Line) {
object Line extends ((Array[Token], Int, Int) => Line) {

def addTo(lines: Vector[Line], line: Line): Vector[Line] = {
if (lines.isEmpty) {
Expand Down
16 changes: 11 additions & 5 deletions core/src/main/scala/parseback/LineStream.scala
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,24 @@ sealed trait LineStream[F[+_]] extends Product with Serializable {

object LineStream {

def apply[F[+_]: Applicative](str: String): LineStream[F] = {
def apply[F[+_]: Applicative](str: String, lexer: String => Array[Token]): LineStream[F] = {
if (str.isEmpty) {
Empty()
} else {
val splits = str split """\r|\r?\n"""
val (front, last) = splits splitAt (splits.length - 1)
val splits: Array[String] = str split """\r|\r?\n"""
apply(splits.map(lexer))
}
}

apply((front map { _ + "\n" }) ++ last)
def apply[F[+_]: Applicative](line: Array[Token]): LineStream[F] = {
if (line.isEmpty) {
Empty()
} else {
apply(Seq(line))
}
}

def apply[F[+_]: Applicative](lines: Seq[String]): LineStream[F] = {
def apply[F[+_]: Applicative](lines: Seq[Array[Token]]): LineStream[F] = {
val actuals = lines.zipWithIndex map {
case (str, lineNo) => Line(str, lineNo, 0)
}
Expand Down
18 changes: 9 additions & 9 deletions core/src/main/scala/parseback/MemoTable.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ import java.util.HashMap
// TODO it may be possible to retain SOME results between derivations (just not those which involve Apply)
private[parseback] sealed abstract class MemoTable {

def derived[A](from: Parser[A], c: Char, to: Parser[A]): this.type
def derive[A](from: Parser[A], c: Char): Option[Parser[A]]
def derived[A](from: Parser[A], c: Token, to: Parser[A]): this.type
def derive[A](from: Parser[A], c: Token): Option[Parser[A]]

def finished[A](target: Parser[A], results: Results.Cacheable[A]): this.type
def finish[A](target: Parser[A]): Option[Results.Cacheable[A]]
Expand All @@ -48,22 +48,22 @@ private[parseback] final class InitialMemoTable extends MemoTable {
import MemoTable._

// still using the single-derivation optimization here
private val derivations: HashMap[(MemoTable, ParserId[_]), (Char, Parser[_])] = new HashMap(16) // TODO tune capacities
private val derivations: HashMap[(MemoTable, ParserId[_]), (Token, Parser[_])] = new HashMap(16) // TODO tune capacities
private val finishes: HashMap[(MemoTable, ParserId[_]), Results.Cacheable[_]] = new HashMap(16)

def derived[A](from: Parser[A], c: Char, to: Parser[A]): this.type =
def derived[A](from: Parser[A], c: Token, to: Parser[A]): this.type =
derived(this, from, c, to)

private[parseback] def derived[A](table: MemoTable, from: Parser[A], c: Char, to: Parser[A]): this.type = {
private[parseback] def derived[A](table: MemoTable, from: Parser[A], c: Token, to: Parser[A]): this.type = {
derivations.put((table, new ParserId(from)), (c, to))

this
}

def derive[A](from: Parser[A], c: Char): Option[Parser[A]] =
def derive[A](from: Parser[A], c: Token): Option[Parser[A]] =
derive(this, from, c)

private[parseback] def derive[A](table: MemoTable, from: Parser[A], c: Char): Option[Parser[A]] = {
private[parseback] def derive[A](table: MemoTable, from: Parser[A], c: Token): Option[Parser[A]] = {
val back = derivations.get((table, new ParserId(from)))

if (back != null && back._1 == c)
Expand Down Expand Up @@ -92,7 +92,7 @@ private[parseback] final class InitialMemoTable extends MemoTable {

private[parseback] final class FieldMemoTable(delegate: InitialMemoTable) extends MemoTable {

def derived[A](from: Parser[A], c: Char, to: Parser[A]): this.type = {
def derived[A](from: Parser[A], c: Token, to: Parser[A]): this.type = {
if (from.isRoot) {
delegate.derived(this, from, c, to)
} else {
Expand All @@ -104,7 +104,7 @@ private[parseback] final class FieldMemoTable(delegate: InitialMemoTable) extend
this
}

def derive[A](from: Parser[A], c: Char): Option[Parser[A]] = {
def derive[A](from: Parser[A], c: Token): Option[Parser[A]] = {
if (from.isRoot) {
delegate.derive(this, from, c)
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,8 @@

package parseback

final case class Whitespace(layout: Option[Parser[_]]) extends AnyVal
/** A single lexical unit of the input; `value` holds the token's text. */
case class Token(value: String)

object Whitespace {
implicit val Default = Whitespace(None)

def apply(layout: Parser[_]): Whitespace =
Whitespace(Some(layout))
}
object Token {

  /**
   * Convenience constructor: wraps every given string in a [[Token]],
   * keeping the original order, and returns them as an array.
   */
  def apply(strs: String*): Array[Token] = {
    val wrapped = strs.map(text => new Token(text))
    wrapped.toArray
  }
}
7 changes: 2 additions & 5 deletions core/src/main/scala/parseback/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package object parseback {

// TODO macroize
private[parseback] def trace(str: => String): Unit = {
// println(str)
//println(str)
}

// external syntax
Expand All @@ -29,10 +29,7 @@ package object parseback {
val ~ = Tuple2

implicit def literal(str: String): Parser[String] = {
if (str.isEmpty)
Parser.Epsilon("")
else
Parser.Literal(str, 0)
Parser.Literal(str)
}

implicit def literalLazy(str: String): LazyParserSyntax[String] =
Expand Down
Loading