Skip to content

Commit

Permalink
Rename Any matcher, add lineText to each token, add start and end indices for each token, add TerminalByContent to DSL
Browse files Browse the repository at this point in the history
  • Loading branch information
RowDaBoat committed Feb 20, 2024
1 parent ce80a04 commit 2f5cf55
Show file tree
Hide file tree
Showing 15 changed files with 102 additions and 68 deletions.
25 changes: 16 additions & 9 deletions Notes.txt
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
TODO should the parser and lexer provide their own Accept, Reject and Reason types?
DONE Rename 'Any' to 'Anything'
DONE update README.md
DONE cleanup imports
DONE cleanup repositories
DONE matchers

[ Matchers ]
DONE implement a Rejection Reason
DONE parser
DONE proper error output
DONE consider premature EOF
DONE always consume all input
DONE match terminal symbols
DONE write a simple playbook
TODO lexer

[ Lexer ]
TODO return a result accepting or rejecting the tokenized string, consider cases:
Unexpected tokens
No remaining tokens
Trailing tokens
TODO add a reference to the corresponding line in each token
TODO test rule order
DONE add a reference to the corresponding line text to each token
DONE add start and end indices to Token
DONE proper error output
DONE implement line and column numbers
DONE refactor ignore and produce rules in lexer
DONE write a simple playbook

[ Parser ]
TODO bad parentheses break everything in the parser
DONE add TerminalByContent to dsl as terminal(Token("..."))
DONE proper error output
DONE consider premature EOF
DONE always consume all input
DONE match terminal symbols
DONE write a simple playbook
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,4 @@ val errorMessage = when (reason) {
is TrailingTokens -> "Some input remains after parsed expression."
is UnsupportedLeftRecursion -> "The grammar has an unsupported left recursive case. This is a bug."
}
```
```
20 changes: 16 additions & 4 deletions src/main/kotlin/io/vexel/kobold/Token.kt
Original file line number Diff line number Diff line change
@@ -1,20 +1,32 @@
package io.vexel.kobold

open class Token(val text: String): Symbol {
var lineText: String = ""
private set

var line: Int = 0
private set

var column: Int = 0
private set

var start: Int = 0
private set

var end: Int = 0
private set

override val children: List<Symbol> =
emptyList()

constructor(character: Char) : this(character.toString())

fun addMetadata(lineNumber: Int, columnNumber: Int) {
this.line = lineNumber
this.column = columnNumber
fun addMetadata(lineText: String, line: Int, column: Int, start: Int, end: Int) {
this.lineText = lineText
this.line = line
this.column = column
this.start = start
this.end = end
}

override fun show(depth: Int) =
Expand All @@ -27,5 +39,5 @@ open class Token(val text: String): Symbol {
31 * text.hashCode() + children.hashCode()

override fun toString() =
"${this.javaClass.simpleName}(text=\"$text\", line=$line, column=$column)"
"${this.javaClass.simpleName}(text=\"$text\", line=$line, column=$column, start=$start, end=$end)"
}
6 changes: 4 additions & 2 deletions src/main/kotlin/io/vexel/kobold/lexer/Lexer.kt
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ fun lexer(ruleDeclarations: LexerDSL.() -> Unit): Lexer {
}

class Lexer(private val rules: MutableList<LexerRule>) {
fun tokenize(string: String): List<Token> =
generateSequence(LexerState(string, rules, NothingToken())) { it.nextState() }
fun tokenize(text: String): List<Token> {
val lines = text.split('\n')
return generateSequence(LexerState(text, lines, rules, NothingToken())) { it.nextState() }
.filter { it.token !is NothingToken }
.filter { it.token !is IgnoredToken }
.map { it.token }
.toList()
}
}
29 changes: 18 additions & 11 deletions src/main/kotlin/io/vexel/kobold/lexer/LexerState.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ import io.vexel.kobold.lexer.rules.LexerRule
import io.vexel.kobold.lexer.rules.RuleMatched

class LexerState(
val string: String,
val text: String,
private val lines: List<String>,
private val rules: List<LexerRule>,
val token: Token,
private val lineNumber: Int = 1,
private val columnNumber: Int = 1
private val line: Int = 1,
private val column: Int = 1,
private val start: Int = 0
) {
fun nextState(): LexerState? {
return when(val result = matchWithRules()) {
Expand All @@ -20,27 +22,32 @@ class LexerState(

private fun matchWithRules() =
rules.asSequence()
.map { it.match(string) }
.map { it.match(text) }
.firstOrNull { it is RuleMatched }

private fun advanceState(result: RuleMatched): LexerState {
val token = result.token
val tokenText = token.text
val newLineCount = tokenText.count { it == '\n' }
val newLineNumber = lineNumber + newLineCount
val newColumnNumber = getNewColumnNumber(newLineCount, tokenText)
val newLine = line + newLineCount
val newColumn = getNewColumn(newLineCount, tokenText)
val newStart = getNewLine(token.text.length)
val lineText = lines[line - 1]

token.addMetadata(lineNumber, columnNumber)
token.addMetadata(lineText, line, column, start, newStart)

return LexerState(result.rest, rules, result.token, newLineNumber, newColumnNumber)
return LexerState(result.rest, lines, rules, result.token, newLine, newColumn, newStart)
}

private fun getNewColumnNumber(newLineCount: Int, tokenText: String) =
private fun getNewColumn(newLineCount: Int, tokenText: String) =
when (newLineCount) {
0 -> columnNumber + tokenText.length
0 -> column + tokenText.length
else -> tokenText.length - tokenText.lastIndexOf('\n')
}

private fun getNewLine(length: Int) =
start + length

override fun toString() =
"State { string = $string token = ${token.text} }"
"State { string = $text token = ${token.text} }"
}
4 changes: 2 additions & 2 deletions src/main/kotlin/io/vexel/kobold/lexer/dsl/AnyOperator.kt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
package io.vexel.kobold.lexer.dsl

import io.vexel.kobold.matchers.Any
import io.vexel.kobold.matchers.Anything
import io.vexel.kobold.matchers.Matcher

/**
 * DSL fragment that contributes the `any()` operator.
 */
interface AnyOperatorDSL {
/** Returns the [Matcher] that backs the `any()` operator. */
fun any(): Matcher
}

class AnyOperator : AnyOperatorDSL {
override fun any() = Any()
override fun any() = Anything()
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import io.vexel.kobold.NoRemainingTokens
import io.vexel.kobold.Rejected
import io.vexel.kobold.Token

class Any : Matcher {
class Anything : Matcher {
override fun match(tokens: List<Token>, rest: Tokens, evaluate: Evaluator) =
when (rest.any()) {
true -> Accepted(rest.take(1), rest.drop(1), tokens[tokens.count() - rest.count()])
Expand Down
8 changes: 5 additions & 3 deletions src/main/kotlin/io/vexel/kobold/parser/ParserDSL.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package io.vexel.kobold.parser

import io.vexel.kobold.Token
import io.vexel.kobold.matchers.Anything
import io.vexel.kobold.matchers.MatcherMemo
import io.vexel.kobold.matchers.Empty
import io.vexel.kobold.parser.dsl.*
Expand All @@ -9,16 +11,16 @@ class ParserDSL(private val memo : MatcherMemo = MatcherMemo()) :
ThenOperatorDSL by ThenOperator(),
NotOperatorDSL by NotOperator(),
NonTerminalOperatorDSL by NonTerminalOperators(memo),
TerminalOperatorDSL by TerminalOperators(),
TerminalOperatorsDSL by TerminalOperators(),
SequenceOperatorDSL by SequenceOperator(),
AnyOfOperatorDSL by AnyOfOperator(),
ZeroOrMoreOperatorDSL by ZeroOrMoreOperator(memo),
OneOrMoreOperatorDSL by OneOrMoreOperator(memo),
AndOperatorDSL by AndOperator(),
AnyOperatorDSL by AnyOperator(),
OptionalOperatorDSL by OptionalOperator() {
val anything = Anything()
val empty = Empty()
}

inline fun<reified T> ParserDSL.terminal() =
inline fun<reified T: Token> ParserDSL.terminal() =
terminal(T::class.java)
12 changes: 0 additions & 12 deletions src/main/kotlin/io/vexel/kobold/parser/dsl/AnyOperator.kt

This file was deleted.

14 changes: 10 additions & 4 deletions src/main/kotlin/io/vexel/kobold/parser/dsl/TerminalOperators.kt
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
package io.vexel.kobold.parser.dsl

import io.vexel.kobold.Token
import io.vexel.kobold.matchers.Matcher
import io.vexel.kobold.matchers.TerminalByContent
import io.vexel.kobold.matchers.TerminalByType

interface TerminalOperatorDSL {
fun<T> terminal(type: Class<T>): Matcher
interface TerminalOperatorsDSL {
fun<T: Token> terminal(type: Class<T>): Matcher
fun terminal(token: Token): Matcher
}

class TerminalOperators : TerminalOperatorDSL {
override fun<T> terminal(type: Class<T>) =
class TerminalOperators : TerminalOperatorsDSL {
override fun<T: Token> terminal(type: Class<T>) =
TerminalByType(type)

override fun terminal(token: Token) =
TerminalByContent(token)
}
30 changes: 20 additions & 10 deletions src/test/kotlin/io/vexel/kobold/test/lexer/Lexer should.kt
Original file line number Diff line number Diff line change
Expand Up @@ -43,28 +43,38 @@ class `Lexer should` {

val tokens = lexer.tokenize(text)
val expected = listOf(
Ats("@@@@", 1, 1),
Ats("@", 1, 9),
Ats("@@", 2, 1),
Ats("@", 2, 7),
Ats("@", 2, 9)
Ats("@@@@", "@@@@ @", 1, 1, 0, 4),
Ats("@", "@@@@ @", 1, 9, 8, 9),
Ats("@@", "@@ @ @", 2, 1, 10, 12),
Ats("@", "@@ @ @", 2, 7, 16, 17),
Ats("@", "@@ @ @", 2, 9, 18, 19)
)
assertEquals(expected, tokens)
}

class Ats(character: String, lineNumber: Int = 1, columnNumber: Int = 1) : Token(character) {
class Ats(
text: String,
lineText: String = "",
line: Int = 1,
column: Int = 1,
start: Int = 0,
end: Int = 0
) : Token(text) {
init {
this.addMetadata(lineNumber, columnNumber)
this.addMetadata(lineText, line, column, start, end)
}

override fun toString(): String {
return "Ats(text=$text, lineNumber=$line, columnNumber=$column)"
return "Ats(text=$text, line=$line, column=$column, start=$start, end=$end)"
}

override fun equals(other: Any?) =
super.equals(other) && other is Ats &&
(line == other.line) &&
column == other.column
lineText == other.lineText &&
line == other.line &&
column == other.column &&
start == other.start &&
end == other.end

override fun hashCode(): Int =
super.hashCode() + Objects.hash(line, column)
Expand Down
4 changes: 2 additions & 2 deletions src/test/kotlin/io/vexel/kobold/test/matchers/Any should.kt
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package io.vexel.kobold.test.matchers

import io.vexel.kobold.matchers.Any
import io.vexel.kobold.matchers.Anything
import io.vexel.kobold.matchers.match
import io.vexel.kobold.test.parser.dsl.support.tokens
import org.junit.jupiter.api.Test
import kotlin.test.assertIs

class `Any should` {
private val grammar = Any()
private val grammar = Anything()

@Test
fun `accept any token`() {
Expand Down
6 changes: 3 additions & 3 deletions src/test/kotlin/io/vexel/kobold/test/matchers/Not should.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package io.vexel.kobold.test.matchers

import io.vexel.kobold.Token
import io.vexel.kobold.matchers.*
import io.vexel.kobold.matchers.Any
import io.vexel.kobold.matchers.Anything
import io.vexel.kobold.test.parser.dsl.support.tokens
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
Expand All @@ -25,14 +25,14 @@ class `Not should` {

@Test
fun `when combined with Any should accept an empty sequence`() {
val grammar = Not(Any())
val grammar = Not(Anything())
val result = grammar.match(tokens(""))
assertIs<io.vexel.kobold.Accepted>(result)
}

@Test
fun `when combined with Any should reject a non empty sequence`() {
val grammar = Not(Any())
val grammar = Not(Anything())
val result = grammar.match(tokens("a"))
assertIs<io.vexel.kobold.Rejected>(result)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ import io.vexel.kobold.test.parser.dsl.support.tokens
import org.junit.jupiter.api.Test
import kotlin.test.assertIs

class AnyShould {
class AnythingShould {
@Test
fun acceptAnySymbol() {
val any = parser { any().oneOrMore() }
val any = parser { anything.oneOrMore() }
assertIs<Accepted>(any.parse(tokens("aoierbvnaeorbnaietrbae")))
}

@Test
fun rejectAnEmptyList() {
val any = parser { any() }
val any = parser { anything }
assertIs<Rejected>(any.parse(tokens("")))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import kotlin.test.assertIs
class NotShould {
@Test
fun acceptAnythingNotMatchingSequence() {
val not = parser { not("a") then any() }
val not = parser { not("a") then anything }
assertIs<Accepted>(not.parse(tokens("b")))
}

Expand Down

0 comments on commit 2f5cf55

Please sign in to comment.