
Commit

Fixed lexer not taking the longest match. Lexer rule order is now tested. Renamed 'any' to 'anything' and made it a value instead of a func.
RowDaBoat committed Feb 22, 2024
1 parent 9ca08e5 commit c5b70d1
Showing 12 changed files with 68 additions and 32 deletions.
9 changes: 8 additions & 1 deletion Notes.txt
@@ -8,11 +8,18 @@ DONE cleanup repositories
DONE implement a Rejection Reason

[ Lexer ]
TODO support pre-made matchers
whitespaces
newlines
DONE "AnyOperator" should be renamed to something less misleading
DONE change "any" to "anything"
TODO return a result accepting or rejecting the tokenized string, consider cases:
Unexpected tokens
No remaining tokens
Trailing tokens
TODO test rule order
TODO matching is done for every rule, for every token, it could probably be done more efficiently with a DFA
DONE test rule order
DONE bug: lexer is not taking the longest match
DONE add a reference to the corresponding line text to each token
DONE add start and end indices to Token
DONE proper error output
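For the "pre-made matchers" item above, a purely hypothetical sketch of how whitespace and newline rules could be spelled with the DSL this commit already ships; the rule shapes reuse the `or` and `with { ... }` forms from the new tests, and nothing here is part of the commit itself:

```kotlin
// Hypothetical only: whitespace/newline rules written with the existing DSL.
// The `or` and `with { Token(it) }` forms mirror this commit's tests;
// dedicated pre-made matchers are still a TODO and not implemented here.
val lexer = lexer {
    (" " or "\t") with { Token(it) }
    "\n" with { Token(it) }
}
```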
4 changes: 3 additions & 1 deletion src/main/kotlin/io/vexel/kobold/lexer/Lexer.kt
@@ -17,7 +17,9 @@ fun lexer(ruleDeclarations: LexerDSL.() -> Unit): Lexer {
class Lexer(private val rules: MutableList<LexerRule>) {
fun tokenize(text: String): List<Token> {
val lines = text.split('\n')
return generateSequence(LexerState(text, lines, rules, NothingToken())) { it.nextState() }
val initialState = LexerState(text, lines, rules, NothingToken())

return generateSequence(initialState) { it.nextState() }
.filter { it.token !is NothingToken }
.filter { it.token !is IgnoredToken }
.map { it.token }
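tokenize builds the token stream by unfolding lexer states with generateSequence and filtering out the NothingToken seed and any IgnoredToken results. A standalone sketch of that unfold-and-filter pattern, with plain strings standing in for the real state and token types:

```kotlin
// Standalone illustration of the generateSequence pipeline used in tokenize;
// plain strings stand in for LexerState/Token, nothing here is library code.
fun main() {
    val raw = listOf("<nothing>", "a", "<ignored>", "b")

    // generateSequence(seed) { next } keeps producing values until the lambda
    // returns null, the same way tokenize walks LexerState.nextState().
    val tokens = generateSequence(0) { i -> if (i + 1 < raw.size) i + 1 else null }
        .map { raw[it] }
        .filter { it != "<nothing>" && it != "<ignored>" }
        .toList()

    println(tokens) // [a, b]
}
```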
6 changes: 4 additions & 2 deletions src/main/kotlin/io/vexel/kobold/lexer/LexerDSL.kt
@@ -3,6 +3,7 @@ package io.vexel.kobold.lexer
import io.vexel.kobold.matchers.MatcherMemo
import io.vexel.kobold.lexer.dsl.*
import io.vexel.kobold.lexer.rules.LexerRule
import io.vexel.kobold.matchers.Anything

class LexerDSL(
private val rules : MutableList<LexerRule> = mutableListOf(),
@@ -14,10 +15,11 @@ class LexerDSL(
OrOperatorDSL by OrOperator(),
ThenOperatorDSL by ThenOperator(),
AnyOfOperatorDSL by AnyOfOperator(),
AnyOperatorDSL by AnyOperator(),
SequenceOperatorDSL by SequenceOperator(),
ZeroOrMoreOperatorDSL by ZeroOrMoreOperator(memo),
OneOrMoreOperatorDSL by OneOrMoreOperator(memo),
AndOperatorDSL by AndOperator(),
NotOperatorDSL by NotOperator(),
OptionalOperatorDSL by OptionalOperator()
OptionalOperatorDSL by OptionalOperator() {
val anything = Anything()
}
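After this rename, `anything` is a property on the DSL rather than the old `any()` function, so rules read as plain values. A minimal usage sketch, using only the `lexer { ... }`, `then`, and `with { ... }` forms that appear in this commit's tests:

```kotlin
// Before this commit a rule was written as: any() then any() with { Token(it) }
// With `anything` exposed as a value, the call parentheses disappear:
val lexer = lexer {
    anything then anything with { Token(it) }
}

val tokens = lexer.tokenize("ab")
```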
16 changes: 13 additions & 3 deletions src/main/kotlin/io/vexel/kobold/lexer/LexerState.kt
@@ -1,8 +1,10 @@
package io.vexel.kobold.lexer

import io.vexel.kobold.Accepted
import io.vexel.kobold.Token
import io.vexel.kobold.lexer.rules.LexerRule
import io.vexel.kobold.lexer.rules.RuleMatched
import io.vexel.kobold.lexer.rules.RuleResult

class LexerState(
val text: String,
Expand All @@ -20,10 +22,18 @@ class LexerState(
}
}

private fun matchWithRules() =
rules.asSequence()
private fun matchWithRules(): RuleResult? {
val matched = rules
.asSequence()
.map { it.match(text) }
.firstOrNull { it is RuleMatched }
.filterIsInstance<RuleMatched>()
.toList()

return when (matched.any()) {
true -> matched.maxBy { it.token.text.length }
false -> null
}
}

private fun advanceState(result: RuleMatched): LexerState {
val token = result.token
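The rewritten matchWithRules evaluates every rule, keeps only the RuleMatched results, and picks the one with the longest token text (maximal munch), returning null when nothing matched. A self-contained sketch of that selection step; Match is a stand-in for RuleMatched, not the library type:

```kotlin
// Stand-in for RuleMatched, used only to illustrate the selection step.
data class Match(val tokenText: String)

// Longest match wins; null when no rule matched, mirroring the
// when (matched.any()) { ... } branch in matchWithRules above.
fun pickLongest(candidates: List<Match>): Match? =
    candidates.maxByOrNull { it.tokenText.length }

fun main() {
    // With input "@@", both an "@" rule and an "@@" rule match;
    // the longer "@@" match is kept, as the new lexer test asserts.
    println(pickLongest(listOf(Match("@"), Match("@@")))) // Match(tokenText=@@)
}
```

On ties, maxBy keeps the first candidate, so rules declared earlier still win when two matches have the same length, which is what the new "same length" test relies on.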
12 changes: 0 additions & 12 deletions src/main/kotlin/io/vexel/kobold/lexer/dsl/AnyOperator.kt

This file was deleted.

3 changes: 2 additions & 1 deletion src/main/kotlin/io/vexel/kobold/matchers/Empty.kt
@@ -1,8 +1,9 @@
package io.vexel.kobold.matchers

import io.vexel.kobold.Accepted
import io.vexel.kobold.Token

class Empty : Matcher {
override fun match(tokens: List<Token>, rest: Tokens, evaluate: Evaluator) =
io.vexel.kobold.Accepted(emptySequence(), rest)
Accepted(emptySequence(), rest)
}
6 changes: 3 additions & 3 deletions src/main/kotlin/io/vexel/kobold/parser/ParserDSL.kt
@@ -10,14 +10,14 @@ class ParserDSL(private val memo : MatcherMemo = MatcherMemo()) :
OrOperatorDSL by OrOperator(),
ThenOperatorDSL by ThenOperator(),
NotOperatorDSL by NotOperator(),
NonTerminalOperatorDSL by NonTerminalOperators(memo),
TerminalOperatorsDSL by TerminalOperators(),
SequenceOperatorDSL by SequenceOperator(),
AnyOfOperatorDSL by AnyOfOperator(),
ZeroOrMoreOperatorDSL by ZeroOrMoreOperator(memo),
OneOrMoreOperatorDSL by OneOrMoreOperator(memo),
AndOperatorDSL by AndOperator(),
OptionalOperatorDSL by OptionalOperator() {
OptionalOperatorDSL by OptionalOperator(),
NonTerminalMatcherDSL by NonTerminalMatcher(memo),
TerminalMatcherDSL by TerminalMatcher() {
val anything = Anything()
val empty = Empty()
}
Original file line number Diff line number Diff line change
@@ -5,13 +5,13 @@ import io.vexel.kobold.Symbol
import io.vexel.kobold.matchers.Matcher
import io.vexel.kobold.matchers.NonTerminal

interface NonTerminalOperatorDSL {
interface NonTerminalMatcherDSL {
fun nonTerminal(): NonTerminal
fun nonTerminal(producer: (List<Symbol>) -> Symbol): NonTerminal
infix fun NonTerminal.from(matcher: Matcher): Matcher
}

class NonTerminalOperators(private val memo : MatcherMemo) : NonTerminalOperatorDSL {
class NonTerminalMatcher(private val memo : MatcherMemo) : NonTerminalMatcherDSL {
override fun nonTerminal(): NonTerminal =
NonTerminal(memo)

Original file line number Diff line number Diff line change
@@ -5,12 +5,12 @@ import io.vexel.kobold.matchers.Matcher
import io.vexel.kobold.matchers.TerminalByContent
import io.vexel.kobold.matchers.TerminalByType

interface TerminalOperatorsDSL {
interface TerminalMatcherDSL {
fun<T: Token> terminal(type: Class<T>): Matcher
fun terminal(token: Token): Matcher
}

class TerminalOperators : TerminalOperatorsDSL {
class TerminalMatcher : TerminalMatcherDSL {
override fun<T: Token> terminal(type: Class<T>) =
TerminalByType(type)

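These two renames only touch the DSL surface; building a grammar still goes through nonTerminal(), the infix from, and terminal(...). A hedged sketch of how that reads inside a ParserDSL scope, using only the signatures visible in this diff; the Token("@") literal follows this commit's lexer tests and the rule itself is invented for illustration:

```kotlin
// Illustrative only, assumed to run inside a ParserDSL scope.
// nonTerminal(), the infix `from`, and terminal(Token) are the members
// renamed in this diff; the rule below is a made-up example.
val atSign = nonTerminal()
atSign from terminal(Token("@"))
```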
24 changes: 24 additions & 0 deletions src/test/kotlin/io/vexel/kobold/test/lexer/Lexer should.kt
@@ -52,6 +52,30 @@ class `Lexer should` {
assertEquals(expected, tokens)
}

@Test
fun `match always the longest sequence`() {
val lexer = lexer {
"@" with { Ats(it) }
"@@" with { Ats(it) }
}

val tokens = lexer.tokenize("@@")
val expected = listOf(Ats("@@", "@@", 1, 1, 0, 2))
assertEquals(expected, tokens)
}

@Test
fun `match the first rule of two with the same length`() {
val lexer = lexer {
"@" with { Ats(it) }
("@" or "#") with { Token(it) }
}

val tokens = lexer.tokenize("@")
val expected = listOf(Ats("@", "@", 1, 1, 0, 1))
assertEquals(expected, tokens)
}

class Ats(
text: String,
lineText: String = "",
Original file line number Diff line number Diff line change
@@ -5,11 +5,11 @@ import io.vexel.kobold.lexer.lexer
import org.junit.jupiter.api.Test
import kotlin.test.assertEquals

class `Any should` {
class `Anything should` {
@Test
fun `match any character`() {
val lexer = lexer {
any() then any() then any() with { Token(it) }
anything then anything then anything with { Token(it) }
}

val string = "hey"
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
package io.vexel.kobold.test.matchers

import io.vexel.kobold.Accepted
import io.vexel.kobold.Rejected
import io.vexel.kobold.matchers.Anything
import io.vexel.kobold.matchers.match
import io.vexel.kobold.test.parser.dsl.support.tokens
import org.junit.jupiter.api.Test
import kotlin.test.assertIs

class `Any should` {
class `Anything should` {
private val grammar = Anything()

@Test
fun `accept any token`() {
val result = grammar.match(tokens("a"))
assertIs<io.vexel.kobold.Accepted>(result)
assertIs<Accepted>(result)
}

@Test
fun `reject an empty sequence`() {
val result = grammar.match(tokens(""))
assertIs<io.vexel.kobold.Rejected>(result)
assertIs<Rejected>(result)

val reason = result.reason
assertIs<io.vexel.kobold.NoRemainingTokens>(reason)