Skip to content
This repository has been archived by the owner on Jul 10, 2024. It is now read-only.

Commit

Permalink
fix lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
Marc Siegfarth committed Apr 23, 2024
1 parent c99cc33 commit f4ed447
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 73 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,40 @@
import com.auberer.compilerdesignlectureproject.reader.CodeLoc;
import com.auberer.compilerdesignlectureproject.reader.IReader;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;

public class Lexer implements ILexer {

private static final String[] keywordList = {
"abstract", "continue", "for", "new", "switch",
"assert", "default", "goto", "package", "synchronized",
"boolean", "do", "if", "private", "this",
"break", "double", "implements", "protected", "throw",
"byte", "else", "import", "public", "throws",
"case", "enum", "instanceof", "return", "transient",
"catch", "extends", "int", "short", "try",
"char", "final", "interface", "static", "void",
"class", "finally", "long", "strictfp", "volatile",
"const", "float", "native", "super", "while"
private final StateMachine[] stateMachines = new StateMachine[] {
new IntegerLiteralStateMachine(),
new DoubleLiteralStateMachine(),
new StringLiteralStateMachine(),
new KeywordStateMachine("int"),
new KeywordStateMachine("double"),
new KeywordStateMachine("string"),
new KeywordStateMachine("empty"),
new KeywordStateMachine("if"),
new KeywordStateMachine("else"),
new KeywordStateMachine("while"),
new KeywordStateMachine("do"),
new KeywordStateMachine("for"),
new KeywordStateMachine("func"),
new KeywordStateMachine("cnuf"),
new KeywordStateMachine("return"),
new KeywordStateMachine("switch"),
new KeywordStateMachine("case"),
new KeywordStateMachine("default"),
new KeywordStateMachine("call"),
new KeywordStateMachine("print"),
new IdentifierStateMachine()
};

private final IReader reader;
private String tokenText;
private Token currentToken;

public Lexer(IReader reader) {
this.reader = reader;
Expand All @@ -32,47 +46,75 @@ public Lexer(IReader reader) {

/**
 * Returns the current token without consuming any input.
 *
 * <p>The token is classified once, inside {@code advance()}, and cached in
 * {@code currentToken}; repeated calls therefore return the same instance
 * until {@code advance()} is called again.
 *
 * @return the token produced by the last {@code advance()} call, or
 *         {@code null} if no token has been lexed yet
 */
@Override
public Token getToken() {
    return currentToken;
}

@Override
public void advance() {
tokenText = "";

// skip whitespaces
while (!reader.isEOF() && (reader.getChar() == ' ' || reader.getChar() == '\n' || reader.getChar() == '\t')) {
while (!isEOF() && isWhiteSpace(reader.getChar())) {
reader.advance();
}

// read token chars
while (!reader.isEOF() && reader.getChar() != ' ' && reader.getChar() != '\n' && reader.getChar() != '\t') {
if (isEOF()) {
currentToken = new Token(TokenType.TOK_EOF, "", reader.getCodeLoc());
return;
}

List<StateMachine> remainingStateMachines = initializeStateMachineList();
StateMachine lastManStanding = null;

do {
tokenText += reader.getChar();

if (reader.getChar() == '"') {
advanceStringLiteral();
for (int i = remainingStateMachines.size()-1; i >= 0; i--) {
StateMachine stateMachine = remainingStateMachines.get(i);

try {
stateMachine.processInput(reader.getChar());
} catch (IllegalStateException e) {
remainingStateMachines.remove(stateMachine);
lastManStanding = stateMachine;
}
}

reader.advance();

if (isEOF()) {
lastManStanding = remainingStateMachines.isEmpty() ? lastManStanding : remainingStateMachines.getFirst();
break;
}

} while (!remainingStateMachines.isEmpty());

tokenText = tokenText.trim();
lastManStanding.reset();

for (char c : tokenText.toCharArray()) {
try {
lastManStanding.processInput(c);
} catch (IllegalStateException e) {
currentToken = new Token(TokenType.TOK_INVALID, tokenText, getCodeLoc());
return;
}
}

currentToken = new Token(lastManStanding.getTokenType(), tokenText, getCodeLoc());
}

/**
 * Tells whether the given character separates tokens.
 *
 * @param c character to classify
 * @return {@code true} for a space, a line feed or a tab; {@code false} otherwise
 */
private boolean isWhiteSpace(char c) {
    // Membership test against the three separator characters.
    return " \n\t".indexOf(c) >= 0;
}

/**
 * Prepares all configured state machines for lexing a new token.
 *
 * <p>Each machine is initialised and reset, in array order, and collected into a
 * fresh mutable list so that the caller can remove candidates without touching
 * the {@code stateMachines} array.
 *
 * @return a new list holding every configured state machine, in array order
 */
private ArrayList<StateMachine> initializeStateMachineList() {
    ArrayList<StateMachine> candidates = new ArrayList<>(this.stateMachines.length);
    for (StateMachine machine : this.stateMachines) {
        machine.init();
        machine.reset();
        candidates.add(machine);
    }
    return candidates;
}

@Override
Expand Down Expand Up @@ -103,39 +145,4 @@ public boolean isEOF() {
public CodeLoc getCodeLoc() {
    // The lexer keeps no position of its own; it forwards the reader's location.
    final CodeLoc readerLocation = reader.getCodeLoc();
    return readerLocation;
}

/**
 * Picks the state machine that should validate the given token text, based on
 * its first character.
 *
 * <ul>
 *   <li>{@code "} selects the string literal machine</li>
 *   <li>{@code _} selects the identifier machine</li>
 *   <li>a digit selects the double literal machine if the text contains a
 *       {@code .}, otherwise the integer literal machine</li>
 *   <li>a letter selects a keyword machine if the whole text is listed in
 *       {@code keywordList}, otherwise the identifier machine</li>
 * </ul>
 *
 * @param tokenText raw token text; may be empty
 * @return a new state machine for the text, or {@code null} if the text is
 *         {@code null}, empty or starts with an unsupported character
 */
private static StateMachine getStateMachineFor(String tokenText) {
    // Without a first character there is nothing to dispatch on; charAt(0) would throw.
    if (tokenText == null || tokenText.isEmpty()) {
        return null;
    }

    char firstChar = tokenText.charAt(0);

    if (firstChar == '"') {
        return new StringLiteralStateMachine();
    }

    if (firstChar == '_') {
        return new IdentifierStateMachine();
    }

    if (new Range('0', '9').contains(firstChar)) {
        if (tokenText.contains(".")) {
            return new DoubleLiteralStateMachine();
        }
        return new IntegerLiteralStateMachine();
    }

    if (new Range('A', 'Z').contains(firstChar) || new Range('a', 'z').contains(firstChar)) {
        if (Arrays.asList(keywordList).contains(tokenText)) {
            return new KeywordStateMachine(tokenText);
        }
        return new IdentifierStateMachine();
    }

    return null;
}

/**
 * Consumes the remainder of a string literal, appending every character up to
 * and including the closing quote to {@code tokenText}.
 *
 * <p>Expects the reader to be positioned on the opening quote. If the input ends
 * before a closing quote is found, reading stops at the end of input and
 * {@code tokenText} holds the unterminated literal.
 */
private void advanceStringLiteral() {
    do {
        reader.advance();
        if (reader.isEOF()) {
            // Unterminated literal: stop instead of reading past the end of input.
            return;
        }
        tokenText += reader.getChar();
    } while (reader.getChar() != '"');
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public void testIdentifierValidation() throws Exception {
@Test
public void testKeywordValidation() throws Exception {
String fileName = "keyword-test.txt";
Reader reader = createReader(fileName, "new");
Reader reader = createReader(fileName, "int");
Lexer lexer = new Lexer(reader);

lexer.expect(TokenType.TOK_KEYWORD);
Expand Down Expand Up @@ -138,7 +138,7 @@ public void testUnexpectedToken() throws Exception {
try {
lexer.expect(TokenType.TOK_DOUBLE_LITERAL);
} catch (Exception e) {
assert e.getMessage().equals("Unexpected token (08.1.5) at 2:7. Expected: TOK_DOUBLE_LITERAL actual: TOK_INVALID");
assert e.getMessage().equals("Unexpected token (08.1.) at 2:6. Expected: TOK_DOUBLE_LITERAL actual: TOK_INVALID");
}

cleanUp(fileName, reader);
Expand Down

0 comments on commit f4ed447

Please sign in to comment.