From de76e9fd9fa1bd8ee81df75cd5e8d4d304a29947 Mon Sep 17 00:00:00 2001 From: Marc Siegfarth Date: Wed, 24 Apr 2024 09:07:37 +0200 Subject: [PATCH] add more StateMachines and TokenTypes --- .../lexer/Lexer.java | 50 ++++++++++++------- .../lexer/TokenType.java | 33 ++++++++++++ .../statemachine/KeywordStateMachine.java | 8 ++- .../statemachine/OperatorStateMachine.java | 38 ++++++++++++++ .../lexer/LexerTest.java | 6 +-- 5 files changed, 114 insertions(+), 21 deletions(-) create mode 100644 src/main/java/com/auberer/compilerdesignlectureproject/lexer/statemachine/OperatorStateMachine.java diff --git a/src/main/java/com/auberer/compilerdesignlectureproject/lexer/Lexer.java b/src/main/java/com/auberer/compilerdesignlectureproject/lexer/Lexer.java index 22582ea..a51ccc4 100644 --- a/src/main/java/com/auberer/compilerdesignlectureproject/lexer/Lexer.java +++ b/src/main/java/com/auberer/compilerdesignlectureproject/lexer/Lexer.java @@ -15,23 +15,39 @@ public class Lexer implements ILexer { new IntegerLiteralStateMachine(), new DoubleLiteralStateMachine(), new StringLiteralStateMachine(), - new KeywordStateMachine("int"), - new KeywordStateMachine("double"), - new KeywordStateMachine("string"), - new KeywordStateMachine("empty"), - new KeywordStateMachine("if"), - new KeywordStateMachine("else"), - new KeywordStateMachine("while"), - new KeywordStateMachine("do"), - new KeywordStateMachine("for"), - new KeywordStateMachine("func"), - new KeywordStateMachine("cnuf"), - new KeywordStateMachine("return"), - new KeywordStateMachine("switch"), - new KeywordStateMachine("case"), - new KeywordStateMachine("default"), - new KeywordStateMachine("call"), - new KeywordStateMachine("print"), + new KeywordStateMachine("int", TokenType.TOK_TYPE_INT), + new KeywordStateMachine("double", TokenType.TOK_TYPE_DOUBLE), + new KeywordStateMachine("string", TokenType.TOK_TYPE_STRING), + new KeywordStateMachine("empty", TokenType.TOK_TYPE_EMPTY), + new KeywordStateMachine("if", 
TokenType.TOK_IF), + new KeywordStateMachine("else", TokenType.TOK_ELSE), + new KeywordStateMachine("while", TokenType.TOK_WHILE), + new KeywordStateMachine("do", TokenType.TOK_DO), + new KeywordStateMachine("for", TokenType.TOK_FOR), + new KeywordStateMachine("func", TokenType.TOK_FUNC), + new KeywordStateMachine("cnuf", TokenType.TOK_CNUF), + new KeywordStateMachine("return", TokenType.TOK_RETURN), + new KeywordStateMachine("switch", TokenType.TOK_SWITCH), + new KeywordStateMachine("case", TokenType.TOK_CASE), + new KeywordStateMachine("default", TokenType.TOK_DEFAULT), + new KeywordStateMachine("call", TokenType.TOK_CALL), + new KeywordStateMachine("print", TokenType.TOK_PRINT), + new OperatorStateMachine("{", TokenType.TOK_LBRACE), + new OperatorStateMachine("}", TokenType.TOK_RBRACE), + new OperatorStateMachine("(", TokenType.TOK_LPAREN), + new OperatorStateMachine(")", TokenType.TOK_RPAREN), + new OperatorStateMachine(",", TokenType.TOK_COMMA), + new OperatorStateMachine(":", TokenType.TOK_COLON), + new OperatorStateMachine("+", TokenType.TOK_PLUS), + new OperatorStateMachine("-", TokenType.TOK_MINUS), + new OperatorStateMachine("*", TokenType.TOK_MUL), + new OperatorStateMachine("/", TokenType.TOK_DIV), + new OperatorStateMachine("==", TokenType.TOK_EQUAL), + new OperatorStateMachine("!=", TokenType.TOK_NOT_EQUAL), + new OperatorStateMachine("&&", TokenType.TOK_LOGICAL_AND), + new OperatorStateMachine("||", TokenType.TOK_LOGICAL_OR), + new OperatorStateMachine(";", TokenType.TOK_SEMICOLON), + new OperatorStateMachine("=", TokenType.TOK_ASSIGN), new IdentifierStateMachine() }; diff --git a/src/main/java/com/auberer/compilerdesignlectureproject/lexer/TokenType.java b/src/main/java/com/auberer/compilerdesignlectureproject/lexer/TokenType.java index 8561244..f26e0d5 100644 --- a/src/main/java/com/auberer/compilerdesignlectureproject/lexer/TokenType.java +++ b/src/main/java/com/auberer/compilerdesignlectureproject/lexer/TokenType.java @@ -8,4 +8,37 @@ public enum 
TokenType { TOK_IDENTIFIER, TOK_KEYWORD, TOK_EOF, + TOK_TYPE_INT, + TOK_TYPE_DOUBLE, + TOK_TYPE_STRING, + TOK_TYPE_EMPTY, + TOK_IF, + TOK_ELSE, + TOK_WHILE, + TOK_DO, + TOK_FOR, + TOK_FUNC, + TOK_CNUF, + TOK_RETURN, + TOK_SWITCH, + TOK_CASE, + TOK_DEFAULT, + TOK_CALL, + TOK_PRINT, + TOK_LBRACE, + TOK_RBRACE, + TOK_LPAREN, + TOK_RPAREN, + TOK_COMMA, + TOK_COLON, + TOK_PLUS, + TOK_MINUS, + TOK_MUL, + TOK_DIV, + TOK_EQUAL, + TOK_NOT_EQUAL, + TOK_LOGICAL_AND, + TOK_LOGICAL_OR, + TOK_SEMICOLON, + TOK_ASSIGN } diff --git a/src/main/java/com/auberer/compilerdesignlectureproject/lexer/statemachine/KeywordStateMachine.java b/src/main/java/com/auberer/compilerdesignlectureproject/lexer/statemachine/KeywordStateMachine.java index e1ed57e..b6a7df8 100644 --- a/src/main/java/com/auberer/compilerdesignlectureproject/lexer/statemachine/KeywordStateMachine.java +++ b/src/main/java/com/auberer/compilerdesignlectureproject/lexer/statemachine/KeywordStateMachine.java @@ -5,9 +5,15 @@ public class KeywordStateMachine extends StateMachine { private final String keyword; + private final TokenType tokenType; public KeywordStateMachine(String keyword) { + this(keyword, TokenType.TOK_KEYWORD); + } + + public KeywordStateMachine(String keyword, TokenType tokenType) { this.keyword = keyword; + this.tokenType = tokenType; } @Override @@ -30,7 +36,7 @@ public void init() { @Override public TokenType getTokenType() { if (isInAcceptState()) { - return TokenType.TOK_KEYWORD; + return tokenType; } return TokenType.TOK_INVALID; diff --git a/src/main/java/com/auberer/compilerdesignlectureproject/lexer/statemachine/OperatorStateMachine.java b/src/main/java/com/auberer/compilerdesignlectureproject/lexer/statemachine/OperatorStateMachine.java new file mode 100644 index 0000000..7b6dc1c --- /dev/null +++ b/src/main/java/com/auberer/compilerdesignlectureproject/lexer/statemachine/OperatorStateMachine.java @@ -0,0 +1,38 @@ +package com.auberer.compilerdesignlectureproject.lexer.statemachine; + +import 
com.auberer.compilerdesignlectureproject.lexer.TokenType;
+/** State machine that matches one fixed operator/punctuation string and reports the token type configured for it. */
+public class OperatorStateMachine extends StateMachine {
+
+    private final String punctuation;
+    private final TokenType tokenType;
+    /** Stores both values as given; punctuation must be non-empty because init() marks the state at index length - 1 as accepting. */
+    public OperatorStateMachine(String punctuation, TokenType tokenType) {
+        this.punctuation = punctuation;
+        this.tokenType = tokenType;
+    }
+    /** Registers start state q0 plus one state per operator character, linked in order by single-character transitions. */
+    @Override
+    public void init() {
+        // Start state
+        State start = new State("q0");
+        start.setStartState(true);
+        addState(start);
+
+        // One state per operator character: q1 for the first character, q2 for the second, ...
+        State[] states = new State[punctuation.length()];
+        for (int i = 0; i < punctuation.length(); i++) {
+            states[i] = new State("q" + (i + 1));
+            addState(states[i]);
+            addCharTransition(i == 0 ? start : states[i - 1], states[i], punctuation.charAt(i));
+        }
+
+        // Only the state reached after the complete operator text is an accept state
+        states[punctuation.length() - 1].setAcceptState(true);
+    }
+    /** Yields the configured type only in an accept state and TOK_INVALID otherwise, matching KeywordStateMachine. */
+    @Override
+    public TokenType getTokenType() {
+        return isInAcceptState() ? tokenType : TokenType.TOK_INVALID;
+    }
+}
\ No newline at end of file diff --git a/src/test/java/com/auberer/compilerdesignlectureproject/lexer/LexerTest.java b/src/test/java/com/auberer/compilerdesignlectureproject/lexer/LexerTest.java index 2d79778..2b76d3c 100644 --- a/src/test/java/com/auberer/compilerdesignlectureproject/lexer/LexerTest.java +++ b/src/test/java/com/auberer/compilerdesignlectureproject/lexer/LexerTest.java @@ -80,7 +80,7 @@ public void testKeywordValidation() throws Exception { Reader reader = createReader(fileName, "int"); Lexer lexer = new Lexer(reader); - lexer.expect(TokenType.TOK_KEYWORD); + lexer.expect(TokenType.TOK_TYPE_INT); cleanUp(fileName, reader); } @@ -114,7 +114,7 @@ public void testValidationOfMultipleTokens() throws Exception { Reader reader = createReader(fileName, "int test\n08.15 \n 42 \"hallo\" \"hello world\""); Lexer lexer = new Lexer(reader); - lexer.expect(TokenType.TOK_KEYWORD); + lexer.expect(TokenType.TOK_TYPE_INT); lexer.expect(TokenType.TOK_IDENTIFIER); lexer.expect(TokenType.TOK_DOUBLE_LITERAL); lexer.expect(TokenType.TOK_INTEGER_LITERAL); @@
-132,7 +132,7 @@ public void testUnexpectedToken() throws Exception { Reader reader = createReader(fileName, "int test\n08.1.5 \n 42 \"hallo\" \"hello world\""); Lexer lexer = new Lexer(reader); - lexer.expect(TokenType.TOK_KEYWORD); + lexer.expect(TokenType.TOK_TYPE_INT); lexer.expect(TokenType.TOK_IDENTIFIER); try {