From e9f12455332b4ccd4a4b7e7a521605231c3b30ce Mon Sep 17 00:00:00 2001 From: Ahmed Harmouche Date: Wed, 7 Aug 2024 15:10:47 +0200 Subject: [PATCH] Start work on SourceLocation for ASTNodes --- include/AST/AST.h | 19 +++++++++++++++++-- include/AST/PrinterASTVisitor.h | 1 + include/Lexer/Token.h | 14 ++++---------- lib/AST/PrinterASTVisitor.cpp | 9 ++++++++- lib/Lexer/Lexer.cpp | 28 ++++++++++++++++++++-------- lib/Lexer/Token.cpp | 4 ++-- lib/Parser/Parser.cpp | 19 +++++++------------ test/Lexer/LexerTest.cpp | 24 ++++++++++++------------ 8 files changed, 71 insertions(+), 47 deletions(-) diff --git a/include/AST/AST.h b/include/AST/AST.h index 1378ff7..3c5d02d 100644 --- a/include/AST/AST.h +++ b/include/AST/AST.h @@ -11,6 +11,16 @@ namespace shaderpulse { namespace ast { +struct SourceLocation { + SourceLocation() = default; + SourceLocation(int startLine, int startCol, int endLine, int endCol) : startLine(startLine), startCol(startCol), + endLine(endLine), endCol(endCol) { } + int startLine; + int startCol; + int endLine; + int endCol; +}; + enum UnaryOperator { Inc, Dec, Plus, Dash, Bang, Tilde }; enum BinaryOperator { @@ -51,8 +61,13 @@ enum AssignmentOperator { class ASTNode { public: + ASTNode(SourceLocation loc = SourceLocation()) : loc(loc) { } virtual ~ASTNode() = default; virtual void accept(ASTVisitor *visitor) = 0; + SourceLocation getSourceLocation() { return loc; } + +protected: + SourceLocation loc; }; class Expression : public ASTNode { @@ -608,10 +623,10 @@ class AssignmentExpression : public Statement { class FunctionDeclaration : public ExternalDeclaration { public: - FunctionDeclaration(std::unique_ptr returnType, const std::string &name, + FunctionDeclaration(SourceLocation location, std::unique_ptr returnType, const std::string &name, std::vector> params, std::unique_ptr body) - : returnType(std::move(returnType)), name(name), + : ASTNode(location), returnType(std::move(returnType)), name(name), params(std::move(params)), body(std::move(body)) {} void accept(ASTVisitor *visitor) override { visitor->visit(this); } diff --git a/include/AST/PrinterASTVisitor.h b/include/AST/PrinterASTVisitor.h index fc85683..212b8ef 100644 --- a/include/AST/PrinterASTVisitor.h +++ b/include/AST/PrinterASTVisitor.h @@ -49,6 +49,7 @@ class PrinterASTVisitor : public ASTVisitor { void print(const std::string &text); int indentationLevel = 0; std::set levels; + std::string loc(const SourceLocation &sourceLoc); }; }; // namespace ast diff --git a/include/Lexer/Token.h b/include/Lexer/Token.h index 0fb3421..2251b58 100644 --- a/include/Lexer/Token.h +++ b/include/Lexer/Token.h @@ -1,4 +1,5 @@ #pragma once +#include "AST/AST.h" #include #include @@ -6,13 +7,6 @@ namespace shaderpulse { namespace lexer { -struct SourceLocation { - SourceLocation() = default; - SourceLocation(int line, int col) : line(line), col(col) { } - int line; - int col; -}; - class NumericLiteral { public: virtual ~NumericLiteral() = 0; @@ -83,14 +77,14 @@ class Token { bool isDoubleConstant() const; NumericLiteral *getLiteralData() const; void setLiteralData(std::unique_ptr); - void setSourceLocation(SourceLocation loc); - SourceLocation getSourceLocation() const; + void setSourceLocation(ast::SourceLocation loc); + ast::SourceLocation getSourceLocation() const; void setRawData(const std::string&); std::string getRawData() const; private: TokenKind tokenKind; - SourceLocation sourceLoc; + ast::SourceLocation sourceLoc; std::string identifierName; std::string rawData; std::unique_ptr literalData; diff --git a/lib/AST/PrinterASTVisitor.cpp b/lib/AST/PrinterASTVisitor.cpp index e3e0403..3ab68de 100644 --- a/lib/AST/PrinterASTVisitor.cpp +++ b/lib/AST/PrinterASTVisitor.cpp @@ -2,6 +2,7 @@ #include "AST/Util.h" #include "AST/PrinterASTVisitor.h" #include +#include namespace shaderpulse { @@ -268,7 +269,7 @@ void PrinterASTVisitor::visit(DiscardStatement *discardStmt) { } void PrinterASTVisitor::visit(FunctionDeclaration *funcDecl) { - print("|-FunctionDeclaration: name=" + funcDecl->getName() + ", return type=" + funcDecl->getReturnType()->toString()); + print("|-FunctionDeclaration: name=" + funcDecl->getName() + " " + loc(funcDecl->getSourceLocation()) + ", return type=" + funcDecl->getReturnType()->toString()); indent(); print("|-Args:"); @@ -296,5 +297,11 @@ void PrinterASTVisitor::visit(CaseLabel *caseLabel) { resetIndent(); } +std::string PrinterASTVisitor::loc(const SourceLocation &sourceLoc) { + std::stringstream ssLoc; + ssLoc << "[line: " << sourceLoc.startLine << ", col:" << sourceLoc.startCol << "]->" << "[line: " << sourceLoc.endLine << ", col:" << sourceLoc.endCol << "]"; + return ssLoc.str(); +} + } // namespace ast } // namespace shaderpulse diff --git a/lib/Lexer/Lexer.cpp b/lib/Lexer/Lexer.cpp index 1663126..be57254 100644 --- a/lib/Lexer/Lexer.cpp +++ b/lib/Lexer/Lexer.cpp @@ -42,6 +42,11 @@ Lexer::lexCharacterStream() { } } + auto tok = std::make_unique(); + tok->setTokenKind(TokenKind::Eof); + tok->setSourceLocation(ast::SourceLocation(lineNum, col, lineNum, col)); + tokenStream.push_back(std::move(tok)); + return tokenStream; } @@ -109,7 +114,8 @@ bool Lexer::handleIdentifier(Error &error) { tok->setTokenKind(TokenKind::Identifier); } - tok->setSourceLocation(SourceLocation(lineNum, startCol)); + int endCol = col; + tok->setSourceLocation(ast::SourceLocation(lineNum, startCol, lineNum, endCol)); tok->setRawData(token); tokenStream.push_back(std::move(tok)); @@ -156,7 +162,8 @@ bool Lexer::handleHexLiteral(Error &error) { tok->setTokenKind(isUnsigned ? TokenKind::UnsignedIntegerConstant : TokenKind::IntegerConstant); - tok->setSourceLocation(SourceLocation(lineNum, startCol)); + int endCol = col; + tok->setSourceLocation(ast::SourceLocation(lineNum, startCol, lineNum, endCol)); tok->setRawData(literalConstant); tokenStream.push_back(std::move(tok)); @@ -206,7 +213,8 @@ bool Lexer::handleOctalLiteral(Error &error) { tok->setTokenKind(isUnsigned ? TokenKind::UnsignedIntegerConstant : TokenKind::IntegerConstant); - tok->setSourceLocation(SourceLocation(lineNum, startCol)); + int endCol = col; + tok->setSourceLocation(ast::SourceLocation(lineNum, startCol, lineNum, endCol)); tok->setRawData(literalConstant); tokenStream.push_back(std::move(tok)); @@ -268,7 +276,8 @@ bool Lexer::handleDecimalOrFloatLiteral(Error &error) { std::make_unique(std::stof(literalConstant))); } - tok->setSourceLocation(SourceLocation(lineNum, startCol)); + int endCol = col; + tok->setSourceLocation(ast::SourceLocation(lineNum, startCol, lineNum, endCol)); tok->setRawData(literalConstant); tokenStream.push_back(std::move(tok)); return true; @@ -287,7 +296,8 @@ bool Lexer::handleDecimalOrFloatLiteral(Error &error) { tok->setTokenKind(TokenKind::IntegerConstant); } - tok->setSourceLocation(SourceLocation(lineNum, startCol)); + int endCol = col; + tok->setSourceLocation(ast::SourceLocation(lineNum, startCol, lineNum, endCol)); tok->setRawData(literalConstant); tokenStream.push_back(std::move(tok)); return true; @@ -343,7 +353,8 @@ bool Lexer::handleExponentialForm(std::string &literalConstant, Error &error) { tok->setTokenKind(TokenKind::FloatConstant); } - tok->setSourceLocation(SourceLocation(lineNum, startCol)); + int endCol = col; + tok->setSourceLocation(ast::SourceLocation(lineNum, startCol, lineNum, endCol)); tok->setRawData(literalConstant); tokenStream.push_back(std::move(tok)); @@ -586,8 +597,9 @@ bool Lexer::peekExponentialPart() { void Lexer::addToken(TokenKind kind) { auto tok = std::make_unique(); tok->setTokenKind(kind); - tok->setSourceLocation(SourceLocation(lineNum, col)); - std::string rawData = characters.substr(savedCharPos, curCharPos - savedCharPos + 1); + int tokenLength = curCharPos - savedCharPos + 1; + tok->setSourceLocation(ast::SourceLocation(lineNum, col - tokenLength, lineNum, col)); + std::string rawData = characters.substr(savedCharPos, tokenLength); tok->setRawData(rawData); tokenStream.push_back(std::move(tok)); } diff --git a/lib/Lexer/Token.cpp b/lib/Lexer/Token.cpp index a60e82d..fa1e9fa 100644 --- a/lib/Lexer/Token.cpp +++ b/lib/Lexer/Token.cpp @@ -40,11 +40,11 @@ void Token::setLiteralData(std::unique_ptr literalData) { this->literalData = std::move(literalData); } -void Token::setSourceLocation(SourceLocation loc) { +void Token::setSourceLocation(ast::SourceLocation loc) { sourceLoc = loc; } -SourceLocation Token::getSourceLocation() const { +ast::SourceLocation Token::getSourceLocation() const { return sourceLoc; } diff --git a/lib/Parser/Parser.cpp b/lib/Parser/Parser.cpp index 41ff70a..47b7e1d 100644 --- a/lib/Parser/Parser.cpp +++ b/lib/Parser/Parser.cpp @@ -61,6 +61,7 @@ std::unique_ptr Parser::parseFunctionDeclaration() { return nullptr; } + auto startLoc = curToken->getSourceLocation(); advanceToken(); auto returnType = std::move(type); @@ -78,9 +79,9 @@ std::unique_ptr Parser::parseFunctionDeclaration() { advanceToken(); if (auto body = parseStatement()) { - return std::make_unique( - std::move(returnType), functionName, std::move(params), - std::move(body)); + auto endLoc = curToken->getSourceLocation(); + auto spanLoc = SourceLocation(startLoc.startLine, startLoc.startCol, endLoc.endLine, endLoc.endCol); + return std::make_unique(spanLoc, std::move(returnType), functionName, std::move(params), std::move(body)); } else { return nullptr; } @@ -1466,10 +1467,7 @@ std::unique_ptr Parser::parseConditionalExpression() { } void Parser::advanceToken() { - if ((cursor > -1) && ((size_t)cursor >= (tokenStream.size() - 1))) { - auto tok = std::make_unique(); - tok->setTokenKind(TokenKind::Eof); - tokenStream.push_back(std::move(tok)); + if ((cursor > -1) && (size_t)cursor >= (tokenStream.size())) { curToken = tokenStream.back().get(); } else { curToken = tokenStream[++cursor].get(); @@ -1478,17 +1476,14 @@ void Parser::advanceToken() { const Token *Parser::peek(int k) { if ((size_t)(cursor + k) >= tokenStream.size()) { - auto tok = std::make_unique(); - tok->setTokenKind(TokenKind::Eof); - tokenStream.push_back(std::move(tok)); - return tokenStream[tokenStream.size() - 1].get(); + return tokenStream.back().get(); } else { return tokenStream[cursor + k].get(); } } void Parser::reportError(ParserErrorKind kind, const std::string &msg) { - std::cout << msg << ", at line: " << curToken->getSourceLocation().line << ", col: " << curToken->getSourceLocation().col << std::endl; + std::cout << msg << ", at line: " << curToken->getSourceLocation().startLine << ", col: " << curToken->getSourceLocation().startCol << std::endl; error = ParserError(kind, msg); } diff --git a/test/Lexer/LexerTest.cpp b/test/Lexer/LexerTest.cpp index fb28ecc..710af1b 100644 --- a/test/Lexer/LexerTest.cpp +++ b/test/Lexer/LexerTest.cpp @@ -52,9 +52,9 @@ TEST(LexerTest, IntegerLiterals) { auto& tokens = (*resp).get(); - EXPECT_EQ(tokens.size(), expectedValues.size()); + EXPECT_EQ(tokens.size()-1, expectedValues.size()); - for (size_t i = 0; i < tokens.size(); i++) { + for (size_t i = 0; i < tokens.size()-1; i++) { if (i > 2) { EXPECT_EQ(tokens[i].get()->getTokenKind(), TokenKind::UnsignedIntegerConstant); EXPECT_EQ(dynamic_cast(tokens.at(i)->getLiteralData())->getVal(), expectedValues[i]); @@ -81,9 +81,9 @@ TEST(LexerTest, FloatLiterals) { auto& tokens = (*resp).get(); - EXPECT_EQ(tokens.size(), expectedValues.size()); + EXPECT_EQ(tokens.size()-1, expectedValues.size()); - for (size_t i = 0; i < tokens.size(); i++) { + for (size_t i = 0; i < tokens.size()-1; i++) { EXPECT_EQ(tokens[i].get()->getTokenKind(), TokenKind::FloatConstant); EXPECT_EQ(dynamic_cast(tokens.at(i)->getLiteralData())->getVal(), expectedValues[i]); } @@ -105,9 +105,9 @@ TEST(LexerTest, DoubleLiterals) { auto& tokens = (*resp).get(); - EXPECT_EQ(tokens.size(), expectedValues.size()); + EXPECT_EQ(tokens.size()-1, expectedValues.size()); - for (size_t i = 0; i < tokens.size(); i++) { + for (size_t i = 0; i < tokens.size()-1; i++) { EXPECT_EQ(tokens[i].get()->getTokenKind(), TokenKind::DoubleConstant); EXPECT_EQ(dynamic_cast(tokens.at(i)->getLiteralData())->getVal(), expectedValues[i]); } @@ -129,9 +129,9 @@ TEST(LexerText, Identifiers) { auto& tokens = (*resp).get(); - EXPECT_EQ(tokens.size(), identifiersList.size()); + EXPECT_EQ(tokens.size()-1, identifiersList.size()); - for (size_t i = 0; i < tokens.size(); i++) { + for (size_t i = 0; i < tokens.size()-1; i++) { EXPECT_EQ(tokens[i].get()->getIdentifierName(), identifiersList[i]); EXPECT_EQ(tokens[i].get()->getTokenKind(), TokenKind::Identifier); } @@ -147,9 +147,9 @@ TEST(LexerTest, Keywords) { auto& tokens = (*resp).get(); - EXPECT_EQ(tokens.size(), expectedTokenKinds.size()); + EXPECT_EQ(tokens.size()-1, expectedTokenKinds.size()); - for (size_t i = 0; i < tokens.size(); ++i) { + for (size_t i = 0; i < tokens.size()-1; ++i) { EXPECT_EQ(tokens[i]->getTokenKind(), expectedTokenKinds[i]); } } @@ -164,9 +164,9 @@ TEST(LexerTest, Punctuators) { auto& tokens = (*resp).get(); - EXPECT_EQ(tokens.size(), expectedTokenKinds.size()); + EXPECT_EQ(tokens.size()-1, expectedTokenKinds.size()); - for (size_t i = 0; i < tokens.size(); ++i) { + for (size_t i = 0; i < tokens.size()-1; ++i) { EXPECT_EQ(tokens[i]->getTokenKind(), expectedTokenKinds[i]); } }