-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement simple linear assembler (#69)
- Loading branch information
Showing
18 changed files
with
502 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,4 +8,4 @@ build/ | |
# ide stuff | ||
.idea | ||
cmake-build* | ||
include/ChaiVM/interpreter/autogen | ||
include/ChaiVM/interpreter/autogen |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
#pragma once | ||
|
||
#ifndef yyFlexLexer | ||
#include <FlexLexer.h> | ||
#endif | ||
|
||
#include <cstdint> | ||
#include <cstdlib> | ||
#include <filesystem> | ||
#include <fstream> | ||
#include <iostream> | ||
#include <memory> | ||
#include <string> | ||
#include <utility> | ||
|
||
namespace front::assembler { | ||
|
||
class AsmLex final : public yyFlexLexer { | ||
public: | ||
enum LexemType { INTEGER, FLOAT, IDENTIFIER, COMMA, UNKNOWN }; | ||
|
||
class Lexem { | ||
public: | ||
Lexem(LexemType t) : type(t) {} | ||
LexemType type; | ||
virtual ~Lexem() {} | ||
}; | ||
class Int final : public Lexem { | ||
public: | ||
Int(LexemType t, int64_t v) : Lexem(t), value(v) {} | ||
int64_t value; | ||
~Int() override {} | ||
}; | ||
class Float final : public Lexem { | ||
public: | ||
Float(LexemType t, double v) : Lexem(t), value(v) {} | ||
double value; | ||
~Float() override {} | ||
}; | ||
class Identifier final : public Lexem { | ||
public: | ||
Identifier(LexemType t, std::string v) : Lexem(t), value(v) {} | ||
std::string value; | ||
~Identifier() override {} | ||
}; | ||
class Coma final : public Lexem { | ||
public: | ||
Coma(LexemType t) : Lexem(t) {} | ||
~Coma() override {} | ||
}; | ||
class Unknown final : public Lexem { | ||
public: | ||
Unknown(LexemType t) : Lexem(t) {} | ||
~Unknown() override {} | ||
}; | ||
|
||
int yylex() override; | ||
|
||
std::unique_ptr<Lexem> &nextLexem() { | ||
this->yylex(); | ||
return currentLexem_; | ||
} | ||
std::unique_ptr<Lexem> ¤tLexem() { return currentLexem_; } | ||
|
||
private: | ||
std::unique_ptr<Lexem> currentLexem_; | ||
|
||
int processInt() { | ||
currentLexem_ = | ||
std::make_unique<Int>(LexemType::INTEGER, std::atol(yytext)); | ||
std::string a = yytext; | ||
return 0; | ||
} | ||
int processFloat() { | ||
currentLexem_ = | ||
std::make_unique<Float>(LexemType::FLOAT, std::atof(yytext)); | ||
std::string a = yytext; | ||
return 0; | ||
} | ||
int processIdentifier() { | ||
currentLexem_ = | ||
std::make_unique<Identifier>(LexemType::IDENTIFIER, yytext); | ||
std::string a = yytext; | ||
return 0; | ||
} | ||
int processComma() { | ||
currentLexem_ = std::make_unique<Coma>(LexemType::COMMA); | ||
std::string a = yytext; | ||
return 0; | ||
} | ||
int processUnknown() { | ||
currentLexem_ = std::make_unique<Unknown>(LexemType::UNKNOWN); | ||
std::string a = yytext; | ||
return 1; | ||
} | ||
}; | ||
|
||
} // namespace front::assembler |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,198 @@ | ||
#pragma once | ||
|
||
#include <sstream> | ||
|
||
#include "ChaiVM/interpreter/autogen/operations.hpp" | ||
#include "ChaiVM/utils/file-format/chai-file.hpp" | ||
#include "ChaiVM/utils/instr2Raw.hpp" | ||
#include "frontend/assembler/asmlex.hpp" | ||
#include "op-string.hpp" | ||
|
||
namespace front::assembler { | ||
|
||
/**
 * Exception reporting a failure to assemble the input, tagged with the
 * source line at which the problem was detected.
 *
 * what() yields "Assemble error at line <line>: <description>".
 */
class AssembleError : public std::runtime_error {
public:
    /**
     * @param description human-readable explanation of the problem
     * @param line        line number embedded in the message
     */
    AssembleError(const std::string &description, size_t line)
        : std::runtime_error("Assemble error at line " + std::to_string(line) +
                             ": " + description) {}
};
|
||
class Assembler final { | ||
public: | ||
Assembler(const std::filesystem::path &inPath, | ||
const std::filesystem::path &outPath) | ||
: inputFile_(inPath, std::ios::in), outPath_(outPath) { | ||
lex_.switch_streams(&inputFile_); | ||
} | ||
|
||
/* | ||
* @todo #41:90min Implement adequate processing of the main function | ||
*/ | ||
void assemble() { | ||
processMain(); | ||
chaiFile_.toFile(outPath_); | ||
} | ||
|
||
private: | ||
AsmLex lex_; | ||
chai::utils::fileformat::ChaiFile chaiFile_; | ||
std::ifstream inputFile_; | ||
std::filesystem::path outPath_; | ||
|
||
/* | ||
* @todo #41:90min Refactor this function. Or maybe it is better to kill | ||
* myself? | ||
*/ | ||
void processMain() { | ||
lex_.nextLexem(); | ||
checkError(); | ||
while (lex_.currentLexem()->type == AsmLex::IDENTIFIER) { | ||
checkError(); | ||
processInstruction(); | ||
lex_.nextLexem(); | ||
if (lex_.currentLexem()->type == AsmLex::IDENTIFIER && | ||
OpString( | ||
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get()) | ||
->value) == chai::interpreter::Ret) { | ||
processInstruction(); | ||
break; | ||
} | ||
} | ||
} | ||
void processInstruction() { | ||
chai::interpreter::Operation op = OpString( | ||
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get()) | ||
->value); | ||
if (op == chai::interpreter::Inv) { | ||
throw AssembleError("Invalid instruction", lex_.lineno()); | ||
} | ||
switch (opToFormat(op)) { | ||
case chai::interpreter::N: | ||
processN(); | ||
break; | ||
case chai::interpreter::R: | ||
processR(); | ||
break; | ||
case chai::interpreter::RR: | ||
processRR(); | ||
break; | ||
case chai::interpreter::I: | ||
processI(); | ||
break; | ||
case chai::interpreter::RI: | ||
processRI(); | ||
break; | ||
case chai::interpreter::Unknown: | ||
default: | ||
throw AssembleError("Unknown instruction type", lex_.lineno()); | ||
break; | ||
} | ||
} | ||
void processN() { | ||
chai::interpreter::Operation op = OpString( | ||
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get()) | ||
->value); | ||
chaiFile_.addInstr(chai::utils::instr2Raw(op, 0, 0)); | ||
} | ||
void processR() { | ||
chai::interpreter::Operation op = OpString( | ||
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get()) | ||
->value); | ||
chai::interpreter::RegisterId regId = processReg(); | ||
chaiFile_.addInstr(chai::utils::instr2Raw(op, regId, 0)); | ||
} | ||
void processRR() { | ||
chai::interpreter::Operation op = OpString( | ||
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get()) | ||
->value); | ||
chai::interpreter::RegisterId reg1Id = processReg(); | ||
expectComma(); | ||
chai::interpreter::RegisterId reg2Id = processReg(); | ||
chaiFile_.addInstr(chai::utils::instr2Raw(op, reg1Id, reg2Id)); | ||
} | ||
void processI() { | ||
chai::interpreter::Operation op = OpString( | ||
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get()) | ||
->value); | ||
lex_.nextLexem(); | ||
if (lex_.currentLexem()->type == AsmLex::INTEGER) { | ||
chaiFile_.addWithConst( | ||
op, static_cast<int64_t>( | ||
static_cast<AsmLex::Int *>(lex_.currentLexem().get()) | ||
->value)); | ||
} else if (lex_.currentLexem()->type == AsmLex::FLOAT) { | ||
chaiFile_.addWithConst( | ||
op, (static_cast<AsmLex::Float *>(lex_.currentLexem().get()) | ||
->value)); | ||
} else { | ||
throw AssembleError("Unknown instruction type", lex_.lineno()); | ||
} | ||
} | ||
void processRI() { | ||
chai::interpreter::Operation op = OpString( | ||
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get()) | ||
->value); | ||
chai::interpreter::RegisterId regId = processReg(); | ||
expectComma(); | ||
lex_.nextLexem(); | ||
if (lex_.currentLexem()->type == AsmLex::INTEGER) { | ||
chaiFile_.addInstr(chai::utils::inst2RawRI( | ||
op, regId, | ||
static_cast<int64_t>( | ||
static_cast<AsmLex::Int *>(lex_.currentLexem().get()) | ||
->value))); | ||
} else if (lex_.currentLexem()->type == AsmLex::FLOAT) { | ||
chaiFile_.addInstr(chai::utils::inst2RawRI( | ||
op, regId, | ||
static_cast<AsmLex::Float *>(lex_.currentLexem().get()) | ||
->value)); | ||
} else { | ||
throw AssembleError("Unknown instruction type", lex_.lineno()); | ||
} | ||
} | ||
|
||
chai::interpreter::RegisterId processReg() { | ||
lex_.nextLexem(); | ||
if (lex_.currentLexem()->type != AsmLex::IDENTIFIER) { | ||
throw AssembleError("Expected register", lex_.lineno()); | ||
} | ||
return RegNameToRegId( | ||
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get()) | ||
->value); | ||
} | ||
|
||
void checkError() { | ||
if (lex_.currentLexem()->type == AsmLex::UNKNOWN) { | ||
throw AssembleError("Unknown lexem", lex_.lineno()); | ||
} | ||
} | ||
void expectComma() { | ||
lex_.nextLexem(); | ||
if (lex_.currentLexem()->type != AsmLex::COMMA) { | ||
throw AssembleError("Expected comma", lex_.lineno()); | ||
} | ||
} | ||
/* | ||
* @todo #41:90min Implement it in OpString class. Also rename OpString | ||
* class | ||
*/ | ||
chai::interpreter::OperationFormat | ||
opToFormat(chai::interpreter::Operation op) { | ||
return chai::interpreter::OP_TO_FORMAT[op]; | ||
} | ||
chai::interpreter::RegisterId RegNameToRegId(std::string regName) { | ||
chai::interpreter::RegisterId regId; | ||
if (regName.length() > 1 && regName[0] == 'r') { | ||
std::string digits = regName.substr(1); | ||
std::istringstream iss(digits); | ||
if (!(iss >> regId)) { | ||
throw AssembleError("Invalid register number", lex_.lineno()); | ||
} | ||
} else { | ||
throw AssembleError("Invalid register format", lex_.lineno()); | ||
} | ||
return regId; | ||
} | ||
}; | ||
|
||
} // namespace front::assembler |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#pragma once | ||
|
||
#include <string> | ||
|
||
#include "ChaiVM/interpreter/autogen/operations.hpp" | ||
#include "ChaiVM/interpreter/instruction.hpp" | ||
|
||
/* | ||
* @todo #41:90min Rename this class | ||
*/ | ||
class OpString { | ||
public: | ||
OpString(chai::interpreter::Operation val) : val_(val) {} | ||
explicit OpString(const std::string &strOp) { | ||
int ind = findString(strOp); | ||
if (ind == -1) { | ||
throw std::invalid_argument( | ||
"There is no enum value corresponding to the string: \"" + | ||
strOp + "\""); | ||
} | ||
val_ = static_cast<chai::interpreter::Operation>(ind); | ||
} | ||
|
||
std::string_view toString() const { | ||
return chai::interpreter::OP_TO_STR[val_]; | ||
} | ||
|
||
constexpr operator chai::interpreter::Operation() const { return val_; } | ||
constexpr auto operator<=>(const OpString &rhs) const = default; | ||
constexpr auto operator==(const chai::interpreter::Operation &rhs) const { | ||
return this->val_ == rhs; | ||
} | ||
|
||
private: | ||
constexpr int findString(const std::string &strOp) { | ||
for (unsigned int i = 0; i < sizeof(chai::interpreter::OP_TO_STR); | ||
i++) { | ||
if (strOp == chai::interpreter::OP_TO_STR[i]) { | ||
return i; | ||
} | ||
} | ||
return -1; | ||
} | ||
|
||
chai::interpreter::Operation val_ = chai::interpreter::Inv; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,4 @@ | ||
add_subdirectory(ChaiVM) | ||
# Add each listed component directory to the build. | ||
set(DIRS ChaiVM frontend) | ||
foreach(DIR ${DIRS}) | ||
add_subdirectory(${DIR}) | ||
endforeach() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Add each listed frontend subdirectory to the build. | ||
set(DIRS assembler) | ||
foreach(DIR ${DIRS}) | ||
add_subdirectory(${DIR}) | ||
endforeach() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Flex is mandatory: the scanner source below is generated from asmlex.l. | ||
find_package(FLEX REQUIRED) | ||
|
||
# Generated lexer files go into this directory's binary dir. | ||
set(LEXER_DIR ${CMAKE_CURRENT_BINARY_DIR}) | ||
set(LEXER_OUT ${LEXER_DIR}/lexer.cpp) | ||
# Generate lexer.cpp (and a lexer.hpp definitions file) from asmlex.l. | ||
flex_target(scanner | ||
asmlex.l | ||
${LEXER_OUT} | ||
DEFINES_FILE ${LEXER_DIR}/lexer.hpp | ||
) | ||
|
||
# Static library built from assembler.cpp and the generated scanner. | ||
add_library(front_asm STATIC) | ||
target_link_libraries(front_asm PRIVATE | ||
chai_include | ||
) | ||
target_sources(front_asm PRIVATE | ||
assembler.cpp | ||
${FLEX_scanner_OUTPUTS} | ||
) | ||
# PUBLIC so that targets linking front_asm also see the generated lexer dir. | ||
target_include_directories(front_asm PUBLIC | ||
${LEXER_DIR} | ||
) |
Oops, something went wrong.
ee0e232
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Puzzle
41-6e48b51c
discovered in include/frontend/assembler/assembler.hpp
) and submitted as #70. Please, remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.ee0e232
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Puzzle
41-4dc1c56c
discovered in include/frontend/assembler/assembler.hpp
) and submitted as #71. Please, remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.ee0e232
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Puzzle
41-a25bae73
discovered in include/frontend/assembler/assembler.hpp
) and submitted as #72. Please, remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.ee0e232
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Puzzle
41-1dc4ad84
discovered in include/frontend/assembler/op-string.hpp
) and submitted as #73. Please, remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.ee0e232
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Puzzle
41-a7e93de4
discovered in tools/opcode2operation-generator.py
) and submitted as #74. Please, remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.ee0e232
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Puzzle
41-08341c3f
discovered in tools/opcode2operation-generator.py
) and submitted as #75. Please, remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.