Skip to content

Commit

Permalink
Implement simple linear assembler (#69)
Browse files Browse the repository at this point in the history
  • Loading branch information
c71n93 authored Dec 19, 2023
1 parent 0de8229 commit ee0e232
Show file tree
Hide file tree
Showing 18 changed files with 502 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ build/
# ide stuff
.idea
cmake-build*
include/ChaiVM/interpreter/autogen
include/ChaiVM/interpreter/autogen
5 changes: 2 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.26)
project(ChaiVM)
set(CMAKE_CXX_STANDARD 20)

set(COMPILER_WARNINGS "-Wall -Wextra -Wpedantic -Werror -Wno-missing-field-initializers")
set(COMPILER_WARNINGS "-Wall -Wextra -Wpedantic -Werror -Wno-missing-field-initializers -Wno-sign-compare")
set(CMAKE_C_FLAGS ${COMPILER_WARNINGS})
set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${COMPILER_WARNINGS}")
set(CMAKE_C_FLAGS_RELEASE "-O2 ${COMPILER_WARNINGS}")
Expand Down Expand Up @@ -31,9 +31,8 @@ target_include_directories(chai_include INTERFACE
include
)

include_directories(include)

add_executable(ChaiVM src/main.cpp)
target_link_libraries(ChaiVM PUBLIC
chai_include
chai_interpreter
)
93 changes: 93 additions & 0 deletions include/frontend/assembler/asmlex.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#pragma once

#ifndef yyFlexLexer
#include <FlexLexer.h>
#endif

#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <utility>

namespace front::assembler {

/**
 * Flex-based scanner for the assembler.
 *
 * The scanner keeps exactly one "current" lexem. The process*() helpers
 * below replace it with a freshly allocated object; they are presumably
 * invoked from the rule actions of the flex specification that implements
 * yylex() (not visible from this header).
 */
class AsmLex final : public yyFlexLexer {
public:
    enum LexemType { INTEGER, FLOAT, IDENTIFIER, COMMA, UNKNOWN };

    /** Base of all lexems; `type` tells which derived class is stored. */
    class Lexem {
    public:
        Lexem(LexemType t) : type(t) {}
        virtual ~Lexem() = default;
        LexemType type;
    };
    /** Integer literal. */
    class Int final : public Lexem {
    public:
        Int(LexemType t, int64_t v) : Lexem(t), value(v) {}
        int64_t value;
    };
    /** Floating point literal. */
    class Float final : public Lexem {
    public:
        Float(LexemType t, double v) : Lexem(t), value(v) {}
        double value;
    };
    /** Identifier; `value` holds the matched text. */
    class Identifier final : public Lexem {
    public:
        // `v` is taken by value, so move it instead of copying it again.
        Identifier(LexemType t, std::string v)
            : Lexem(t), value(std::move(v)) {}
        std::string value;
    };
    /** Comma separator. */
    class Coma final : public Lexem {
    public:
        Coma(LexemType t) : Lexem(t) {}
    };
    /** Text that matched none of the known lexem kinds. */
    class Unknown final : public Lexem {
    public:
        Unknown(LexemType t) : Lexem(t) {}
    };

    int yylex() override;

    /**
     * Runs the scanner once and returns the current lexem.
     *
     * If the scan does not reach one of the process*() helpers (presumably
     * at end of input), the previously stored lexem is returned unchanged;
     * before the first successful scan the returned pointer is null.
     */
    std::unique_ptr<Lexem> &nextLexem() {
        this->yylex();
        return currentLexem_;
    }
    /** Returns the lexem stored by the most recent successful scan. */
    std::unique_ptr<Lexem> &currentLexem() { return currentLexem_; }

private:
    std::unique_ptr<Lexem> currentLexem_;

    /** Stores the matched text as an integer lexem. @return 0 */
    int processInt() {
        // strtoll yields a full 64-bit value on every platform; atol returns
        // `long`, which is only 32 bits wide on LLP64 targets.
        currentLexem_ = std::make_unique<Int>(
            LexemType::INTEGER, std::strtoll(yytext, nullptr, 10));
        return 0;
    }
    /** Stores the matched text as a floating point lexem. @return 0 */
    int processFloat() {
        // strtod has a defined result for out-of-range input, unlike atof.
        currentLexem_ = std::make_unique<Float>(LexemType::FLOAT,
                                                std::strtod(yytext, nullptr));
        return 0;
    }
    /** Stores the matched text as an identifier lexem. @return 0 */
    int processIdentifier() {
        currentLexem_ =
            std::make_unique<Identifier>(LexemType::IDENTIFIER, yytext);
        return 0;
    }
    /** Stores a comma lexem. @return 0 */
    int processComma() {
        currentLexem_ = std::make_unique<Coma>(LexemType::COMMA);
        return 0;
    }
    /** Stores an unknown lexem. @return 1, unlike the other helpers. */
    int processUnknown() {
        currentLexem_ = std::make_unique<Unknown>(LexemType::UNKNOWN);
        return 1;
    }
};

} // namespace front::assembler
198 changes: 198 additions & 0 deletions include/frontend/assembler/assembler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
#pragma once

#include <sstream>

#include "ChaiVM/interpreter/autogen/operations.hpp"
#include "ChaiVM/utils/file-format/chai-file.hpp"
#include "ChaiVM/utils/instr2Raw.hpp"
#include "frontend/assembler/asmlex.hpp"
#include "op-string.hpp"

namespace front::assembler {

/**
 * Error raised while assembling; what() carries the source line number
 * followed by the description.
 */
class AssembleError : public std::runtime_error {
public:
    AssembleError(std::string description, size_t line)
        : std::runtime_error(composeMessage(description, line)) {}

private:
    /** Builds the "Assemble error at line <line>: <text>" message. */
    static std::string composeMessage(const std::string &text,
                                      size_t lineNumber) {
        std::string message = "Assemble error at line ";
        message += std::to_string(lineNumber);
        message += ": ";
        message += text;
        return message;
    }
};

class Assembler final {
public:
Assembler(const std::filesystem::path &inPath,
const std::filesystem::path &outPath)
: inputFile_(inPath, std::ios::in), outPath_(outPath) {
lex_.switch_streams(&inputFile_);
}

/*
* @todo #41:90min Implement adequate processing of the main function
*/
void assemble() {
processMain();
chaiFile_.toFile(outPath_);
}

private:
AsmLex lex_;
chai::utils::fileformat::ChaiFile chaiFile_;
std::ifstream inputFile_;
std::filesystem::path outPath_;

/*
* @todo #41:90min Refactor this function. Or maybe it is better to kill
* myself?
*/
void processMain() {
lex_.nextLexem();
checkError();
while (lex_.currentLexem()->type == AsmLex::IDENTIFIER) {
checkError();
processInstruction();
lex_.nextLexem();
if (lex_.currentLexem()->type == AsmLex::IDENTIFIER &&
OpString(
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get())
->value) == chai::interpreter::Ret) {
processInstruction();
break;
}
}
}
void processInstruction() {
chai::interpreter::Operation op = OpString(
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get())
->value);
if (op == chai::interpreter::Inv) {
throw AssembleError("Invalid instruction", lex_.lineno());
}
switch (opToFormat(op)) {
case chai::interpreter::N:
processN();
break;
case chai::interpreter::R:
processR();
break;
case chai::interpreter::RR:
processRR();
break;
case chai::interpreter::I:
processI();
break;
case chai::interpreter::RI:
processRI();
break;
case chai::interpreter::Unknown:
default:
throw AssembleError("Unknown instruction type", lex_.lineno());
break;
}
}
void processN() {
chai::interpreter::Operation op = OpString(
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get())
->value);
chaiFile_.addInstr(chai::utils::instr2Raw(op, 0, 0));
}
void processR() {
chai::interpreter::Operation op = OpString(
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get())
->value);
chai::interpreter::RegisterId regId = processReg();
chaiFile_.addInstr(chai::utils::instr2Raw(op, regId, 0));
}
void processRR() {
chai::interpreter::Operation op = OpString(
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get())
->value);
chai::interpreter::RegisterId reg1Id = processReg();
expectComma();
chai::interpreter::RegisterId reg2Id = processReg();
chaiFile_.addInstr(chai::utils::instr2Raw(op, reg1Id, reg2Id));
}
void processI() {
chai::interpreter::Operation op = OpString(
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get())
->value);
lex_.nextLexem();
if (lex_.currentLexem()->type == AsmLex::INTEGER) {
chaiFile_.addWithConst(
op, static_cast<int64_t>(
static_cast<AsmLex::Int *>(lex_.currentLexem().get())
->value));
} else if (lex_.currentLexem()->type == AsmLex::FLOAT) {
chaiFile_.addWithConst(
op, (static_cast<AsmLex::Float *>(lex_.currentLexem().get())
->value));
} else {
throw AssembleError("Unknown instruction type", lex_.lineno());
}
}
void processRI() {
chai::interpreter::Operation op = OpString(
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get())
->value);
chai::interpreter::RegisterId regId = processReg();
expectComma();
lex_.nextLexem();
if (lex_.currentLexem()->type == AsmLex::INTEGER) {
chaiFile_.addInstr(chai::utils::inst2RawRI(
op, regId,
static_cast<int64_t>(
static_cast<AsmLex::Int *>(lex_.currentLexem().get())
->value)));
} else if (lex_.currentLexem()->type == AsmLex::FLOAT) {
chaiFile_.addInstr(chai::utils::inst2RawRI(
op, regId,
static_cast<AsmLex::Float *>(lex_.currentLexem().get())
->value));
} else {
throw AssembleError("Unknown instruction type", lex_.lineno());
}
}

chai::interpreter::RegisterId processReg() {
lex_.nextLexem();
if (lex_.currentLexem()->type != AsmLex::IDENTIFIER) {
throw AssembleError("Expected register", lex_.lineno());
}
return RegNameToRegId(
static_cast<AsmLex::Identifier *>(lex_.currentLexem().get())
->value);
}

void checkError() {
if (lex_.currentLexem()->type == AsmLex::UNKNOWN) {
throw AssembleError("Unknown lexem", lex_.lineno());
}
}
void expectComma() {
lex_.nextLexem();
if (lex_.currentLexem()->type != AsmLex::COMMA) {
throw AssembleError("Expected comma", lex_.lineno());
}
}
/*
* @todo #41:90min Implement it in OpString class. Also rename OpString
* class
*/
chai::interpreter::OperationFormat
opToFormat(chai::interpreter::Operation op) {
return chai::interpreter::OP_TO_FORMAT[op];
}
chai::interpreter::RegisterId RegNameToRegId(std::string regName) {
chai::interpreter::RegisterId regId;
if (regName.length() > 1 && regName[0] == 'r') {
std::string digits = regName.substr(1);
std::istringstream iss(digits);
if (!(iss >> regId)) {
throw AssembleError("Invalid register number", lex_.lineno());
}
} else {
throw AssembleError("Invalid register format", lex_.lineno());
}
return regId;
}
};

} // namespace front::assembler
46 changes: 46 additions & 0 deletions include/frontend/assembler/op-string.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#pragma once

#include <string>

#include "ChaiVM/interpreter/autogen/operations.hpp"
#include "ChaiVM/interpreter/instruction.hpp"

/*
* @todo #41:90min Rename this class
*/
/**
 * Wraps a chai::interpreter::Operation and converts between the enum value
 * and its textual mnemonic through the chai::interpreter::OP_TO_STR table.
 */
class OpString {
public:
    /** Wraps an already known operation. */
    OpString(chai::interpreter::Operation val) : val_(val) {}

    /**
     * Looks the mnemonic up in OP_TO_STR.
     * @throws std::invalid_argument if no table entry equals `strOp`.
     */
    explicit OpString(const std::string &strOp) {
        const int ind = findString(strOp);
        if (ind == -1) {
            throw std::invalid_argument(
                "There is no enum value corresponding to the string: \"" +
                strOp + "\"");
        }
        val_ = static_cast<chai::interpreter::Operation>(ind);
    }

    /** Mnemonic of the wrapped operation. */
    std::string_view toString() const {
        return chai::interpreter::OP_TO_STR[val_];
    }

    constexpr operator chai::interpreter::Operation() const { return val_; }
    constexpr auto operator<=>(const OpString &rhs) const = default;
    constexpr auto operator==(const chai::interpreter::Operation &rhs) const {
        return this->val_ == rhs;
    }

private:
    /**
     * Linear search through OP_TO_STR.
     * @return index of the matching entry, or -1 if there is none.
     */
    static constexpr int findString(const std::string &strOp) {
        // std::size() gives the number of entries. sizeof() gives the size
        // in bytes, which sent the search past the end of the table for
        // every unknown mnemonic.
        for (size_t i = 0; i < std::size(chai::interpreter::OP_TO_STR); ++i) {
            if (strOp == chai::interpreter::OP_TO_STR[i]) {
                return static_cast<int>(i);
            }
        }
        return -1;
    }

    chai::interpreter::Operation val_ = chai::interpreter::Inv;
};
5 changes: 4 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
add_subdirectory(ChaiVM)
# Add every source subtree listed in DIRS to the build, in list order.
set(DIRS ChaiVM frontend)
foreach(DIR ${DIRS})
add_subdirectory(${DIR})
endforeach()
4 changes: 4 additions & 0 deletions src/frontend/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Frontend components to build; only the assembler is listed.
set(DIRS assembler)
foreach(DIR ${DIRS})
add_subdirectory(${DIR})
endforeach()
21 changes: 21 additions & 0 deletions src/frontend/assembler/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Flex generates the assembler's scanner, so configuration fails without it.
find_package(FLEX REQUIRED)

# Generated scanner files are placed in this directory's binary dir.
set(LEXER_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(LEXER_OUT ${LEXER_DIR}/lexer.cpp)
# Generate lexer.cpp from asmlex.l, with lexer.hpp as the defines file.
flex_target(scanner
asmlex.l
${LEXER_OUT}
DEFINES_FILE ${LEXER_DIR}/lexer.hpp
)

# Static library made of assembler.cpp and the generated scanner outputs.
add_library(front_asm STATIC)
target_link_libraries(front_asm PRIVATE
chai_include
)
target_sources(front_asm PRIVATE
assembler.cpp
${FLEX_scanner_OUTPUTS}
)
# PUBLIC: targets linking front_asm also get the generated-file directory
# on their include path.
target_include_directories(front_asm PUBLIC
${LEXER_DIR}
)
Loading

6 comments on commit ee0e232

@0pdd
Copy link
Collaborator

@0pdd 0pdd commented on ee0e232 Dec 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Puzzle 41-6e48b51c discovered in include/frontend/assembler/assembler.hpp and submitted as #70. Please remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.

@0pdd
Copy link
Collaborator

@0pdd 0pdd commented on ee0e232 Dec 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Puzzle 41-4dc1c56c discovered in include/frontend/assembler/assembler.hpp and submitted as #71. Please remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.

@0pdd
Copy link
Collaborator

@0pdd 0pdd commented on ee0e232 Dec 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Puzzle 41-a25bae73 discovered in include/frontend/assembler/assembler.hpp and submitted as #72. Please remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.

@0pdd
Copy link
Collaborator

@0pdd 0pdd commented on ee0e232 Dec 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Puzzle 41-1dc4ad84 discovered in include/frontend/assembler/op-string.hpp and submitted as #73. Please remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.

@0pdd
Copy link
Collaborator

@0pdd 0pdd commented on ee0e232 Dec 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Puzzle 41-a7e93de4 discovered in tools/opcode2operation-generator.py and submitted as #74. Please remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.

@0pdd
Copy link
Collaborator

@0pdd 0pdd commented on ee0e232 Dec 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Puzzle 41-08341c3f discovered in tools/opcode2operation-generator.py and submitted as #75. Please remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.

Please sign in to comment.