Skip to content

Commit

Permalink
refactored and cleaned up a lot of code
Browse files Browse the repository at this point in the history
  • Loading branch information
Raekye committed Jul 22, 2019
1 parent 2d20d69 commit 92a3501
Show file tree
Hide file tree
Showing 9 changed files with 258 additions and 633 deletions.
133 changes: 48 additions & 85 deletions midori/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,46 +17,21 @@ int test_regex_engine() {
return 0;
}

int test_parser() {
Parser p;
/*
p.add_token("A", std::unique_ptr<RegexAST>(new RegexASTLiteral('a')));
p.add_production("n", { "n", "n" }, [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
int test_parser0() {
ProductionHandler fn = [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
(void) m;
return nullptr;
});
p.add_production("n", { "A" }, [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
(void) m;
return nullptr;
});
*/
};
Parser p;
p.add_token("EQUALS", std::unique_ptr<RegexAST>(new RegexASTLiteral('=')));
p.add_token("X", std::unique_ptr<RegexAST>(new RegexASTLiteral('x')));
p.add_token("STAR", std::unique_ptr<RegexAST>(new RegexASTLiteral('*')));
p.add_production("s", { "n" }, [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
(void) m;
return nullptr;
});
p.add_production("n", { "v", "EQUALS", "e" }, [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
(void) m;
return nullptr;
});
p.add_production("n", { "e" }, [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
(void) m;
return nullptr;
});
p.add_production("e", { "v" }, [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
(void) m;
return nullptr;
});
p.add_production("v", { "X" }, [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
(void) m;
return nullptr;
});
p.add_production("v", { "STAR", "e" }, [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
(void) m;
return nullptr;
});
p.add_production("s", { "n" }, fn);
p.add_production("n", { "v", "EQUALS", "e" }, fn);
p.add_production("n", { "e" }, fn);
p.add_production("e", { "v" }, fn);
p.add_production("v", { "X" }, fn);
p.add_production("v", { "STAR", "e" }, fn);
p.generate("s");
std::stringstream ss;
ss << "*x=x";
Expand All @@ -65,63 +40,49 @@ int test_parser() {
return 0;
}

int test_regex_parser() {
/*
std::unique_ptr<Parser> p = RegexParserGenerator::make();
for (int i = 0; i < 2; i++) {
std::stringstream ss;
ss << "a(bc){3,4}\\[|def[ghi\\t0-9\\-]+\\000000x2dz(\\x2d)d";
//ss << "[x-zabc-f]";
std::unique_ptr<Match> m = p->parse(&ss);
MatchedNonterminal* n = dynamic_cast<MatchedNonterminal*>(m.get());
ParserRegexAST* r = dynamic_cast<ParserRegexAST*>(n->value.get());
RegexASTPrinter printer;
r->regex->accept(&printer);
}
*/
int test_parser1() {
ProductionHandler fn = [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
(void) m;
return nullptr;
};
Parser p;
p.add_token("A", std::unique_ptr<RegexAST>(new RegexASTLiteral('a')));
p.add_production("n", { "n", "n" }, fn);
p.add_production("n", { "A" }, fn);
p.generate("n");
std::stringstream ss;
ss << "aaa";
FileInputStream fis(&ss);
p.parse(&fis);
return 0;
}

/*
int test_parser() {
std::string indents = "";
ProductionHandler fn = [&indents, &fn](Match* m) -> void {
std::stack<Match*> s;
s.push(m);
while (s.size() > 0) {
m = s.top();
s.pop();
if (MatchedTerminal* mt = dynamic_cast<MatchedTerminal*>(m)) {
mdk::logf("%s- terminal %s, %s\n", indents.c_str(), mt->token->tag.c_str(), mt->token->lexeme.c_str());
} else if (MatchedNonterminal* mnt = dynamic_cast<MatchedNonterminal*>(m)) {
mdk::logf("%s- nonterminal ", indents.c_str());
Parser::debug_production(mnt->production);
indents += " ";
for (std::unique_ptr<Match>& x : mnt->terms) {
fn(x.get());
}
indents = indents.substr(0, indents.length() - 2);
}
}
// Smoke test for a bracketed character-class grammar:
//   class          -> LB class_contents RB
//   class_contents -> class_element | class_element class_contents
//   class_element  -> literal | class_element DASH literal
//   literal        -> DASH | ANY
// and runs it over the input "[-a-c-d-]". The parse result is discarded; the
// function always returns 0.
//
// Only one parser is configured here. A second, never-generated `Parser parser`
// (set_start / string-pattern add_token / parse on a raw stringstream) used to be
// set up alongside it and wrote "x=*x" into the same stream ahead of the real
// input; that setup did not match the add_token/generate/FileInputStream flow
// used by the sibling tests and has been dropped.
int test_parser2() {
    // Every production shares one handler that produces no AST node.
    ProductionHandler fn = [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
        (void) m;
        return nullptr;
    };

    Parser p;
    p.add_token("LB", std::unique_ptr<RegexAST>(new RegexASTLiteral('[')));
    p.add_token("RB", std::unique_ptr<RegexAST>(new RegexASTLiteral(']')));
    p.add_token("DASH", std::unique_ptr<RegexAST>(new RegexASTLiteral('-')));
    // NOTE(review): presumably a negated group over the ranges '['..'[' and ']'..']',
    // i.e. "any character except a bracket" - confirm against RegexASTGroup::make.
    p.add_token("ANY", std::unique_ptr<RegexAST>(RegexASTGroup::make(true, { '[', '[', ']', ']' })));

    p.add_production("class", { "LB", "class_contents", "RB" }, fn);
    p.add_production("class_contents", { "class_element" }, fn);
    p.add_production("class_contents", { "class_element", "class_contents" }, fn);
    p.add_production("class_element", { "literal" }, fn);
    p.add_production("class_element", { "class_element", "DASH", "literal" }, fn);
    p.add_production("literal", { "DASH" }, fn);
    p.add_production("literal", { "ANY" }, fn);
    p.generate("class");

    std::stringstream ss;
    ss << "[-a-c-d-]";
    FileInputStream fis(&ss);
    p.parse(&fis);
    return 0;
}

/*
int test_generator() {
std::fstream f("src/parser.txt", std::fstream::in);
std::unique_ptr<Parser> p = Parser::from_file(&f);
Expand Down Expand Up @@ -163,8 +124,10 @@ int main() {
ULong x = ~0;
std::cout << "-1 is " << x << std::endl;
test_interval_tree();
test_parser();
test_regex_engine();
test_parser0();
test_parser2();
test_parser1();
//test_generator();
return 0;
}
12 changes: 12 additions & 0 deletions midori/src/midori/finite_automata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,18 @@ RegexDFAState::RegexDFAState(UInt id) : id(id) {
std::memset(this->_transitions, 0, RegexDFAState::OPTIMIZED_CHARS * sizeof(RegexDFAState*));
}

// Returns the state reached from this state on character `ch`, or nullptr when
// no transition is found.
//
// Characters below OPTIMIZED_CHARS are looked up in the `_transitions` table;
// all other characters are looked up in the `transitions` tree using the
// single-point interval [ch, ch], which is asserted to yield at most one hit.
RegexDFAState* RegexDFAState::next(UInt ch) {
    if (ch >= RegexDFAState::OPTIMIZED_CHARS) {
        std::unique_ptr<Tree::SearchList> hits = this->transitions.find(Tree::Interval(ch, ch));
        assert(hits->size() <= 1);
        if (hits->size() == 0) {
            return nullptr;
        }
        return hits->front().second;
    }
    return this->_transitions[ch];
}

// Creates an NFA state with the given id; the state starts out non-terminal.
RegexNFAState::RegexNFAState(UInt id)
    : id(id)
    , terminal(false)
{
}
Expand Down
1 change: 1 addition & 0 deletions midori/src/midori/finite_automata.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class RegexDFAState {
Tree transitions;

RegexDFAState(UInt);
RegexDFAState* next(UInt);
};

class RegexNFAState {
Expand Down
46 changes: 33 additions & 13 deletions midori/src/midori/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
#include <iostream>
#include "utf8.h"

// Tag of the empty-lexeme token that scan() produces when the input reports
// end-of-file and no buffered characters are left unconsumed.
std::string const Lexer::TOKEN_END = "$END";
// Tag of the token that scan() produces when a negative (EOF/error) read arrives
// before any rule has matched and the end-of-input case above does not apply.
std::string const Lexer::TOKEN_BAD = "$BAD";

// Out-of-line destructor for the input-stream interface; there is nothing to release.
IInputStream::~IInputStream() {}
Expand All @@ -13,7 +16,10 @@ FileInputStream::FileInputStream(std::istream* file) : file(file) {
// Reads the next character from the wrapped std::istream.
//
// Returns the value read when the stream is still good after the read.
// Otherwise returns Lexer::CHAR_EOF if the stream reports end-of-file, or
// Lexer::CHAR_BAD for any other failure.
Long FileInputStream::get() {
    UInt ch = this->file->get();
    if (this->file->good()) {
        return ch;
    }
    // Keep the EOF/error distinction reachable: an unconditional `return -1;`
    // at this point would report every failure as end-of-file.
    if (this->file->eof()) {
        return Lexer::CHAR_EOF;
    }
    return Lexer::CHAR_BAD;
}
Expand All @@ -24,12 +30,12 @@ VectorInputStream::VectorInputStream(std::vector<UInt> v) : v(v), pos(0) {

// Returns the element at the current position and advances past it, or
// Lexer::CHAR_EOF once every element of the vector has been consumed.
Long VectorInputStream::get() {
    if (this->pos < this->v.size()) {
        return this->v.at(this->pos++);
    }
    // Use the named sentinel rather than a bare -1 so this stays in step with
    // the value the lexer compares against.
    return Lexer::CHAR_EOF;
}

// Creates a lexer with no current DFA state, a read position of 0 in the
// buffer, and a source location of (0, 0) - the same line/column values that
// reset() restores.
Lexer::Lexer() : current_state(nullptr), buffer_pos(0), location(0, 0) {
    return;
}

Expand Down Expand Up @@ -131,6 +137,8 @@ void Lexer::generate() {
void Lexer::reset() {
this->buffer.clear();
this->buffer_pos = 0;
this->location.line = 0;
this->location.column = 0;
}

void Lexer::add_rule(std::string rule, std::unique_ptr<RegexAST> regex) {
Expand All @@ -144,6 +152,7 @@ std::unique_ptr<Token> Lexer::scan(IInputStream* in) {
std::vector<std::string> matched_tags;
std::string matched_str = "";
UInt matched_buffer_pos = this->buffer_pos;
LocationInfo matched_location = this->location;
std::string found_buffer = "";
std::unique_ptr<Token> t;
while (true) {
Expand All @@ -152,24 +161,34 @@ std::unique_ptr<Token> Lexer::scan(IInputStream* in) {
matched_tags = this->current_state->terminals;
matched_str.append(found_buffer);
matched_buffer_pos = this->buffer_pos;
matched_location = this->location;
found_buffer = "";
}
Long ch = this->read(in);
RegexDFAState* next = nullptr;
if (ch >= 0) {
if (ch < RegexDFAState::OPTIMIZED_CHARS) {
next = this->current_state->_transitions[ch];
} else {
std::unique_ptr<RegexDFAState::Tree::SearchList> l = this->current_state->transitions.find(RegexDFAState::Tree::Interval(ch, ch));
assert(l->size() <= 1);
if (l->size() > 0) {
next = l->front().second;
if (ch == '\n') {
this->location.line++;
this->location.column = 0;
} else {
this->location.column++;
}
if (ch < 0) {
if (matched) {
t.reset(new Token(matched_tags, matched_str, matched_location));
break;
}
if (ch == Lexer::CHAR_EOF) {
if (this->buffer_pos == matched_buffer_pos) {
t.reset(new Token({ Lexer::TOKEN_END }, "", this->location));
break;
}
}
t.reset(new Token({ Lexer::TOKEN_BAD }, found_buffer, matched_location));
break;
}
RegexDFAState* next = this->current_state->next(ch);
if (next == nullptr) {
if (matched) {
t.reset(new Token(matched_tags, matched_str, LocationInfo(0, 0)));
t.reset(new Token(matched_tags, matched_str, matched_location));
}
break;
}
Expand All @@ -178,6 +197,7 @@ std::unique_ptr<Token> Lexer::scan(IInputStream* in) {
this->current_state = next;
}
this->buffer_pos = matched_buffer_pos;
this->location = matched_location;
return t;
}

Expand Down
22 changes: 15 additions & 7 deletions midori/src/midori/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,19 @@ class VectorInputStream : public IInputStream {
};

class Lexer {
public:
static std::string const TOKEN_END;
static std::string const TOKEN_BAD;
static const Long CHAR_EOF = -1;
static const Long CHAR_BAD = -2;

Lexer();

void add_rule(std::string, std::unique_ptr<RegexAST>);
std::unique_ptr<Token> scan(IInputStream*);
void generate();
void reset();

private:
std::vector<std::string> rules;
std::vector<std::unique_ptr<RegexAST>> rules_regex;
Expand All @@ -64,14 +77,9 @@ class Lexer {
std::vector<UInt> buffer;
UInt buffer_pos;

Long read(IInputStream*);
public:
Lexer();
LocationInfo location;

void add_rule(std::string, std::unique_ptr<RegexAST>);
std::unique_ptr<Token> scan(IInputStream*);
void generate();
void reset();
Long read(IInputStream*);
};

#endif /* MIDORI_LEXER_H_INCLUDED */
Loading

0 comments on commit 92a3501

Please sign in to comment.