Skip to content

Commit

Permalink
rewrite rules implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
Raekye committed Jul 25, 2019
1 parent 11e335d commit ea382b9
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 5 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Starting from `midori`, CMake is used.
Example build: `mkdir build && cd build && cmake .. && make && make test`.

- `madoka`: one of my first passes, arguably my first success I could call a "compiler". Pre 2014-summer
- `sayaka` (in progress): successor to `madoka/`, had ideas on what to do differently. The ideas were pre 2014-summer, most of the work on it is post 2014-summer
- `sayaka`: successor to `madoka/`, had ideas on what to do differently. The ideas were pre 2014-summer, most of the work on it is post 2014-summer
- `siyu`: hand-written LL(1) regex parser, NFA state generator, DFA state generator, lexer-generator, and parser-generator
- `tk`: successor to `siyu/`, completed LALR(1) parser generator
- `midori`: successor to `tk/`, hmmmm
Expand Down
4 changes: 2 additions & 2 deletions midori/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ int test_parser2() {
int test_regex_engine() {
RegexEngine re;
//std::string pattern = "(abc){0,3}[a-zA-Z]|def.\\.[^a-zA-Z]?+-^\\n+[^\\t\\xff-\\u12345678]";
std::string pattern = "[-a-b-cd---]";
std::string pattern = "[-a-b-cd---]{3}";
std::unique_ptr<RegexAST> r = re.parse(pattern);
RegexASTPrinter printer;
r->accept(&printer);
Expand All @@ -130,7 +130,7 @@ int main() {
test_parser0();
test_parser2();
test_parser1();
test_regex_engine();
test_generator();
test_regex_engine();
return 0;
}
23 changes: 21 additions & 2 deletions midori/src/midori/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,14 @@ void Parser::add_skip(std::string symbol) {
}

// Convenience overload for productions that have no rewrite rule: forwards to
// the four-argument overload, passing nullptr as the RewriteHandler.
void Parser::add_production(std::string target, std::vector<std::string> symbols, ProductionHandler handler) {
	// The by-value parameters are not used again here, so hand them on
	// instead of copying them a second time.
	this->add_production(std::move(target), std::move(symbols), std::move(handler), nullptr);
}
void Parser::add_production(std::string target, std::vector<std::string> symbols, ProductionHandler handler, RewriteHandler rewrite) {
std::unique_ptr<Production> p(new Production);
p->target = target;
p->symbols = symbols;
p->handler = handler;
p->rewrite = rewrite;
this->nonterminals[target].push_back(p.get());
this->productions.push_back(std::move(p));
}
Expand Down Expand Up @@ -132,6 +136,11 @@ bool Parser::parse_advance(std::unique_ptr<Token> t, bool* accept) {
this->parse_stack_matches.pop();
mnt->terms[next_reduction->second->symbols.size() - i - 1] = std::move(m);
}
if (next_reduction->second->rewrite != nullptr) {
std::unique_ptr<MatchedNonterminal> transformed = next_reduction->second->rewrite(std::move(mnt));
this->pull_tokens(transformed.get());
return false;
}
if (next_reduction->second->handler != nullptr) {
mnt->value = next_reduction->second->handler(mnt.get());
}
Expand All @@ -158,6 +167,18 @@ bool Parser::parse_advance(std::unique_ptr<Token> t, bool* accept) {
return true;
}

// Hands every token contained in the match tree rooted at `m` to push_token().
//
// - If `m` is a MatchedTerminal, its token is moved out of the match and
//   passed to push_token(); the match no longer owns a token afterwards.
// - Otherwise `m` must be a MatchedNonterminal (asserted); its children are
//   processed recursively, from the last term to the first.
//   NOTE(review): the back-to-front order presumably exists because
//   push_token() makes the most recently pushed token the next one read --
//   confirm against push_token()/next_token().
void Parser::pull_tokens(Match* m) {
	MatchedTerminal* leaf = dynamic_cast<MatchedTerminal*>(m);
	if (leaf != nullptr) {
		this->push_token(std::move(leaf->token));
		return;
	}

	MatchedNonterminal* inner = dynamic_cast<MatchedNonterminal*>(m);
	assert(inner != nullptr);

	auto child = inner->terms.rbegin();
	while (child != inner->terms.rend()) {
		this->pull_tokens(child->get());
		child++;
	}
}

void Parser::generate_itemsets() {
assert(this->states.size() == 0);
std::unique_ptr<ItemSet> start(new ItemSet);
Expand Down Expand Up @@ -235,8 +256,6 @@ void Parser::generate_first_sets() {
while (changed) {
changed = false;
for (const std::unique_ptr<Production>& p : this->productions) {
mdk::printf("[debug] generating first set for\n");
Parser::debug_production(p.get());
std::set<std::string>& f = this->firsts[p->target];
if (Parser::production_is_epsilon(p.get())) {
//changed = changed || f.insert(Parser::EPSILON).second;
Expand Down
4 changes: 4 additions & 0 deletions midori/src/midori/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class MatchedNonterminal;
class ParserAST;

typedef std::function<std::unique_ptr<ParserAST>(MatchedNonterminal*)> ProductionHandler;
typedef std::function<std::unique_ptr<MatchedNonterminal>(std::unique_ptr<MatchedNonterminal>)> RewriteHandler;
typedef std::pair<Production*, Int> Item;

class ParserAST {
Expand All @@ -39,6 +40,7 @@ struct Production {
std::string target;
std::vector<std::string> symbols;
ProductionHandler handler;
RewriteHandler rewrite;
};

struct ItemSet {
Expand Down Expand Up @@ -84,6 +86,7 @@ class Parser {
void add_token(std::string, std::unique_ptr<RegexAST>);
void add_skip(std::string);
void add_production(std::string, std::vector<std::string>, ProductionHandler);
void add_production(std::string, std::vector<std::string>, ProductionHandler, RewriteHandler);
void generate(std::string);
std::unique_ptr<MatchedNonterminal> parse(IInputStream*);
void reset();
Expand Down Expand Up @@ -121,6 +124,7 @@ class Parser {
}

void push_token(std::unique_ptr<Token>);
void pull_tokens(Match*);
std::unique_ptr<Token> next_token(IInputStream*);

bool parse_advance(std::unique_ptr<Token>, bool*);
Expand Down
13 changes: 13 additions & 0 deletions midori/src/midori/regex_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,19 @@ std::unique_ptr<Parser> RegexEngine::make() {
Long l2 = std::stol(r2->str.c_str(), nullptr, 10);
return std::unique_ptr<ParserAST>(new ParserASTRange(l1, l2));
});
// Rewrite rule for the single-number repetition form: LBRACE dec_int RBRACE.
// No ProductionHandler is supplied (nullptr); only a RewriteHandler is registered.
// The handler takes ownership of the matched nonterminal `m` and returns a new
// MatchedNonterminal whose terms are, in order:
//   LBRACE, DEC("0"), COMMA, dec_int, RBRACE
// i.e. the two-number form with a synthetic lower bound of 0.
// NOTE(review): this makes `{n}` equivalent to `{0,n}`; most regex dialects read
// `{n}` as "exactly n" -- confirm this is the intended meaning.
p->add_production("mul_range", { "LBRACE", "dec_int", "RBRACE" }, nullptr, [](std::unique_ptr<MatchedNonterminal> m) -> std::unique_ptr<MatchedNonterminal> {
std::unique_ptr<MatchedNonterminal> ret(new MatchedNonterminal);
// Default-constructed location shared by both synthetic tokens.
LocationInfo loc;
// Synthetic tokens inserted between the original LBRACE and dec_int.
// NOTE(review): the first Token argument looks like its tag list and the second
// its lexeme -- confirm against the Token constructor.
std::unique_ptr<MatchedTerminal> t0(new MatchedTerminal(std::unique_ptr<Token>(new Token({ "DEC" }, "0", loc))));
// NOTE(review): the COMMA token carries an empty lexeme rather than "," --
// presumably nothing reads it; verify.
std::unique_ptr<MatchedTerminal> t1(new MatchedTerminal(std::unique_ptr<Token>(new Token({ "COMMA" }, "", loc))));
// Ownership of the original terms is moved out of `m`; the moved-from slots in
// m->terms are not touched again.
ret->terms.push_back(std::move(m->terms.at(0)));
ret->terms.push_back(std::move(t0));
ret->terms.push_back(std::move(t1));
ret->terms.push_back(std::move(m->terms.at(1)));
ret->terms.push_back(std::move(m->terms.at(2)));
return ret;
});

p->add_production("dec_int", { "DEC", "dec_int" }, [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
std::string str1 = m->terminal(0)->token->lexeme;
std::string str2 = dynamic_cast<ParserASTString*>(m->nonterminal(1)->value.get())->str;
Expand Down

0 comments on commit ea382b9

Please sign in to comment.