Skip to content

Commit

Permalink
added parser precedence
Browse files Browse the repository at this point in the history
  • Loading branch information
Raekye committed Aug 7, 2019
1 parent 5b2330a commit e6da081
Show file tree
Hide file tree
Showing 4 changed files with 236 additions and 44 deletions.
163 changes: 126 additions & 37 deletions midori/src/midori/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
#include "helper.h"
#include <utility>

Precedence const Precedence::UNDEFINED(0, Precedence::Associativity::NONE);
std::string const Parser::ROOT = "$root";
std::string const Parser::TOKEN_MIDORI = "#";
std::string const Parser::TOKEN_MIDORI = "$#";

ParserAST::~ParserAST() {
return;
Expand All @@ -15,13 +16,7 @@ Match::~Match() {
// Wraps a lexed token; ownership of `t` is moved into the `token` member.
MatchedTerminal::MatchedTerminal(std::unique_ptr<Token> t)
	: token(std::move(t)) {
}
MatchedNonterminal::MatchedNonterminal() : MatchedNonterminal(nullptr, 0) {
return;
}
MatchedNonterminal::MatchedNonterminal(Production* p) : MatchedNonterminal(p, p->symbols.size()) {
return;
}
MatchedNonterminal::MatchedNonterminal(Production* p, size_t n) : production(p) , terms(n), value(nullptr) {
// Builds a match for production `p`. `terms` is pre-sized with one slot per
// symbol of the production, or left empty when `p` is null; `value` starts
// out null.
MatchedNonterminal::MatchedNonterminal(Production* p)
	: production(p)
	, terms((p != nullptr) ? p->symbols.size() : 0)
	, value(nullptr) {
}

Expand All @@ -34,25 +29,62 @@ void Parser::add_skip(std::string symbol) {
this->lexer.add_skip(symbol);
}

void Parser::add_production(std::string target, std::vector<std::string> symbols, ProductionHandler handler) {
this->add_production(target, symbols, handler, nullptr);
// Convenience overload: registers a production that has no rewrite handler
// and returns the index reported by the four-argument overload.
Int Parser::add_production(std::string target, std::vector<std::string> symbols, ProductionHandler handler) {
	RewriteHandler const no_rewrite(nullptr);
	return this->add_production(target, symbols, handler, no_rewrite);
}
void Parser::add_production(std::string target, std::vector<std::string> symbols, ProductionHandler handler, RewriteHandler rewrite) {
/*
 * Registers the production `target -> symbols` together with its handlers.
 *
 * The new production is appended to `productions`, and a non-owning pointer
 * to it is recorded under `target` in `nonterminals`.
 * Returns the index of the new production, i.e. the number of productions
 * that were registered before this call.
 */
Int Parser::add_production(std::string target, std::vector<std::string> symbols, ProductionHandler handler, RewriteHandler rewrite) {
	Int n = this->productions.size();
	std::unique_ptr<Production> p(new Production);
	p->index = n;
	// the arguments are received by value, so hand them over instead of copying again
	p->target = std::move(target);
	p->symbols = std::move(symbols);
	// p->precedence is deliberately left as the default (empty) string;
	// an explicit class is attached later through set_precedence(Int, ...)
	p->handler = std::move(handler);
	p->rewrite = std::move(rewrite);
	// `target` has been moved from, so key the lookup on the stored copy
	this->nonterminals[p->target].push_back(p.get());
	this->productions.push_back(std::move(p));
	return n;
}

/*
 * Creates the precedence class `precedence`, or moves an existing class of
 * that name to a new level/associativity.
 *
 * Returns false, changing nothing, when
 *  - `level` is 0: that level is what Precedence::UNDEFINED carries and
 *    Precedence::is_defined() reports it as undefined, so a class stored
 *    there could never take part in conflict resolution; or
 *  - `level` is already occupied by a class with a different name.
 * Returns true once the class is stored and `precedence_levels` maps `level`
 * to its name.
 */
bool Parser::set_precedence_class(std::string precedence, UInt level, Precedence::Associativity assoc) {
	if (level == 0) {
		return false;
	}
	auto owner = this->precedence_levels.find(level);
	if (owner != this->precedence_levels.end() && owner->second != precedence) {
		return false;
	}
	auto existing = this->precedence_classes.find(precedence);
	if (existing == this->precedence_classes.end()) {
		this->precedence_classes.emplace(std::piecewise_construct, std::forward_as_tuple(precedence), std::forward_as_tuple(level, assoc));
	} else {
		// the class is moving: release the level it used to occupy first
		this->precedence_levels.erase(existing->second.level);
		existing->second.level = level;
		existing->second.assoc = assoc;
	}
	this->precedence_levels[level] = precedence;
	return true;
}

// Associates `symbol` with the precedence class named `precedence`.
// Returns false, leaving the mapping untouched, when no such class has been
// registered; returns true after the association is stored.
bool Parser::set_precedence(std::string symbol, std::string precedence) {
	bool const class_known = this->precedence_classes.count(precedence) > 0;
	if (class_known) {
		this->precedence[symbol] = precedence;
	}
	return class_known;
}

/*
 * Attaches the precedence class named `precedence` to the production with
 * index `production` (the value returned by add_production).
 *
 * Returns false, changing nothing, when the class has not been registered or
 * when `production` does not index an existing production. The index check
 * keeps every failure on the bool return instead of letting
 * std::vector::at throw std::out_of_range.
 */
bool Parser::set_precedence(Int production, std::string precedence) {
	if (this->precedence_classes.find(precedence) == this->precedence_classes.end()) {
		return false;
	}
	if (production < 0 || static_cast<size_t>(production) >= this->productions.size()) {
		return false;
	}
	Production* p = this->productions[static_cast<size_t>(production)].get();
	p->precedence = precedence;
	return true;
}

void Parser::generate(Type type, std::string start) {
this->terminals.insert(Lexer::TOKEN_END);
this->add_production(Parser::ROOT, { start }, [](MatchedNonterminal* m) -> std::unique_ptr<ParserAST> {
return std::move(m->value);
});
this->add_production(Parser::ROOT, { start }, nullptr);
this->lexer.generate();
this->generate_first_sets();
this->generate_itemsets(type);
Expand All @@ -79,7 +111,7 @@ std::unique_ptr<MatchedNonterminal> Parser::parse(IInputStream* in) {
for (std::string const& tag : t->tags) {
std::cout << " " << tag;
}
std::cout << ": " << t->lexeme << " (" << t->loc.line << ", " << t->loc.column << ")" <<std::endl;
std::cout << ": \"" << t->lexeme << "\" (" << t->loc.line << ", " << t->loc.column << ")" <<std::endl;
if (t->tags.at(0) == Lexer::TOKEN_BAD) {
std::cout << "Bad token " << std::endl;
break;
Expand Down Expand Up @@ -118,6 +150,28 @@ std::vector<GrammarConflict> Parser::conflicts() {

#pragma mark - Parser - private

// Looks up the precedence assigned to `token`.
// Yields Precedence::UNDEFINED when the token has no entry in `precedence`;
// otherwise returns a copy of the class the entry names.
Precedence Parser::precedence_of(std::string token) {
	auto const entry = this->precedence.find(token);
	if (entry != this->precedence.end()) {
		return this->precedence_classes.at(entry->second);
	}
	return Precedence::UNDEFINED;
}

/*
 * Determines the precedence of production `p`.
 *
 * A non-empty `p->precedence` names the class to use (asserted to exist).
 * Otherwise the symbols are scanned from right to left and the precedence of
 * the first one that symbol_is_token() accepts is returned.
 * Falls back to Precedence::UNDEFINED when no such symbol is found.
 */
Precedence Parser::precedence_of(Production* p) {
	if (!p->precedence.empty()) {
		auto const cls = this->precedence_classes.find(p->precedence);
		assert(cls != this->precedence_classes.end());
		return cls->second;
	}
	for (size_t remaining = p->symbols.size(); remaining > 0; --remaining) {
		std::string const& sym = p->symbols[remaining - 1];
		if (this->symbol_is_token(sym)) {
			return this->precedence_of(sym);
		}
	}
	return Precedence::UNDEFINED;
}

/*
* Dragon book page 221
*/
Expand Down Expand Up @@ -254,28 +308,7 @@ void Parser::generate_itemsets(Type type) {

void Parser::generate_actions() {
for (std::unique_ptr<ItemSet> const& is : this->lr1_states) {
for (std::map<std::string, ItemSet*>::value_type const& kv : is->next) {
is->actions.emplace(std::piecewise_construct, std::forward_as_tuple(kv.first), std::forward_as_tuple(kv.second, nullptr));
std::map<std::string, std::vector<Production*>>::iterator it = is->reductions.find(kv.first);
if (it == is->reductions.end()) {
continue;
}
GrammarConflict gc;
gc.type = GrammarConflict::Type::ShiftReduce;
gc.state = is.get();
gc.symbol = kv.first;
for (Item const& i : kv.second->kernel) {
assert(i.production->symbols.at(i.dot - 1) == kv.first);
gc.productions.push_back(i.production);
}
gc.productions.insert(gc.productions.end(), it->second.begin(), it->second.end());
this->_conflicts.push_back(gc);
}
for (std::map<std::string, std::vector<Production*>>::value_type const& kv : is->reductions) {
std::map<std::string, Action>::iterator it = is->actions.find(kv.first);
if (it != is->actions.end()) {
continue;
}
Production* p = kv.second.front();
if (kv.second.size() > 1) {
GrammarConflict gc;
Expand All @@ -301,6 +334,50 @@ void Parser::generate_actions() {
}
is->actions.emplace(std::piecewise_construct, std::forward_as_tuple(kv.first), std::forward_as_tuple(nullptr, p));
}
for (std::map<std::string, ItemSet*>::value_type const& kv : is->next) {
std::map<std::string, Action>::iterator it = is->actions.find(kv.first);
if (it == is->actions.end()) {
is->actions.emplace(std::piecewise_construct, std::forward_as_tuple(kv.first), std::forward_as_tuple(kv.second, nullptr));
continue;
}
assert(it->second.shift == nullptr);
assert(it->second.reduce != nullptr);
Precedence ps = this->precedence_of(kv.first);
Precedence pr = this->precedence_of(it->second.reduce);
if (ps.is_defined() || pr.is_defined()) {
if (pr.level > ps.level) {
continue;
} if (ps.level > pr.level) {
it->second.shift = kv.second;
it->second.reduce = nullptr;
continue;
} else if (ps.level == pr.level) {
assert(ps.assoc == pr.assoc);
if (ps.assoc == Precedence::Associativity::LEFT) {
continue;
} else if (ps.assoc == Precedence::Associativity::RIGHT) {
it->second.shift = kv.second;
it->second.reduce = nullptr;
continue;
} else if (ps.assoc == Precedence::Associativity::NONASSOC) {
is->actions.erase(it);
continue;
}
}
}
GrammarConflict gc;
gc.type = GrammarConflict::Type::ShiftReduce;
gc.state = is.get();
gc.symbol = kv.first;
for (Item const& i : kv.second->kernel) {
assert(i.production->symbols.at(i.dot - 1) == kv.first);
gc.productions.push_back(i.production);
}
gc.productions.push_back(it->second.reduce);
this->_conflicts.push_back(gc);
it->second.shift = kv.second;
it->second.reduce = nullptr;
}
}
}

Expand Down Expand Up @@ -683,6 +760,18 @@ void Parser::debug() {
Parser::debug_production(p);
}
}
std::cout << "Actions:" << std::endl;
for (std::map<std::string, Action>::value_type const& kv : is->actions) {
std::cout << kv.first;
if (kv.second.shift != nullptr) {
assert(kv.second.reduce == nullptr);
std::cout << " -> " << kv.second.shift->index << std::endl;
} else {
assert(kv.second.reduce != nullptr);
std::cout << " <- ";
Parser::debug_production(kv.second.reduce);
}
}
std::cout << "=== done " << is->index << std::endl;
std::cout << std::endl;
}
Expand Down
40 changes: 34 additions & 6 deletions midori/src/midori/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ struct ItemSet;
typedef std::function<std::unique_ptr<ParserAST>(MatchedNonterminal*)> ProductionHandler;
typedef std::function<std::unique_ptr<MatchedNonterminal>(std::unique_ptr<MatchedNonterminal>)> RewriteHandler;


class ParserAST {
public:
virtual ~ParserAST() = 0;
Expand Down Expand Up @@ -58,23 +57,42 @@ class MatchedNonterminal : public Match {
std::vector<std::unique_ptr<Match>> terms;
std::unique_ptr<ParserAST> value;

MatchedNonterminal();
MatchedNonterminal(Production*);
// Returns the i-th term viewed as a MatchedTerminal. The dynamic_cast gives
// nullptr when that term is not a MatchedTerminal; `at` bounds-checks `i`.
MatchedTerminal* terminal(Int i) {
	Match* const term = this->terms.at(i).get();
	return dynamic_cast<MatchedTerminal*>(term);
}
// Returns the i-th term viewed as a MatchedNonterminal. The dynamic_cast
// gives nullptr when that term is not a MatchedNonterminal; `at`
// bounds-checks `i`.
MatchedNonterminal* nonterminal(Int i) {
	Match* const term = this->terms.at(i).get();
	return dynamic_cast<MatchedNonterminal*>(term);
}
};

private:
MatchedNonterminal(Production*, size_t);
/*
 * A precedence class: a numeric level paired with an associativity.
 * A level of zero means "no precedence" (see is_defined and UNDEFINED).
 */
struct Precedence {
	enum Associativity {
		LEFT,
		RIGHT,
		NONASSOC,
		NONE,
	};

	// shared value for symbols and productions without a precedence; defined out of line
	static Precedence const UNDEFINED;

	UInt level;
	Associativity assoc;

	Precedence(UInt l, Associativity a)
		: level(l)
		, assoc(a) {
	}

	// true when the level is greater than zero
	bool is_defined() const {
		bool const defined = this->level > 0;
		return defined;
	}
};

struct Production {
Int index;
std::string target;
std::vector<std::string> symbols;
std::string precedence;
ProductionHandler handler;
RewriteHandler rewrite;

Expand Down Expand Up @@ -146,8 +164,11 @@ class Parser {

void add_token(std::string, std::unique_ptr<RegexAST>);
void add_skip(std::string);
void add_production(std::string, std::vector<std::string>, ProductionHandler);
void add_production(std::string, std::vector<std::string>, ProductionHandler, RewriteHandler);
Int add_production(std::string, std::vector<std::string>, ProductionHandler);
Int add_production(std::string, std::vector<std::string>, ProductionHandler, RewriteHandler);
bool set_precedence_class(std::string, UInt level, Precedence::Associativity);
bool set_precedence(std::string, std::string);
bool set_precedence(Int, std::string);
void generate(Type, std::string);
std::unique_ptr<MatchedNonterminal> parse(IInputStream*);
std::vector<GrammarConflict> conflicts();
Expand All @@ -168,6 +189,10 @@ class Parser {
std::map<std::string, std::vector<Production*>> nonterminals;
std::vector<std::unique_ptr<Production>> productions;

std::map<std::string, std::string> precedence;
std::map<std::string, Precedence> precedence_classes;
std::map<UInt, std::string> precedence_levels;

std::set<std::string> nullable;
std::map<std::string, std::set<std::string>> firsts;

Expand All @@ -190,6 +215,9 @@ class Parser {
return this->terminals.find(s) != this->terminals.end();
}

Precedence precedence_of(std::string);
Precedence precedence_of(Production*);

void generate_first_sets();
// for LALR(1), first generate the LR(0) itemsets,
// then calculate the lookaheads for each kernel item,
Expand Down
2 changes: 1 addition & 1 deletion midori/src/midori/regex_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ std::unique_ptr<Parser> RegexEngine::make() {
return std::unique_ptr<ParserAST>(new ParserValueRange(RegexASTGroup::Range((UInt) l1, (UInt) l2)));
});
p->add_production("mul_range", { "LBRACE", "dec_int", "RBRACE" }, nullptr, [](std::unique_ptr<MatchedNonterminal> m) -> std::unique_ptr<MatchedNonterminal> {
std::unique_ptr<MatchedNonterminal> ret(new MatchedNonterminal);
std::unique_ptr<MatchedNonterminal> ret(new MatchedNonterminal(nullptr));
LocationInfo loc;
std::unique_ptr<MatchedTerminal> t0(new MatchedTerminal(std::unique_ptr<Token>(new Token({ "DEC" }, "0", loc))));
std::unique_ptr<MatchedTerminal> t1(new MatchedTerminal(std::unique_ptr<Token>(new Token({ "COMMA" }, "", loc))));
Expand Down
Loading

0 comments on commit e6da081

Please sign in to comment.