// Patch summary: adds operator precedence / associativity to the midori Parser.
//
// parser.h
//   * New struct Precedence { level, assoc }. is_defined() is `level > 0`, and
//     Precedence::UNDEFINED is constructed as (0, Associativity::NONE).
//   * Production gains a `precedence` string (name of a precedence class).
//   * Parser gains the maps `precedence`, `precedence_classes` and
//     `precedence_levels`, plus set_precedence_class(), two set_precedence()
//     overloads and two private precedence_of() overloads.
//   * MatchedNonterminal keeps a single constructor taking Production*.
//
// parser.cpp
//   * Parser::TOKEN_MIDORI changes from "#" to "$#".
//   * MatchedNonterminal(Production* p): `terms` is sized to p->symbols.size(),
//     or 0 when p is nullptr.
//   * add_production() now returns the index assigned to the new production
//     (productions.size() taken before the push_back).
//   * set_precedence_class(name, level, assoc): returns false when `level` is
//     already registered under a different class name. Otherwise it creates the
//     class, or moves an existing class to the new level/assoc after erasing
//     its old level entry, records level -> name and returns true.
//   * set_precedence(symbol, name) / set_precedence(production, name): return
//     false when `name` is not a known precedence class; otherwise store the
//     class name on the symbol / production. The production overload indexes
//     with productions.at(), which throws on an out-of-range index.
//   * generate(): the $root production is now registered with a nullptr
//     handler instead of a lambda that moved m->value.
//   * precedence_of(token): UNDEFINED when the token has no entry in
//     `precedence`, otherwise the class looked up with precedence_classes.at().
//   * precedence_of(Production*): uses the production's own class when its
//     `precedence` string is non-empty (asserting the class exists); otherwise
//     scans the symbols from last to first and uses the first one for which
//     symbol_is_token() is true; otherwise UNDEFINED.
//   * generate_actions(): reduce actions are now emplaced first, then the
//     `next` (shift) entries are visited. When a shift meets an existing reduce
//     action for the same symbol:
//       - higher reduce level  -> keep the reduce
//       - higher shift level   -> replace with the shift
//       - equal level, LEFT    -> keep the reduce
//       - equal level, RIGHT   -> replace with the shift
//       - equal level, NONASSOC-> erase the action for that symbol
//       - anything else        -> record a ShiftReduce GrammarConflict and
//                                 replace the reduce with the shift
//   * debug(): prints an "Actions:" section per state (" -> " followed by the
//     target state index for a shift, " <- " followed by the production for a
//     reduce).
//
// regex_engine.cpp
//   * The mul_range rewrite handler constructs MatchedNonterminal(nullptr) now
//     that the default constructor is gone.
//
// tests/parser.cpp
//   * New ParserTest.Precedence parses "a-a+a" with the grammar
//     expr -> expr plus expr | expr minus expr | a, in four configurations:
//     no precedence class (4 conflicts, result -1), LEFT (0 conflicts,
//     result 1), RIGHT (0 conflicts, result -1) and NONASSOC (0 conflicts,
//     parse() returns nullptr).
//
// NOTE(review): this copy of the patch appears to have lost its line breaks
//   and the text inside angle brackets (e.g. a bare `#include`,
//   `std::unique_ptr t`, `std::map::iterator`, and the token trace line in
//   Parser::parse) -- regenerate it from the repository before applying.
// NOTE(review): in generate_actions() the precedence branch is entered when
//   EITHER side is defined (`ps.is_defined() || pr.is_defined()`). Since
//   UNDEFINED has level 0, a clash where only one side has a precedence is
//   decided by the level comparison and is not added to `_conflicts`.
//   Confirm this is intended; bison documents that case as "default to shift".
// NOTE(review): `if (pr.level > ps.level) { continue; } if (ps.level > pr.level)`
//   -- the second `if` is not chained with `else`. Behaviour is the same
//   because the first branch continues, but it reads like a typo.
// NOTE(review): equal levels with Associativity::NONE match none of the
//   LEFT/RIGHT/NONASSOC branches and fall through to the conflict report.
// NOTE(review): `(void) p->precedence;` in add_production() has no effect.
// NOTE(review): the "plus" handler in the new test writes "x ..., y ..." to
//   std::cout -- presumably leftover debug output.
diff --git a/midori/src/midori/parser.cpp b/midori/src/midori/parser.cpp index 573c82d..237b798 100644 --- a/midori/src/midori/parser.cpp +++ b/midori/src/midori/parser.cpp @@ -2,8 +2,9 @@ #include "helper.h" #include +Precedence const Precedence::UNDEFINED(0, Precedence::Associativity::NONE); std::string const Parser::ROOT = "$root"; -std::string const Parser::TOKEN_MIDORI = "#"; +std::string const Parser::TOKEN_MIDORI = "$#"; ParserAST::~ParserAST() { return; @@ -15,13 +16,7 @@ Match::~Match() { MatchedTerminal::MatchedTerminal(std::unique_ptr t) : token(std::move(t)) { return; } -MatchedNonterminal::MatchedNonterminal() : MatchedNonterminal(nullptr, 0) { - return; -} -MatchedNonterminal::MatchedNonterminal(Production* p) : MatchedNonterminal(p, p->symbols.size()) { - return; -} -MatchedNonterminal::MatchedNonterminal(Production* p, size_t n) : production(p) , terms(n), value(nullptr) { +MatchedNonterminal::MatchedNonterminal(Production* p) : production(p), terms((p == nullptr) ? 0 : p->symbols.size()), value(nullptr) { return; } @@ -34,25 +29,62 @@ void Parser::add_skip(std::string symbol) { this->lexer.add_skip(symbol); } -void Parser::add_production(std::string target, std::vector symbols, ProductionHandler handler) { - this->add_production(target, symbols, handler, nullptr); +Int Parser::add_production(std::string target, std::vector symbols, ProductionHandler handler) { + return this->add_production(target, symbols, handler, nullptr); } -void Parser::add_production(std::string target, std::vector symbols, ProductionHandler handler, RewriteHandler rewrite) { +Int Parser::add_production(std::string target, std::vector symbols, ProductionHandler handler, RewriteHandler rewrite) { + Int n = this->productions.size(); std::unique_ptr p(new Production); - p->index = this->productions.size(); + p->index = n; p->target = target; p->symbols = symbols; + (void) p->precedence; p->handler = handler; p->rewrite = rewrite; this->nonterminals[target].push_back(p.get()); 
this->productions.push_back(std::move(p)); + return n; +} + +bool Parser::set_precedence_class(std::string precedence, UInt level, Precedence::Associativity assoc) { + std::map::iterator it = this->precedence_levels.find(level); + if (it != this->precedence_levels.end()) { + if (precedence != it->second) { + return false; + } + } + std::map::iterator it2 = this->precedence_classes.find(precedence); + if (it2 == this->precedence_classes.end()) { + this->precedence_classes.emplace(std::piecewise_construct, std::forward_as_tuple(precedence), std::forward_as_tuple(level, assoc)); + } else { + this->precedence_levels.erase(it2->second.level); + it2->second.level = level; + it2->second.assoc = assoc; + } + this->precedence_levels[level] = precedence; + return true; +} + +bool Parser::set_precedence(std::string symbol, std::string precedence) { + if (this->precedence_classes.find(precedence) == this->precedence_classes.end()) { + return false; + } + this->precedence[symbol] = precedence; + return true; +} + +bool Parser::set_precedence(Int production, std::string precedence) { + if (this->precedence_classes.find(precedence) == this->precedence_classes.end()) { + return false; + } + Production* p = this->productions.at(production).get(); + p->precedence = precedence; + return true; } void Parser::generate(Type type, std::string start) { this->terminals.insert(Lexer::TOKEN_END); - this->add_production(Parser::ROOT, { start }, [](MatchedNonterminal* m) -> std::unique_ptr { - return std::move(m->value); - }); + this->add_production(Parser::ROOT, { start }, nullptr); this->lexer.generate(); this->generate_first_sets(); this->generate_itemsets(type); @@ -79,7 +111,7 @@ std::unique_ptr Parser::parse(IInputStream* in) { for (std::string const& tag : t->tags) { std::cout << " " << tag; } - std::cout << ": " << t->lexeme << " (" << t->loc.line << ", " << t->loc.column << ")" <lexeme << "\" (" << t->loc.line << ", " << t->loc.column << ")" <tags.at(0) == Lexer::TOKEN_BAD) { 
std::cout << "Bad token " << std::endl; break; @@ -118,6 +150,28 @@ std::vector Parser::conflicts() { #pragma mark - Parser - private +Precedence Parser::precedence_of(std::string token) { + std::map::iterator it = this->precedence.find(token); + if (it == this->precedence.end()) { + return Precedence::UNDEFINED; + } + return this->precedence_classes.at(it->second); +} + +Precedence Parser::precedence_of(Production* p) { + if (p->precedence.length() > 0) { + std::map::iterator it = this->precedence_classes.find(p->precedence); + assert(it != this->precedence_classes.end()); + return it->second; + } + for (std::vector::reverse_iterator it = p->symbols.rbegin(); it != p->symbols.rend(); it++) { + if (this->symbol_is_token(*it)) { + return this->precedence_of(*it); + } + } + return Precedence::UNDEFINED; +} + /* * Dragon book page 221 */ @@ -254,28 +308,7 @@ void Parser::generate_itemsets(Type type) { void Parser::generate_actions() { for (std::unique_ptr const& is : this->lr1_states) { - for (std::map::value_type const& kv : is->next) { - is->actions.emplace(std::piecewise_construct, std::forward_as_tuple(kv.first), std::forward_as_tuple(kv.second, nullptr)); - std::map>::iterator it = is->reductions.find(kv.first); - if (it == is->reductions.end()) { - continue; - } - GrammarConflict gc; - gc.type = GrammarConflict::Type::ShiftReduce; - gc.state = is.get(); - gc.symbol = kv.first; - for (Item const& i : kv.second->kernel) { - assert(i.production->symbols.at(i.dot - 1) == kv.first); - gc.productions.push_back(i.production); - } - gc.productions.insert(gc.productions.end(), it->second.begin(), it->second.end()); - this->_conflicts.push_back(gc); - } for (std::map>::value_type const& kv : is->reductions) { - std::map::iterator it = is->actions.find(kv.first); - if (it != is->actions.end()) { - continue; - } Production* p = kv.second.front(); if (kv.second.size() > 1) { GrammarConflict gc; @@ -301,6 +334,50 @@ void Parser::generate_actions() { } 
is->actions.emplace(std::piecewise_construct, std::forward_as_tuple(kv.first), std::forward_as_tuple(nullptr, p)); } + for (std::map::value_type const& kv : is->next) { + std::map::iterator it = is->actions.find(kv.first); + if (it == is->actions.end()) { + is->actions.emplace(std::piecewise_construct, std::forward_as_tuple(kv.first), std::forward_as_tuple(kv.second, nullptr)); + continue; + } + assert(it->second.shift == nullptr); + assert(it->second.reduce != nullptr); + Precedence ps = this->precedence_of(kv.first); + Precedence pr = this->precedence_of(it->second.reduce); + if (ps.is_defined() || pr.is_defined()) { + if (pr.level > ps.level) { + continue; + } if (ps.level > pr.level) { + it->second.shift = kv.second; + it->second.reduce = nullptr; + continue; + } else if (ps.level == pr.level) { + assert(ps.assoc == pr.assoc); + if (ps.assoc == Precedence::Associativity::LEFT) { + continue; + } else if (ps.assoc == Precedence::Associativity::RIGHT) { + it->second.shift = kv.second; + it->second.reduce = nullptr; + continue; + } else if (ps.assoc == Precedence::Associativity::NONASSOC) { + is->actions.erase(it); + continue; + } + } + } + GrammarConflict gc; + gc.type = GrammarConflict::Type::ShiftReduce; + gc.state = is.get(); + gc.symbol = kv.first; + for (Item const& i : kv.second->kernel) { + assert(i.production->symbols.at(i.dot - 1) == kv.first); + gc.productions.push_back(i.production); + } + gc.productions.push_back(it->second.reduce); + this->_conflicts.push_back(gc); + it->second.shift = kv.second; + it->second.reduce = nullptr; + } } } @@ -683,6 +760,18 @@ void Parser::debug() { Parser::debug_production(p); } } + std::cout << "Actions:" << std::endl; + for (std::map::value_type const& kv : is->actions) { + std::cout << kv.first; + if (kv.second.shift != nullptr) { + assert(kv.second.reduce == nullptr); + std::cout << " -> " << kv.second.shift->index << std::endl; + } else { + assert(kv.second.reduce != nullptr); + std::cout << " <- "; + 
Parser::debug_production(kv.second.reduce); + } + } std::cout << "=== done " << is->index << std::endl; std::cout << std::endl; } diff --git a/midori/src/midori/parser.h b/midori/src/midori/parser.h index cc18cae..08b5f76 100644 --- a/midori/src/midori/parser.h +++ b/midori/src/midori/parser.h @@ -20,7 +20,6 @@ struct ItemSet; typedef std::function(MatchedNonterminal*)> ProductionHandler; typedef std::function(std::unique_ptr)> RewriteHandler; - class ParserAST { public: virtual ~ParserAST() = 0; @@ -58,7 +57,6 @@ class MatchedNonterminal : public Match { std::vector> terms; std::unique_ptr value; - MatchedNonterminal(); MatchedNonterminal(Production*); MatchedTerminal* terminal(Int i) { return dynamic_cast(this->terms.at(i).get()); @@ -66,15 +64,35 @@ class MatchedNonterminal : public Match { MatchedNonterminal* nonterminal(Int i) { return dynamic_cast(this->terms.at(i).get()); } +}; -private: - MatchedNonterminal(Production*, size_t); +struct Precedence { + enum Associativity { + LEFT, + RIGHT, + NONASSOC, + NONE, + }; + + static Precedence const UNDEFINED; + + UInt level; + Associativity assoc; + + Precedence(UInt l, Associativity a) : level(l), assoc(a) { + return; + } + + bool is_defined() const { + return this->level > 0; + } }; struct Production { Int index; std::string target; std::vector symbols; + std::string precedence; ProductionHandler handler; RewriteHandler rewrite; @@ -146,8 +164,11 @@ class Parser { void add_token(std::string, std::unique_ptr); void add_skip(std::string); - void add_production(std::string, std::vector, ProductionHandler); - void add_production(std::string, std::vector, ProductionHandler, RewriteHandler); + Int add_production(std::string, std::vector, ProductionHandler); + Int add_production(std::string, std::vector, ProductionHandler, RewriteHandler); + bool set_precedence_class(std::string, UInt level, Precedence::Associativity); + bool set_precedence(std::string, std::string); + bool set_precedence(Int, std::string); void 
generate(Type, std::string); std::unique_ptr parse(IInputStream*); std::vector conflicts(); @@ -168,6 +189,10 @@ class Parser { std::map> nonterminals; std::vector> productions; + std::map precedence; + std::map precedence_classes; + std::map precedence_levels; + std::set nullable; std::map> firsts; @@ -190,6 +215,9 @@ class Parser { return this->terminals.find(s) != this->terminals.end(); } + Precedence precedence_of(std::string); + Precedence precedence_of(Production*); + void generate_first_sets(); // for LALR(1), first generate the LR(0) itemsets, // then calculate the lookaheads for each kernel item, diff --git a/midori/src/midori/regex_engine.cpp b/midori/src/midori/regex_engine.cpp index 0e4a8cb..7d8ae07 100644 --- a/midori/src/midori/regex_engine.cpp +++ b/midori/src/midori/regex_engine.cpp @@ -332,7 +332,7 @@ std::unique_ptr RegexEngine::make() { return std::unique_ptr(new ParserValueRange(RegexASTGroup::Range((UInt) l1, (UInt) l2))); }); p->add_production("mul_range", { "LBRACE", "dec_int", "RBRACE" }, nullptr, [](std::unique_ptr m) -> std::unique_ptr { - std::unique_ptr ret(new MatchedNonterminal); + std::unique_ptr ret(new MatchedNonterminal(nullptr)); LocationInfo loc; std::unique_ptr t0(new MatchedTerminal(std::unique_ptr(new Token({ "DEC" }, "0", loc)))); std::unique_ptr t1(new MatchedTerminal(std::unique_ptr(new Token({ "COMMA" }, "", loc)))); diff --git a/midori/tests/parser.cpp b/midori/tests/parser.cpp index 72bb730..1ddcba3 100644 --- a/midori/tests/parser.cpp +++ b/midori/tests/parser.cpp @@ -175,3 +175,78 @@ TEST_F(ParserTest, RegexGroup) { ASSERT_NE(p.parse(&fis), nullptr); } } + +TEST_F(ParserTest, Precedence) { + auto prepare = [](Parser* p) -> void { + p->add_token("a", std::unique_ptr(new RegexASTLiteral('a'))); + p->add_token("plus", std::unique_ptr(new RegexASTLiteral('+'))); + p->add_token("minus", std::unique_ptr(new RegexASTLiteral('-'))); + p->set_precedence("plus", "precedence"); + p->set_precedence("minus", "precedence"); + 
p->add_production("expr", { "expr", "plus", "expr" }, [](MatchedNonterminal* m) -> std::unique_ptr { + Int x = m->nonterminal(0)->value->get(); + Int y = m->nonterminal(2)->value->get(); + std::cout << "x " << x << ", y " << y << std::endl; + return std::unique_ptr(new ParserValue(x + y)); + }); + p->add_production("expr", { "expr", "minus", "expr" }, [](MatchedNonterminal* m) -> std::unique_ptr { + Int x = m->nonterminal(0)->value->get(); + Int y = m->nonterminal(2)->value->get(); + return std::unique_ptr(new ParserValue(x - y)); + }); + p->add_production("expr", { "a" }, [](MatchedNonterminal* m) -> std::unique_ptr { + return std::unique_ptr(new ParserValue(1)); + }); + }; + for (Parser::Type const t : this->types) { + Parser p; + prepare(&p); + p.generate(t, "expr"); + ASSERT_EQ(p.conflicts().size(), 4); + std::stringstream ss; + ss << "a-a+a"; + FileInputStream fis(&ss); + std::unique_ptr m = p.parse(&fis); + Int z = m->value->get(); + ASSERT_EQ(z, -1); + } + for (Parser::Type const t : this->types) { + Parser p; + p.set_precedence_class("precedence", 1, Precedence::Associativity::LEFT); + prepare(&p); + p.generate(t, "expr"); + ASSERT_EQ(p.conflicts().size(), 0); + std::stringstream ss; + ss << "a-a+a"; + FileInputStream fis(&ss); + std::unique_ptr m = p.parse(&fis); + Int z = m->value->get(); + ASSERT_EQ(z, 1); + } + for (Parser::Type const t : this->types) { + Parser p; + p.set_precedence_class("precedence", 1, Precedence::Associativity::RIGHT); + prepare(&p); + p.generate(t, "expr"); + ASSERT_EQ(p.conflicts().size(), 0); + std::stringstream ss; + ss << "a-a+a"; + FileInputStream fis(&ss); + std::unique_ptr m = p.parse(&fis); + Int z = m->value->get(); + ASSERT_EQ(z, -1); + } + for (Parser::Type const t : this->types) { + Parser p; + p.set_precedence_class("precedence", 1, Precedence::Associativity::NONE); + p.set_precedence_class("precedence", 1, Precedence::Associativity::NONASSOC); + prepare(&p); + p.generate(t, "expr"); + ASSERT_EQ(p.conflicts().size(), 
0); + std::stringstream ss; + ss << "a-a+a"; + FileInputStream fis(&ss); + std::unique_ptr m = p.parse(&fis); + ASSERT_EQ(m, nullptr); + } +}