From ee405735db1128ed020557e7f30bc8cef8986713 Mon Sep 17 00:00:00 2001 From: Raekye Date: Sat, 24 Aug 2019 15:42:26 -0400 Subject: [PATCH] hmmmm --- README.md | 16 ++++++++-- midori/CMakeLists.txt | 6 +++- midori/src/midori/CMakeLists.txt | 1 + midori/src/midori/parser.cpp | 54 ++++++++++++++++++-------------- midori/src/midori/parser.h | 10 +++--- 5 files changed, 55 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index bdce0d2..51010b6 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Example build: `mkdir build && cd build && cmake .. && make && make test`. - `sayaka`: successor to `madoka/`, had ideas on what to do differently. The ideas were pre 2014-summer, most of the work on it is post 2014-summer - `siyu`: hand written LL(1) regex parser, NFA state generator, DFA state generator, lexer-generator, and parser-generator - `tk`: successor to `siyu/`, completed LALR(1) parser generator -- `midori`: successor to `tk/`, hmmmm +- `midori`: successor to `tk/`, fixed/much improved LALR(1) parser generator ### Siyu - hand written, recursive descent basic regex parser (builds AST) @@ -25,7 +25,12 @@ Example build: `mkdir build && cd build && cmake .. && make && make test`. ### Midori - lexer/finite automata now support ranges using interval trees -- rewrote parser generator/parsing algorithm several times ([SLR(1)][2], [LR(1)][3], [LALR(1)][4]) +- rewrote parser generator/parsing algorithm several times: + - [SLR(1)][2] + - [LR(1)][3] + - [LALR(1)][4] based on the dragon book + - [LALR(1)][5] using DeRemer and Pennello's [lookahead algorithm][6], based on [PLY][7], + and as described in The Theory and Practice of Compiler Writing, and Parsing Theory Volume 2 ## Regex grammar - multiplication is repetition @@ -197,8 +202,10 @@ dec_int - https://web.cs.dal.ca/~sjackson/lalr1.html - https://stackoverflow.com/questions/8242509/how-does-the-yacc-bison-lalr1-algorithm-treat-empty-rules - https://stackoverflow.com/questions/57120176/grammar-matching-regex-character-classes-trailing-dash/ -- Compilers: Principles, Techniques, and Tools (the Dragon book) +- Compilers: Principles, Techniques, and Tools (the dragon book) - Parsing Theory Volume 2: LR(k) and LL(k) Parsing +- The Theory and Practice of Compiler Writing +- Efficient Computation of LALR(1) Look-Ahead Sets, DeRemer and Pennello (1982) ([link][6]) - Efficient Parsing for Natural Language: A Fast Algorithm for Practical Systems - http://scottmcpeak.com/elkhound/elkhound.ps - https://web.stanford.edu/class/archive/cs/cs143/cs143.1128/ @@ -207,3 +214,6 @@ dec_int [2]: https://github.com/Raekye/hmmm/tree/1130d9626c838b36b54155926df05da25e4e828f/midori/src/midori/parser.cpp [3]: https://github.com/Raekye/hmmm/tree/ca9659d56b1876f5a325463ebcdb04aec0e3cfbe/midori/src/midori/parser.cpp [4]: https://github.com/Raekye/hmmm/tree/a4cb4c7e844ef49d675a9faac622d8d57c8da184/midori/src/midori/parser.cpp +[5]: https://github.com/Raekye/hmmm/tree/b0b7932e6c7ba5db770fd2ebe5ea3c5b6bfe0a79/midori/src/midori/parser.cpp +[6]: https://dl.acm.org/citation.cfm?id=357187 +[7]: https://github.com/dabeaz/ply diff --git a/midori/CMakeLists.txt b/midori/CMakeLists.txt index 4425598..946d99b 100644 --- a/midori/CMakeLists.txt +++ b/midori/CMakeLists.txt @@ -1,8 +1,12 @@ cmake_minimum_required (VERSION 3.11) +set(CMAKE_CXX_STANDARD 11) + set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -set(CMAKE_CXX_STANDARD 11) +add_library(coverage INTERFACE) +target_compile_options(coverage INTERFACE -O0 -g --coverage) +target_link_libraries(coverage INTERFACE --coverage) enable_testing() diff --git a/midori/src/midori/CMakeLists.txt b/midori/src/midori/CMakeLists.txt index 8c8a806..46a6cd5 100644 --- a/midori/src/midori/CMakeLists.txt +++ b/midori/src/midori/CMakeLists.txt @@ -12,3 +12,4 @@ set(SOURCES add_library(midori SHARED ${SOURCES}) target_compile_options(midori PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-unknown-pragmas) +target_link_libraries(midori PUBLIC coverage) diff --git a/midori/src/midori/parser.cpp b/midori/src/midori/parser.cpp index 5fac9ff..35e3c85 100644 --- a/midori/src/midori/parser.cpp +++ b/midori/src/midori/parser.cpp @@ -33,7 +33,7 @@ Int Parser::add_production(std::string target, std::vector symbols, return this->add_production(target, symbols, handler, nullptr); } Int Parser::add_production(std::string target, std::vector symbols, ProductionHandler handler, RewriteHandler rewrite) { - Int n = this->productions.size(); + UInt n = this->productions.size(); std::unique_ptr p(new Production); p->index = n; p->target = target; @@ -98,7 +98,7 @@ void Parser::generate(Type type, std::string start) { */ this->generate_reads_relations(); this->generate_read_sets(); - this->generate_includes_lookback(); + this->generate_includes_lookback_relations(); this->generate_follow_sets(); this->generate_lookaheads(); this->lr1_states = std::move(this->lr0_states); @@ -253,7 +253,7 @@ void Parser::generate_lr1_closure(ItemSet* is) { continue; } std::vector l; - size_t j = (size_t) (i.dot + 1); + size_t j = i.dot + 1; while (j < i.production->symbols.size()) { std::string s2 = i.production->symbols.at(j); std::map>::iterator it = this->firsts.find(s2); @@ -530,6 +530,9 @@ void Parser::generate_lalr_itemsets() { } } +// see The Theory and Practice of Compiler Writing, page 382 +// see Tarjan's strongly connected components algorithm +// - https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm template void Parser::digraph(std::vector* nodes, GraphRelation r, GraphFunction g, std::map>* f) { std::map weights; std::stack stack; @@ -542,8 +545,8 @@ template void Parser::digraph(std::vector* nodes, Gr template void Parser::traverse(T x, std::stack* stack, std::map* weights, std::vector* nodes, GraphRelation r, GraphFunction g, std::map>* f) { stack->push(x); - size_t d = stack->size(); - weights->operator[](x) = (Int) d; + Int d = (Int) stack->size(); + weights->operator[](x) = d; f->operator[](x) = g(x); typename std::map::iterator it_x = weights->find(x); std::set& f_x = f->operator[](x); @@ -561,7 +564,7 @@ template void Parser::traverse(T x, std::stack* stac std::set& f_y = f->operator[](y); f_x.insert(f_y.begin(), f_y.end()); } - if (it_x->second == (Int) d) { + if (it_x->second == d) { while (true) { T z = stack->top(); stack->pop(); @@ -574,6 +577,9 @@ template void Parser::traverse(T x, std::stack* stac } } +// see Efficient Computation of LALR(1) Look-Ahead Sets, DeRemer and Pennello (1982) +// - https://dl.acm.org/citation.cfm?id=357187 +// see The Theory and Practice of Compiler Writing, pages 375-383 void Parser::generate_reads_relations() { ItemSet* root = this->lr0_states.front().get(); assert(root->kernel.size() == 1); @@ -595,6 +601,7 @@ void Parser::generate_reads_relations() { } this->nonterminal_transitions.emplace_back(is.get(), s); LalrTransition& lt = this->nonterminal_transitions.back(); + // create the mapping even if the sets end up being empty std::set& y = this->directly_reads_relation[lt]; std::set& z = this->reads_relation[lt]; ItemSet* js = is->next.at(s); @@ -615,18 +622,18 @@ void Parser::generate_reads_relations() { } void Parser::generate_read_sets() { - std::map>& drr = this->directly_reads_relation; - std::map>& rr = this->reads_relation; - GraphRelation r = [ &rr ](LalrTransition lt) -> std::set& { - return rr.at(lt); + std::map>* drr = &(this->directly_reads_relation); + std::map>* rr = &(this->reads_relation); + GraphRelation r = [ rr ](LalrTransition lt) -> std::set& { + return rr->at(lt); }; - GraphFunction g = [ &drr ](LalrTransition lt) -> std::set& { - return drr.at(lt); + GraphFunction g = [ drr ](LalrTransition lt) -> std::set& { + return drr->at(lt); }; Parser::digraph(&(this->nonterminal_transitions), r, g, &(this->reads)); } -void Parser::generate_includes_lookback() { +void Parser::generate_includes_lookback_relations() { for (LalrTransition const& lt : this->nonterminal_transitions) { std::cout << "Looking at transition "; Parser::debug_lalr_transition(lt); @@ -649,14 +656,15 @@ void Parser::generate_includes_lookback() { continue; } size_t l = k + 1; - while (l < i.production->symbols.size()) { - std::string s = i.production->symbols.at(l); - if (this->nullable.find(s) == this->nullable.end()) { + size_t n = i.production->symbols.size(); + while (l < n) { + std::string s2 = i.production->symbols.at(l); + if (this->nullable.find(s2) == this->nullable.end()) { break; } l++; } - if (l == i.production->symbols.size()) { + if (l == n) { includes.push_back(lt2); } } @@ -676,13 +684,13 @@ void Parser::generate_includes_lookback() { } void Parser::generate_follow_sets() { - std::map>& ir = this->includes_relation; - std::map>& rs = this->reads; - GraphRelation r = [ &ir ](LalrTransition lt) -> std::set& { - return ir[lt]; + std::map>* ir = &(this->includes_relation); + std::map>* rs = &(this->reads); + GraphRelation r = [ ir ](LalrTransition lt) -> std::set& { + return ir->operator[](lt); }; - GraphFunction g = [ &rs ](LalrTransition lt) -> std::set& { - return rs.at(lt); + GraphFunction g = [ rs ](LalrTransition lt) -> std::set& { + return rs->at(lt); }; Parser::digraph(&(this->nonterminal_transitions), r, g, &(this->follows)); } diff --git a/midori/src/midori/parser.h b/midori/src/midori/parser.h index 9f41e7a..79444d9 100644 --- a/midori/src/midori/parser.h +++ b/midori/src/midori/parser.h @@ -89,7 +89,7 @@ struct Precedence { }; struct Production { - Int index; + UInt index; std::string target; std::vector symbols; std::string precedence; @@ -103,7 +103,7 @@ struct Production { struct Item { Production* production; - Int dot; + UInt dot; // for LR0 items, `terminal` is unused std::string terminal; @@ -112,7 +112,7 @@ struct Item { } bool is_done() const { - return this->dot == (Int) this->production->symbols.size(); + return this->dot == this->production->symbols.size(); } std::string next_symbol() const { @@ -134,7 +134,7 @@ struct Action { }; struct ItemSet { - Int index; + UInt index; bool accept; std::set kernel; std::set closure; @@ -279,7 +279,7 @@ class Parser { template static void traverse(T, std::stack*, std::map*, std::vector*, GraphRelation, GraphFunction, std::map>*); void generate_reads_relations(); void generate_read_sets(); - void generate_includes_lookback(); + void generate_includes_lookback_relations(); void generate_follow_sets(); void generate_lookaheads();