diff --git a/midori/src/main.cpp b/midori/src/main.cpp index 6b1aab2..be8f4d5 100644 --- a/midori/src/main.cpp +++ b/midori/src/main.cpp @@ -22,7 +22,7 @@ int test_generator() { std::fstream f("../src/parser.txt", std::fstream::in); std::unique_ptr p = ParserGenerator::from_file(&f); std::stringstream ss; - ss << "a1ab2bc3cd4d"; + ss << "a1ab2b \tc3cd4d"; FileInputStream fis(&ss); std::unique_ptr m = p->parse(&fis); assert(m != nullptr); @@ -32,7 +32,7 @@ int test_generator() { int main() { ULong x = ~0; std::cout << "-1 is " << x << std::endl; - test_generator(); test_regex_engine(); + test_generator(); return 0; } diff --git a/midori/src/midori/generator.cpp b/midori/src/midori/generator.cpp index 1aa7701..67a4035 100644 --- a/midori/src/midori/generator.cpp +++ b/midori/src/midori/generator.cpp @@ -3,28 +3,22 @@ #include "regex_engine.h" #include -class ParserASTStringList : public ParserAST { -public: - std::vector list; - ParserASTStringList(std::vector list) : list(list) { - return; - } -}; - -class ParserASTStringListList : public ParserAST { -public: - std::vector> list; - ParserASTStringListList(std::vector> list) : list(list) { - return; - } -}; +typedef std::vector StringList; +typedef std::vector StringListList; +typedef ParserValue ParserValueStringList; +typedef ParserValue ParserValueStringListList; std::unique_ptr ParserGenerator::from_file(std::istream* is) { RegexEngine re; std::unique_ptr ret(new Parser); std::unique_ptr p(new Parser); + std::string start; + p->add_token("WHITESPACE", re.parse("[ \\t]")); p->add_token("NL", re.parse("\\n")); + p->add_token("PERCENT", re.parse("%")); + p->add_token("START", re.parse("start")); + p->add_token("SKIP", re.parse("skip")); p->add_token("TOKEN", re.parse("[A-Z][a-zA-Z0-9_]*")); p->add_token("REGEX", re.parse("/[^\\n]+")); p->add_token("NONTERMINAL", re.parse("[a-z][a-zA-Z0-9_]*")); @@ -39,10 +33,23 @@ std::unique_ptr ParserGenerator::from_file(std::istream* is) { p->add_production("contents", { "line", "NL", "contents" }, nullptr); p->add_production("contents", { "line" }, nullptr); + p->add_production("line", { "line_start" }, nullptr); + p->add_production("line", { "line_skip" }, nullptr); p->add_production("line", { "line_token" }, nullptr); p->add_production("line", { "line_production" }, nullptr); p->add_production("line", {}, nullptr); + p->add_production("line_start", { "PERCENT", "START", "COLON", "NONTERMINAL" }, [ &start ](MatchedNonterminal* m) -> std::unique_ptr { + start = m->terminal(3)->token->lexeme; + return nullptr; + }); + + p->add_production("line_skip", { "PERCENT", "SKIP", "COLON", "TOKEN" }, [ &ret ](MatchedNonterminal* m) -> std::unique_ptr { + std::string skip = m->terminal(3)->token->lexeme; + ret->add_skip(skip); + return nullptr; + }); + p->add_production("line_token", { "TOKEN", "COLON", "REGEX" }, [ &ret, &re ](MatchedNonterminal* m) -> std::unique_ptr { std::string t = m->terminal(0)->token->lexeme; std::string r = m->terminal(2)->token->lexeme.substr(1); @@ -55,30 +62,29 @@ std::unique_ptr ParserGenerator::from_file(std::istream* is) { return nullptr; }); - p->add_production("line_production", { "NONTERMINAL", "COLON", "production_list", "SEMICOLON" }, [ &ret ](MatchedNonterminal* m) -> std::unique_ptr { + p->add_production("nl_optional", { "NL" }, nullptr); + p->add_production("nl_optional", {}, nullptr); + + p->add_production("line_production", { "NONTERMINAL", "nl_optional", "COLON", "production_list", "nl_optional", "SEMICOLON" }, [ &ret ](MatchedNonterminal* m) -> std::unique_ptr { std::string target = m->terminal(0)->token->lexeme; - std::unique_ptr n = std::move(m->nonterminal(2)->value); - ParserASTStringListList* l = dynamic_cast(n.get()); - for (std::vector const& p : l->list) { + StringListList& l = m->nonterminal(3)->value->get(); + for (std::vector const& p : l) { ret->add_production(target, p, nullptr); } return nullptr; }); - p->add_production("nl_optional", { "NL" }, nullptr); - p->add_production("nl_optional", {}, nullptr); - p->add_production("production_list", { "production_list", "nl_optional", "BAR", "production" }, [](MatchedNonterminal* m) -> std::unique_ptr { std::unique_ptr n1 = std::move(m->nonterminal(0)->value); std::unique_ptr n2 = std::move(m->nonterminal(3)->value); - ParserASTStringListList* l = dynamic_cast(n1.get()); - l->list.push_back(dynamic_cast(n2.get())->list); + StringListList& l = n1->get(); + l.push_back(n2->get()); return n1; }); p->add_production("production_list", { "nl_optional", "production" }, [](MatchedNonterminal* m) -> std::unique_ptr { std::unique_ptr n = std::move(m->nonterminal(1)->value); - ParserASTStringList* l = dynamic_cast(n.get()); - return std::unique_ptr(new ParserASTStringListList({ l->list })); + StringList& l = n->get(); + return std::unique_ptr(new ParserValueStringListList({ l })); }); /* * TODO: why doesn't the following work? @@ -95,31 +101,35 @@ std::unique_ptr ParserGenerator::from_file(std::istream* is) { }); p->add_production("production", { "EPSILON" }, [](MatchedNonterminal* m) -> std::unique_ptr { (void) m; - return std::unique_ptr(new ParserASTStringList({})); + return std::unique_ptr(new ParserValueStringList({})); }); p->add_production("symbol_list", { "symbol_list", "symbol" }, [](MatchedNonterminal* m) -> std::unique_ptr { std::unique_ptr n = std::move(m->nonterminal(0)->value); - ParserASTStringList* l = dynamic_cast(n.get()); - ParserASTString* s = dynamic_cast(m->nonterminal(1)->value.get()); - l->list.push_back(s->str); + StringList& l = n->get(); + std::string str = m->nonterminal(1)->value->get(); + l.push_back(str); return n; }); p->add_production("symbol_list", { "symbol" }, [](MatchedNonterminal* m) -> std::unique_ptr { - ParserASTString* s = dynamic_cast(m->nonterminal(0)->value.get()); - return std::unique_ptr(new ParserASTStringList({ s->str })); + std::string str = m->nonterminal(0)->value->get(); + return std::unique_ptr(new ParserValueStringList({ str })); }); p->add_production("symbol", { "TOKEN" }, [](MatchedNonterminal* m) -> std::unique_ptr { - return std::unique_ptr(new ParserASTString(m->terminal(0)->token->lexeme)); + return std::unique_ptr(new ParserValue(m->terminal(0)->token->lexeme)); }); p->add_production("symbol", { "NONTERMINAL" }, [](MatchedNonterminal* m) -> std::unique_ptr { - return std::unique_ptr(new ParserASTString(m->terminal(0)->token->lexeme)); + return std::unique_ptr(new ParserValue(m->terminal(0)->token->lexeme)); }); p->generate(Parser::Type::LALR1, "grammar"); FileInputStream fis(is); std::unique_ptr m = p->parse(&fis); - ret->generate(Parser::Type::LALR1, "root"); + if (start.length() == 0) { + std::cout << "No start rule" << std::endl; + return nullptr; + } + ret->generate(Parser::Type::LALR1, start); return ret; } diff --git a/midori/src/midori/lexer.h b/midori/src/midori/lexer.h index efbc5a1..a45059e 100644 --- a/midori/src/midori/lexer.h +++ b/midori/src/midori/lexer.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "global.h" #include "regex_ast.h" diff --git a/midori/src/midori/parser.cpp b/midori/src/midori/parser.cpp index 057b5ff..ad56900 100644 --- a/midori/src/midori/parser.cpp +++ b/midori/src/midori/parser.cpp @@ -8,10 +8,6 @@ ParserAST::~ParserAST() { return; } -ParserASTString::ParserASTString(std::string s) : str(s) { - return; -} - Match::~Match() { return; } diff --git a/midori/src/midori/parser.h b/midori/src/midori/parser.h index 94a650e..35a8541 100644 --- a/midori/src/midori/parser.h +++ b/midori/src/midori/parser.h @@ -14,19 +14,29 @@ class ParserAST; class MatchedNonterminal; struct Production; +template class ParserValue; typedef std::function(MatchedNonterminal*)> ProductionHandler; typedef std::function(std::unique_ptr)> RewriteHandler; + class ParserAST { public: virtual ~ParserAST() = 0; + + template T& get() { + // TODO: why does this compile? + return dynamic_cast*>(this)->value; + } }; -class ParserASTString : public ParserAST { +template class ParserValue : public ParserAST { public: - std::string str; - ParserASTString(std::string); + T value; + + ParserValue(T v) : value(std::move(v)) { + return; + } }; struct Production { diff --git a/midori/src/midori/regex_engine.cpp b/midori/src/midori/regex_engine.cpp index ef2a9d9..0e4a8cb 100644 --- a/midori/src/midori/regex_engine.cpp +++ b/midori/src/midori/regex_engine.cpp @@ -6,22 +6,8 @@ #include #include -class ParserASTRegex : public ParserAST { -public: - std::unique_ptr regex; - ParserASTRegex(std::unique_ptr r) : regex(std::move(r)) { - return; - } -}; - -class ParserASTRange : public ParserAST { -public: - UInt min; - UInt max; - ParserASTRange(UInt a, UInt b) : min(a), max(b) { - return; - } -}; +typedef ParserValue> ParserValueRegex; +typedef ParserValue ParserValueRange; RegexEngine::RegexEngine() { this->parser = RegexEngine::make(); @@ -35,8 +21,8 @@ std::unique_ptr RegexEngine::parse(std::string pattern) { if (m == nullptr) { return nullptr; } - ParserASTRegex* r = dynamic_cast(m->value.get()); - return std::move(r->regex); + std::unique_ptr& r = m->value->get>(); + return std::move(r); } std::unique_ptr RegexEngine::make() { @@ -75,24 +61,20 @@ std::unique_ptr RegexEngine::make() { return std::move(n->value); }); p->add_production("lr_or", { "lr_add", "OR", "lr_or" }, [](MatchedNonterminal* m) -> std::unique_ptr { - MatchedNonterminal* n1 = m->nonterminal(0); - MatchedNonterminal* n2 = m->nonterminal(2); - ParserASTRegex* r1 = dynamic_cast(n1->value.get()); - ParserASTRegex* r2 = dynamic_cast(n2->value.get()); - return std::unique_ptr(new ParserASTRegex(std::unique_ptr(new RegexASTOr(std::move(r1->regex), std::move(r2->regex))))); + std::unique_ptr& r1 = m->nonterminal(0)->value->get>(); + std::unique_ptr& r2 = m->nonterminal(2)->value->get>(); + return std::unique_ptr(new ParserValueRegex(std::unique_ptr(new RegexASTOr(std::move(r1), std::move(r2))))); }); p->add_production("lr_or", { "lr_add" }, [](MatchedNonterminal* m) -> std::unique_ptr { MatchedNonterminal* n = m->nonterminal(0); return std::move(n->value); }); p->add_production("lr_add", { "lr_mul", "lr_add" }, [](MatchedNonterminal* m) -> std::unique_ptr { - MatchedNonterminal* n1 = m->nonterminal(0); - MatchedNonterminal* n2 = m->nonterminal(1); - ParserASTRegex* r1 = dynamic_cast(n1->value.get()); - ParserASTRegex* r2 = dynamic_cast(n2->value.get()); + std::unique_ptr& r1 = m->nonterminal(0)->value->get>(); + std::unique_ptr& r2 = m->nonterminal(1)->value->get>(); std::vector> vec; - vec.push_back(std::move(r1->regex)); - vec.push_back(std::move(r2->regex)); + vec.push_back(std::move(r1)); + vec.push_back(std::move(r2)); std::unique_ptr ret(new RegexASTChain(std::move(vec))); /* * The following does not work @@ -101,7 +83,7 @@ std::unique_ptr RegexEngine::make() { std::move(r2->regex), })); */ - return std::unique_ptr(new ParserASTRegex(std::move(ret))); + return std::unique_ptr(new ParserValueRegex(std::move(ret))); }); p->add_production("lr_add", { "lr_mul" }, [](MatchedNonterminal* m) -> std::unique_ptr { MatchedNonterminal* n = m->nonterminal(0); @@ -110,29 +92,25 @@ std::unique_ptr RegexEngine::make() { p->add_production("lr_mul", { "not_lr", "STAR" }, [](MatchedNonterminal* m) -> std::unique_ptr { MatchedNonterminal* n1 = m->nonterminal(0); - ParserASTRegex* r = dynamic_cast(n1->value.get()); - std::unique_ptr ret(new RegexASTMultiplication(std::move(r->regex), 0, 0)); - return std::unique_ptr(new ParserASTRegex(std::move(ret))); + std::unique_ptr& r = m->nonterminal(0)->value->get>(); + std::unique_ptr ret(new RegexASTMultiplication(std::move(r), 0, 0)); + return std::unique_ptr(new ParserValueRegex(std::move(ret))); }); p->add_production("lr_mul", { "not_lr", "PLUS" }, [](MatchedNonterminal* m) -> std::unique_ptr { - MatchedNonterminal* n1 = m->nonterminal(0); - ParserASTRegex* r = dynamic_cast(n1->value.get()); - std::unique_ptr ret(new RegexASTMultiplication(std::move(r->regex), 1, 0)); - return std::unique_ptr(new ParserASTRegex(std::move(ret))); + std::unique_ptr& r = m->nonterminal(0)->value->get>(); + std::unique_ptr ret(new RegexASTMultiplication(std::move(r), 1, 0)); + return std::unique_ptr(new ParserValueRegex(std::move(ret))); }); p->add_production("lr_mul", { "not_lr", "QUESTION" }, [](MatchedNonterminal* m) -> std::unique_ptr { - MatchedNonterminal* n1 = m->nonterminal(0); - ParserASTRegex* r = dynamic_cast(n1->value.get()); - std::unique_ptr ret(new RegexASTMultiplication(std::move(r->regex), 0, 1)); - return std::unique_ptr(new ParserASTRegex(std::move(ret))); + std::unique_ptr& r = m->nonterminal(0)->value->get>(); + std::unique_ptr ret(new RegexASTMultiplication(std::move(r), 0, 1)); + return std::unique_ptr(new ParserValueRegex(std::move(ret))); }); p->add_production("lr_mul", { "not_lr", "mul_range" }, [](MatchedNonterminal* m) -> std::unique_ptr { - MatchedNonterminal* n1 = m->nonterminal(0); - MatchedNonterminal* n2 = m->nonterminal(1); - ParserASTRegex* r = dynamic_cast(n1->value.get()); - ParserASTRange* range = dynamic_cast(n2->value.get()); - std::unique_ptr ret(new RegexASTMultiplication(std::move(r->regex), (UInt) range->min, (UInt) range->max)); - return std::unique_ptr(new ParserASTRegex(std::move(ret))); + std::unique_ptr& r = m->nonterminal(0)->value->get>(); + RegexASTGroup::Range& range = m->nonterminal(1)->value->get(); + std::unique_ptr ret(new RegexASTMultiplication(std::move(r), range.first, range.second)); + return std::unique_ptr(new ParserValueRegex(std::move(ret))); }); p->add_production("lr_mul", { "not_lr" }, [](MatchedNonterminal* m) -> std::unique_ptr { MatchedNonterminal* n = m->nonterminal(0); @@ -153,7 +131,7 @@ std::unique_ptr RegexEngine::make() { }); p->add_production("not_lr", { "DOT" }, [](MatchedNonterminal* m) -> std::unique_ptr { (void) m; - return std::unique_ptr(new ParserASTRegex(std::unique_ptr(RegexASTGroup::make(false, { 0, RegexASTGroup::UNICODE_MAX })))); + return std::unique_ptr(new ParserValueRegex(std::unique_ptr(RegexASTGroup::make(false, { 0, RegexASTGroup::UNICODE_MAX })))); }); p->add_production("parentheses", { "LPAREN", "lr_or", "RPAREN" }, [](MatchedNonterminal* m) -> std::unique_ptr { @@ -184,7 +162,7 @@ std::unique_ptr RegexEngine::make() { MatchedTerminal* n = m->terminal(0); Long ch = utf8::codepoint_from_string(n->token->lexeme, 0, nullptr); assert(ch >= 0); - return std::unique_ptr(new ParserASTRegex(std::unique_ptr(new RegexASTLiteral(ch)))); + return std::unique_ptr(new ParserValueRegex(std::unique_ptr(new RegexASTLiteral(ch)))); }); p->add_production("group", { "LBRACKET", "group_contents", "RBRACKET" }, [](MatchedNonterminal* m) -> std::unique_ptr { @@ -193,24 +171,24 @@ std::unique_ptr RegexEngine::make() { }); p->add_production("group", { "LBRACKET", "HAT", "group_contents", "RBRACKET" }, [](MatchedNonterminal* m) -> std::unique_ptr { MatchedNonterminal* n = m->nonterminal(2); - ParserASTRegex* r = dynamic_cast(n->value.get()); - RegexASTGroup* g = dynamic_cast(r->regex.get()); + std::unique_ptr& r = n->value->get>(); + RegexASTGroup* g = dynamic_cast(r.get()); g->negate = true; return std::move(n->value); }); p->add_production("group_contents", { "group_element", "group_contents" }, [](MatchedNonterminal* m) -> std::unique_ptr { MatchedNonterminal* n1 = m->nonterminal(0); MatchedNonterminal* n2 = m->nonterminal(1); - ParserASTRegex* r1 = dynamic_cast(n1->value.get()); - ParserASTRegex* r2 = dynamic_cast(n2->value.get()); - RegexASTGroup* g1 = dynamic_cast(r1->regex.get()); - RegexASTGroup* g2 = dynamic_cast(r2->regex.get()); + std::unique_ptr& r1 = n1->value->get>(); + std::unique_ptr& r2 = n2->value->get>(); + RegexASTGroup* g1 = dynamic_cast(r1.get()); + RegexASTGroup* g2 = dynamic_cast(r2.get()); assert(g1->span->next == nullptr); std::unique_ptr car(new RegexASTGroup::RangeList); car->range.first = g1->span->range.first; car->range.second = g1->span->range.second; car->next = std::move(g2->span); - return std::unique_ptr(new ParserASTRegex(std::unique_ptr(new RegexASTGroup(false, std::move(car))))); + return std::unique_ptr(new ParserValueRegex(std::unique_ptr(new RegexASTGroup(false, std::move(car))))); }); p->add_production("group_contents", { "group_element" }, [](MatchedNonterminal* m) -> std::unique_ptr { MatchedNonterminal* n = m->nonterminal(0); @@ -226,12 +204,10 @@ std::unique_ptr RegexEngine::make() { p->add_production("group_element", { "group_element", "DASH", "group_literal" }, [](MatchedNonterminal* m) -> std::unique_ptr { MatchedNonterminal* n1 = m->nonterminal(0); MatchedNonterminal* n2 = m->nonterminal(2); - ParserASTRegex* r1 = dynamic_cast(n1->value.get()); - ParserASTRegex* r2 = dynamic_cast(n2->value.get()); - std::unique_ptr p1(std::move(r1->regex)); - std::unique_ptr p2(std::move(r2->regex)); - RegexASTGroup* g1 = dynamic_cast(p1.get()); - RegexASTGroup* g2 = dynamic_cast(p2.get()); + std::unique_ptr& r1 = n1->value->get>(); + std::unique_ptr& r2 = n2->value->get>(); + RegexASTGroup* g1 = dynamic_cast(r1.get()); + RegexASTGroup* g2 = dynamic_cast(r2.get()); assert(g1->span->next == nullptr); assert(g2->span->next == nullptr); assert(g2->span->range.first == g2->span->range.second); @@ -240,7 +216,7 @@ std::unique_ptr RegexEngine::make() { UInt c = g2->span->range.first; UInt lower = std::min({ a, b, c }); UInt upper = std::max({ a, b, c }); - return std::unique_ptr(new ParserASTRegex(std::unique_ptr(RegexASTGroup::make(false, { lower, upper })))); + return std::unique_ptr(new ParserValueRegex(std::unique_ptr(RegexASTGroup::make(false, { lower, upper })))); }); p->add_production("group_element", { "group_literal" }, [](MatchedNonterminal* m) -> std::unique_ptr { MatchedNonterminal* n = m->nonterminal(0); @@ -248,10 +224,9 @@ std::unique_ptr RegexEngine::make() { }); p->add_production("group_literal", { "group_literal_char" }, [](MatchedNonterminal* m) -> std::unique_ptr { - MatchedNonterminal* n = m->nonterminal(0); - ParserASTRegex* r = dynamic_cast(n->value.get()); - RegexASTLiteral* l = dynamic_cast(r->regex.get()); - return std::unique_ptr(new ParserASTRegex(std::unique_ptr(RegexASTGroup::make(false, { l->ch, l->ch })))); + std::unique_ptr& r = m->nonterminal(0)->value->get>(); + RegexASTLiteral* l = dynamic_cast(r.get()); + return std::unique_ptr(new ParserValueRegex(std::unique_ptr(RegexASTGroup::make(false, { l->ch, l->ch })))); }); p->add_production("group_literal_char", { "group_escaped_literal" }, [](MatchedNonterminal* m) -> std::unique_ptr { MatchedNonterminal* n = m->nonterminal(0); @@ -259,13 +234,13 @@ std::unique_ptr RegexEngine::make() { }); p->add_production("group_literal_char", { "DASH" }, [](MatchedNonterminal* m) -> std::unique_ptr { (void) m; - return std::unique_ptr(new ParserASTRegex(std::unique_ptr(new RegexASTLiteral('-')))); + return std::unique_ptr(new ParserValueRegex(std::unique_ptr(new RegexASTLiteral('-')))); }); p->add_production("group_literal_char", { "GROUP_ANY" }, [](MatchedNonterminal* m) -> std::unique_ptr { MatchedTerminal* n = m->terminal(0); Long ch = utf8::codepoint_from_string(n->token->lexeme, 0, nullptr); assert(ch >= 0); - return std::unique_ptr(new ParserASTRegex(std::unique_ptr(new RegexASTLiteral(ch)))); + return std::unique_ptr(new ParserValueRegex(std::unique_ptr(new RegexASTLiteral(ch)))); }); p->add_production("group_escaped_literal", { "ESCAPE", "group_escape_code" }, [](MatchedNonterminal* m) -> std::unique_ptr { @@ -314,21 +289,21 @@ std::unique_ptr RegexEngine::make() { return std::move(n->value); }); p->add_production("escape_absolute", { "X", "hex_int_short" }, [](MatchedNonterminal* m) -> std::unique_ptr { - std::string str = dynamic_cast(m->nonterminal(1)->value.get())->str; + std::string str = m->nonterminal(1)->value->get(); Long l = std::stol(str, nullptr, 16); assert(l >= 0); - return std::unique_ptr(new ParserASTRegex(std::unique_ptr(new RegexASTLiteral((UInt) l)))); + return std::unique_ptr(new ParserValueRegex(std::unique_ptr(new RegexASTLiteral((UInt) l)))); }); p->add_production("hex_int_short", { "hex_digit", "hex_digit" }, [](MatchedNonterminal* m) -> std::unique_ptr { - std::string str1 = dynamic_cast(m->nonterminal(0)->value.get())->str; - std::string str2 = dynamic_cast(m->nonterminal(1)->value.get())->str; - return std::unique_ptr(new ParserASTString(str1 + str2)); + std::string str1 = m->nonterminal(0)->value->get(); + std::string str2 = m->nonterminal(1)->value->get(); + return std::unique_ptr(new ParserValue(str1 + str2)); }); p->add_production("escape_absolute", { "U", "hex_int_long" }, [](MatchedNonterminal* m) -> std::unique_ptr { - std::string str = dynamic_cast(m->nonterminal(1)->value.get())->str; + std::string str = m->nonterminal(1)->value->get(); Long l = std::stol(str, nullptr, 16); assert(l >= 0); - return std::unique_ptr(new ParserASTRegex(std::unique_ptr(new RegexASTLiteral((UInt) l)))); + return std::unique_ptr(new ParserValueRegex(std::unique_ptr(new RegexASTLiteral((UInt) l)))); }); p->add_production("hex_int_long", { "hex_digit", "hex_digit", "hex_digit", "hex_digit", @@ -336,11 +311,9 @@ std::unique_ptr RegexEngine::make() { }, [](MatchedNonterminal* m) -> std::unique_ptr { std::string str = ""; for (Int i = 0; i < 8; i++) { - str += dynamic_cast(m->nonterminal(i)->value.get())->str; + str += m->nonterminal(i)->value->get(); } - std::string str1 = dynamic_cast(m->nonterminal(0)->value.get())->str; - std::string str2 = dynamic_cast(m->nonterminal(1)->value.get())->str; - return std::unique_ptr(new ParserASTString(str)); + return std::unique_ptr(new ParserValue(str)); }); /* p->add_production("hex_digit", { "DEC" }, [](MatchedNonterminal* m) -> std::unique_ptr { @@ -348,17 +321,15 @@ std::unique_ptr RegexEngine::make() { }); */ p->add_production("hex_digit", { "HEX" }, [](MatchedNonterminal* m) -> std::unique_ptr { - return std::unique_ptr(new ParserASTString(m->terminal(0)->token->lexeme)); + return std::unique_ptr(new ParserValue(m->terminal(0)->token->lexeme)); }); p->add_production("mul_range", { "LBRACE", "dec_int", "COMMA", "dec_int", "RBRACE" }, [](MatchedNonterminal* m) -> std::unique_ptr { - MatchedNonterminal* n1 = m->nonterminal(1); - MatchedNonterminal* n2 = m->nonterminal(3); - ParserASTString* r1 = dynamic_cast(n1->value.get()); - ParserASTString* r2 = dynamic_cast(n2->value.get()); - Long l1 = std::stol(r1->str.c_str(), nullptr, 10); - Long l2 = std::stol(r2->str.c_str(), nullptr, 10); - return std::unique_ptr(new ParserASTRange(l1, l2)); + std::string str1 = m->nonterminal(1)->value->get(); + std::string str2 = m->nonterminal(3)->value->get(); + Long l1 = std::stol(str1.c_str(), nullptr, 10); + Long l2 = std::stol(str2.c_str(), nullptr, 10); + return std::unique_ptr(new ParserValueRange(RegexASTGroup::Range((UInt) l1, (UInt) l2))); }); p->add_production("mul_range", { "LBRACE", "dec_int", "RBRACE" }, nullptr, [](std::unique_ptr m) -> std::unique_ptr { std::unique_ptr ret(new MatchedNonterminal); @@ -375,11 +346,11 @@ std::unique_ptr RegexEngine::make() { p->add_production("dec_int", { "DEC", "dec_int" }, [](MatchedNonterminal* m) -> std::unique_ptr { std::string str1 = m->terminal(0)->token->lexeme; - std::string str2 = dynamic_cast(m->nonterminal(1)->value.get())->str; - return std::unique_ptr(new ParserASTString(str1 + str2)); + std::string str2 = m->nonterminal(1)->value->get(); + return std::unique_ptr(new ParserValue(str1 + str2)); }); p->add_production("dec_int", { "DEC" }, [](MatchedNonterminal* m) -> std::unique_ptr { - return std::unique_ptr(new ParserASTString(m->terminal(0)->token->lexeme)); + return std::unique_ptr(new ParserValue(m->terminal(0)->token->lexeme)); }); p->generate(Parser::Type::LALR1, "regex"); @@ -389,6 +360,6 @@ std::unique_ptr RegexEngine::make() { void RegexEngine::add_literal(Parser* p, std::string nonterminal, std::string token, UInt ch) { p->add_production(nonterminal, { token }, [ch](MatchedNonterminal* m) -> std::unique_ptr { (void) m; - return std::unique_ptr(new ParserASTRegex(std::unique_ptr(new RegexASTLiteral(ch)))); + return std::unique_ptr(new ParserValueRegex(std::unique_ptr(new RegexASTLiteral(ch)))); }); } diff --git a/midori/src/parser.txt b/midori/src/parser.txt index c816157..d7f6fc5 100644 --- a/midori/src/parser.txt +++ b/midori/src/parser.txt @@ -1,13 +1,19 @@ +%start: root + +%skip: WHITESPACE + LETTER: /[a-z] NUMBER: /[0-9] +WHITESPACE: /[ \t]+ root: zip_list; -zip_list: - zip_list zip - | $epsilon; +zip_list + : zip_list zip + | $epsilon + ; zip: half half; diff --git a/midori/tests/regex_engine.cpp b/midori/tests/regex_engine.cpp index 1c27683..b810d08 100644 --- a/midori/tests/regex_engine.cpp +++ b/midori/tests/regex_engine.cpp @@ -29,3 +29,50 @@ TEST(RegexEngineTest, Dash) { ASSERT_EQ(t->tags.at(0), Lexer::TOKEN_BAD); ASSERT_EQ(t->lexeme, std::string(1, ('d' + 1))); } + +TEST(RegexEngineTest, GroupNegation) { + RegexEngine re; + std::unique_ptr r = re.parse("[^a-c]"); + RegexASTPrinter p; + std::cout << "=== Here" << std::endl; + r->accept(&p); + std::cout << "=== End here" << std::endl; + Lexer l; + l.add_rule("a", re.parse("[^a-c]")); + l.generate(); + for (char ch = 'a'; ch <= 'c'; ch++) { + l.reset(); + std::stringstream ss; + ss << std::string(1, ch); + FileInputStream fis(&ss); + std::unique_ptr t = l.scan(&fis); + ASSERT_EQ(t->tags.at(0), Lexer::TOKEN_BAD); + } + l.reset(); + std::stringstream ss; + ss << "d"; + FileInputStream fis(&ss); + std::unique_ptr t = l.scan(&fis); + ASSERT_EQ(t->tags.at(0), "a"); + ASSERT_EQ(t->lexeme, "d"); +} + +TEST(RegexEngineTest, Rewrite) { + RegexEngine re; + Lexer l; + l.add_rule("a", re.parse("a{3}")); + l.generate(); + for (Int i = 0; i <= 3; i++) { + l.reset(); + std::stringstream ss; + std::string str(i, 'a'); + ss << str; + FileInputStream fis(&ss); + std::unique_ptr t = l.scan(&fis); + ASSERT_EQ(t->tags.at(0), "a"); + ASSERT_EQ(t->lexeme, str); + t = l.scan(&fis); + ASSERT_EQ(t->tags.at(0), "a"); + ASSERT_EQ(t->lexeme, ""); + } +}