// g++ --std=c++11
+
// Example: regular expressions.
//
// R ::= x | R + R | R . R | R* | (R)
//
// where x is a lowercase letter.
+
+
+
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
using namespace std;
+
+
+// AST Darstellung.
+
// Abstract base class of all regular-expression AST nodes.
class RE_ {
public:
    // Virtual destructor: nodes are handled through base-class pointers, so
    // destroying one through an RE_* must run the derived destructor.
    virtual ~RE_() = default;

    // Returns a textual (pretty-printed) representation of the node.
    virtual std::string show() = 0;
};
+
+// Zwecks virtueller Methodenauswahl, müssen Objekte via Referenz erreichbar sein.
+// Wir verwenden "smart pointers".
+typedef shared_ptr<RE_> RE; // TEMPLATE
+
+
+// Epsilon, the empty word.
+class Eps : public RE_ {
+public:
+ string show() { return "eps"; }
+};
+
+// Alphabet symbols = letters.
+class Letter : public RE_ {
+ char x;
+public:
+ Letter(char y) { x = y; }
+ string show() { return string(1,x); }
+};
+
+// Alternatives.
+class Alt : public RE_ {
+ RE left;
+ RE right;
+public:
+ Alt(RE l, RE r) { left = l; right = r; }
+ string show() {
+ string s("(");
+ s.append(left->show()); // OVERLOADING
+ s.append("+");
+ s.append(right->show());
+ s.append(")");
+ return s;
+ }
+};
+
+
+// Concatenation.
+class Conc : public RE_ {
+ RE left;
+ RE right;
+public:
+ Conc(RE l, RE r) { left = l; right = r; }
+ string show() {
+ string s("(");
+ s.append(left->show());
+ s.append(".");
+ s.append(right->show());
+ s.append(")");
+ return s;
+ }
+};
+
+// Kleene star.
+class Star : public RE_ {
+ RE r;
+public:
+ Star(RE s) { r = s; }
+ string show() {
+ string s("(");
+ s.append(r->show());
+ s.append("*)");
+ return s;
+ }
+};
+
+// Helpers
+
+// Point to note.
+// Co-variant subtyping here!
+// shared_ptr<Eps> <= shared_part<RE_>.
+RE mkEps() { return make_shared<Eps>(Eps()); } // SUBTYPING
+
+RE mkLetter(char x) { return make_shared<Letter>(Letter(x)); }
+
+RE mkAlt(RE l, RE r) { return make_shared<Alt>(Alt(l,r)); }
+
+RE mkConc(RE l, RE r) { return make_shared<Conc>(Conc(l,r)); }
+
+RE mkStar(RE r) { return make_shared<Star>(Star(r)); }
+
+
+/////////////////////////////////////
+// Tokenizer
+
// Kinds of tokens the tokenizer can produce.
enum Token_t {
    EOS,     // end of string
    LETTER,  // a letter 'a'..'z'
    OPEN,    // '('
    CLOSE,   // ')'
    CONC,    // '.'
    ALT,     // '+'
    KLEENE   // '*'
};
+
+
+class Token {
+public:
+ Token() {}
+ Token(Token_t tt) {
+ kind = tt;
+ }
+ Token(Token_t tt, char v) {
+ kind = tt; val = v;
+ }
+ bool eos() {
+ return kind == EOS;
+ }
+ Token_t kind;
+ char val;
+} ;
+
+
+
+class Tokenize {
+ string s;
+ int pos;
+public:
+ Tokenize(string s) {
+ this->s = s;
+ pos = 0;
+ }
+
+ Token next() {
+ if(s.length() <= pos)
+ return Token(EOS);
+
+ while(1) {
+
+ if(s.length() <= pos)
+ return Token(EOS);
+
+ switch(s[pos]) {
+ case '(': pos++;
+ return Token(OPEN);
+ case ')': pos++;
+ return Token(CLOSE);
+ case '+': pos++;
+ return Token(ALT);
+ case '.': pos++;
+ return Token(CONC);
+ case '*': pos++;
+ return Token(KLEENE);
+ case ' ':
+ pos++;
+ break;
+ default: char ch = s[pos];
+ if(ch >= 'a' && ch <= 'z') {
+ pos++;
+ return Token(LETTER, ch);
+ } else {
+ cout << "Unerlaubtes Symbol";
+ exit(1);
+ }
+ }
+ }
+ } // next
+
+ vector<Token> scan() {
+ vector<Token> v;
+ Token t;
+
+ do {
+ t = next();
+ v.push_back(t);
+ }
+ while(! t.eos());
+
+ return v;
+ } // scan
+};
+
+// Wrapper class, provide the (current) token.
+class Tokenizer : Tokenize {
+public:
+ Token token;
+ Tokenizer(string s) : Tokenize(s) { token = next(); }
+ void nextToken() {
+ token = next();
+ }
+};
+
+
+/////////////////////////////////////
// Top-down parser for
//
// R ::= x | R + R | R . R | R* | (R)
//
// The parser works on the following grammar, which is not left-recursive
// (an empty alternative is written as "eps"):
//
// R  ::= T R2
// R2 ::= . T R2 | eps
// T  ::= F T2
// T2 ::= + F T2 | eps
// F  ::= x | x* | (R) | (R)*

// Note:
// "+" binds tighter than ".".
// The Kleene star is only considered directly after a letter or a
// parenthesized expression.
// Therefore "*" binds tighter than the other operators.
+
+
// Newer C++ versions come with "optional".
// We simply build our own, following the idea of Haskell's Maybe:
// an Optional<T> either holds a value ("just") or holds none ("nothing").
// T must be default-constructible and copyable.
template<typename T>
class Optional {
    bool b;  // true iff a value is present
    T val;   // the value; value-initialized when none is present
public:
    // Creates an empty Optional ("nothing").
    Optional() : b(false), val() {}

    // Creates an Optional holding v ("just v"). Deliberately not explicit:
    // a plain T converts implicitly to Optional<T>.
    // Initializers are listed in member declaration order.
    Optional(T v) : b(true), val(std::move(v)) {}

    // True iff a value is present.
    bool isJust() const { return b; }

    // True iff no value is present.
    bool isNothing() const { return !b; }

    // Returns a copy of the stored value. For an empty Optional this is a
    // value-initialized T.
    T fromJust() const { return val; }
};
+
+template<typename T>
+Optional<T> nothing() {
+ return Optional<T>();
+}
+
+template<typename T>
+Optional<T> just(T v) {
+ return Optional<T>(v);
+}
+
+
+// Functional style top-down parser.
+
+Optional<RE> parseR(Tokenizer &t);
+Optional<RE> parseR2(Tokenizer &t, RE left);
+Optional<RE> parseT(Tokenizer &t);
+Optional<RE> parseT2(Tokenizer &t, RE left);
+Optional<RE> parseF(Tokenizer &t);
+
+
+// R ::= T R2
+Optional<RE> parseR(Tokenizer &t) {
+ Optional<RE> left = parseT(t);
+ if(left.isNothing())
+ return left;
+
+ return parseR2(t,left.fromJust());
+}
+
+// R2 ::= . T R2 |
+Optional<RE> parseR2(Tokenizer &t, RE left) {
+
+ if(t.token.kind == CONC) {
+ t.nextToken();
+
+ Optional<RE> right = parseT(t);
+ if(right.isNothing())
+ return right;
+
+ return parseR2(t, mkConc(left, right.fromJust()));
+ }
+
+ return just<RE>(left);
+}
+
+// T ::= F T2
+Optional<RE> parseT(Tokenizer &t) {
+ Optional<RE> left = parseF(t);
+ if(left.isNothing())
+ return left;
+
+ return parseT2(t,left.fromJust());
+}
+
+// T2 ::= + F T2 |
+Optional<RE> parseT2(Tokenizer &t, RE left) {
+
+ if(t.token.kind == ALT) {
+ t.nextToken();
+
+ Optional<RE> right = parseF(t);
+ if(right.isNothing())
+ return right;
+
+ return parseT2(t, mkAlt(left, right.fromJust()));
+ }
+
+ return just<RE>(left);
+}
+
+// F ::= x | x* | (R) | (R)*
+Optional<RE> parseF(Tokenizer &t) {
+ switch(t.token.kind) {
+ case LETTER: {
+ char ch = t.token.val;
+ t.nextToken();
+
+ if(t.token.kind == KLEENE) {
+ t.nextToken();
+ return just<RE>(mkStar(mkLetter(ch)));
+ }
+ return just<RE>(mkLetter(ch));
+ }
+ case OPEN: {
+ t.nextToken();
+ Optional<RE> re = parseR(t);
+ if (re.isNothing())
+ return re;
+
+ if (t.token.kind != CLOSE)
+ return nothing<RE>();
+
+ t.nextToken();
+
+ if(t.token.kind == KLEENE) {
+ t.nextToken();
+ re = mkStar(re.fromJust());
+ }
+ return re;
+ }
+
+ default:
+ return nothing<RE>();
+ } // switch
+}
+
+
+
+// Beispiele
+
+
+void display(Optional<RE> r) {
+ if(r.isNothing()) {
+ cout << "nothing \n";
+ } else {
+ cout << (r.fromJust())->show() << "\n";
+ }
+ return;
+}
+
+
+void parse(string s) {
+ Tokenizer t = Tokenizer(s);
+
+ auto r = parseR(t);
+
+ // Whole input shall be consumed.
+ if (t.token.kind == EOS) {
+ display(r);
+ }
+ else {
+ display(nothing<RE>());
+ }
+
+}
+
+
+void test() {
+
+ // (x + eps) . (y*)
+ RE r = mkConc(mkAlt(mkLetter('x'), mkEps()), mkStar(mkLetter('y')));
+
+ cout << "\n" << r->show();
+
+}
+
+
+
+int main() {
+
+ parse("x.y+z");
+ parse("x* + y*");
+ parse("x + y *");
+ parse("(x+y)*");
+
+ parse("x x");
+
+}