diff --git a/lec-cpp-advanced-syntax.html b/lec-cpp-advanced-syntax.html deleted file mode 100644 index 3ce3071..0000000 --- a/lec-cpp-advanced-syntax.html +++ /dev/null @@ -1,744 +0,0 @@ - - - -
- - - - --Martin Sulzmann -
-Konkrete versus abstrakte Syntax.
-Darstellung in C++?
-Allgemeiner Parser. Siehe Cocke–Younger–Kasami -algorithm.
-Kubische Komplexität.
-Wir verwenden Top-Down -Parsing.
-Deterministisches Parsen in linearer Zeit.
-Am Beispiel von regulären Ausdrücken.
-Weitere Beispiele und Details siehe hier.
-R ::= x // Alphabet symbol
- | R + R // Alternative
- | R . R // Konkatenation
- | R* // Kleensche Hülle
- | (R) // Klammerung
-beschrieben durch eine kontext-freie Grammatik in EBNF.
-x*
-
-(x*) . (y*)
-
-(x + y) . z
-Beachte:
-Neben x
verwenden wir auch Alphabetsymbole
-y
, z
, …
Klammerung notwendig um Ausdruck eindeutig zu -beschreiben
Wir unterscheiden zwischen konkreter und abstrakter -Syntax Darstellung.
-Wir verwenden
-Klassenhierarchien, um die verschiedenen Konstrukte (Alternative, …) darzustellen
wobei Klammerung nicht extra dargestellt werden muss
Unten-stehender Programmcode verwendet
-smart_ptr
// Polymorphie am Beispiel regulärer Ausdrücke.
-//
-// R ::= x | R + R | R . R | R* | (R)
-//
-// 1. Templates zwecks Emulation von parametrischer Polymorphie (aka "generics")
-// 2. Overloading
-// 3. Subtyping
-
-
-
-#include <iostream>
-#include <string>
-#include <memory>
-using namespace std;
-
-
// Abstract base class of the regular-expression AST.
class RE_ {
public:
    // Virtual destructor: deleting a node through an RE_* must run the
    // destructor of the concrete node type.
    virtual ~RE_() = default;

    // Renders the expression as text (a simple pretty-printer).
    virtual std::string show() = 0;

    // Reports whether the expression's language contains the empty word.
    virtual bool nullable() = 0;
};
-
-// Zwecks virtueller Methodenauswahl, müssen Objekte via Referenz erreichbar sein.
-// Wir verwenden "smart pointers".
-typedef shared_ptr<RE_> RE; // TEMPLATE
-
-
-// Epsilon, the empty word.
-class Eps : public RE_ {
-public:
- string show() { return "eps"; }
- bool nullable() { return true; }
-};
-
-// Alphabet symbols = letters.
-class Letter : public RE_ {
- char x;
-public:
- Letter(char y) { x = y; }
- string show() { return string(1,x); }
- bool nullable() { return false; }
-};
-
-// Alternatives.
-class Alt : public RE_ {
- RE left;
- RE right;
-public:
- Alt(RE l, RE r) { left = l; right = r; }
- string show() {
- string s("(");
- s.append(left->show()); // OVERLOADING
- s.append("+");
- s.append(right->show());
- s.append(")");
- return s;
- }
- bool nullable() { return (left->nullable() || right->nullable()); }
-};
-
-
-// Concatenation.
-class Conc : public RE_ {
- RE left;
- RE right;
-public:
- Conc(RE l, RE r) { left = l; right = r; }
- string show() {
- string s("(");
- s.append(left->show());
- s.append(".");
- s.append(right->show());
- s.append(")");
- return s;
- }
- bool nullable() { return (left->nullable() && right->nullable()); }
-};
-
-// Kleene star.
-class Star : public RE_ {
- RE r;
-public:
- Star(RE s) { r = s; }
- string show() {
- string s("(");
- s.append(r->show());
- s.append("*)");
- return s;
- }
- bool nullable() { return true; }
-};
-
-// Helpers
-
-// Point to note.
-// Co-variant subtyping here!
-// shared_ptr<Eps> <= shared_part<RE_>.
-RE mkEps() { return make_shared<Eps>(Eps()); } // SUBTYPING
-
-RE mkLetter(char x) { return make_shared<Letter>(Letter(x)); }
-
-RE mkAlt(RE l, RE r) { return make_shared<Alt>(Alt(l,r)); }
-
-RE mkConc(RE l, RE r) { return make_shared<Conc>(Conc(l,r)); }
-
-RE mkStar(RE r) { return make_shared<Star>(Star(r)); }
-
-
-// Beispiele
-
-void test() {
-
- // (x + eps) . (y*)
- RE r = mkConc(mkAlt(mkLetter('x'), mkEps()), mkStar(mkLetter('y')));
-
- cout << "\n" << r->show();
-
- cout << "\n" << r->nullable();
-
-}
-
-
-
-int main() {
-
- test();
-
-}
-
-
-/*
-
-Beachte.
-
-TEMPLATE (siehe oben).
-
- shared_ptr sind "generisch" in dem zu verwaltenden Datentyp.
-
-OVERLOADING (siehe oben).
-
- left->show() entspricht (*left).show()
-
- left ist vom Typ RE, RE ist gleich shared_ptr<RE_>
-
- Der Dereferenzierungsoperator "*" ist überladen.
-
-SUBTYPING (siehe oben).
-
- make_shared<Eps>(Eps()) ist vom Typ shared_ptr<Eps>
-
- Rückgabetyp ist shared_ptr<RE_>
-
- Beachte, shared_ptr<RE_> ist verschieden von shared_ptr<Eps>!
- Wieso ist der Programmtext Typkorrekt!?
-
- Es gilt hier Ko-variantes Subtyping.
-
- 1. Subtyping zwischen Basistypen
-
- Aus der Klassendeklaration leiten wir ab Eps <= RE_
- D.h ein Objekt der Klasse Eps ist auch
- vom Typ RE_
-
-
- 2. Subtyping zwischen Typen(konstruktoren) mit Typargument, z.B. shared_ptr<....>
-
- shared_ptr<Eps> <= shared_ptr<RE_> weil Eps <= RE_
-
- D.h. Subtyping Relation zwischen shared_ptr Instanzen richtet sich nach
- der Subtyping Relation zwischen den zugrunde liegenden Basistypen.
-
- Die Richtung von "<=" bleibt erhalten. Die Subtype Relation verhaelt sich hier ko-variant.
-
- Gibt es auch kontra-variant?
- Ja, betrachte Coercive Subtyping Beispiel.
-
-int func(float x) {
- if (x < 1.0) return 1;
- return 0;
-}
-
-
-In mathematischer Schreibweise, func hat den Typ float -> int.
-Es gilt aber auch
-
- float -> int <= int -> float
-
- weil int <= float
-
- In diesem Fall verhaelt sich Subtyping Kontra-variant im Argument und Ko-variant im Resultat.
-
-
-
- */
// g++ --std=c++11
-
-// Beispiel regulärer Ausdrücke.
-//
-// R ::= x | R + R | R . R | R* | (R)
-//
-// wobei x ein Kleinbuchstabe ist.
-
-
-
-#include <iostream>
-#include <string>
-#include <memory>
-#include <vector>
-using namespace std;
-
-
-// AST Darstellung.
-
// Abstract base class of the regular-expression AST.
class RE_ {
public:
    // Virtual destructor: deleting a node through an RE_* must run the
    // destructor of the concrete node type.
    virtual ~RE_() = default;

    // Renders the expression as text (a simple pretty-printer).
    virtual std::string show() = 0;
};
-
-// Zwecks virtueller Methodenauswahl, müssen Objekte via Referenz erreichbar sein.
-// Wir verwenden "smart pointers".
-typedef shared_ptr<RE_> RE; // TEMPLATE
-
-
-// Epsilon, the empty word.
-class Eps : public RE_ {
-public:
- string show() { return "eps"; }
-};
-
-// Alphabet symbols = letters.
-class Letter : public RE_ {
- char x;
-public:
- Letter(char y) { x = y; }
- string show() { return string(1,x); }
-};
-
-// Alternatives.
-class Alt : public RE_ {
- RE left;
- RE right;
-public:
- Alt(RE l, RE r) { left = l; right = r; }
- string show() {
- string s("(");
- s.append(left->show()); // OVERLOADING
- s.append("+");
- s.append(right->show());
- s.append(")");
- return s;
- }
-};
-
-
-// Concatenation.
-class Conc : public RE_ {
- RE left;
- RE right;
-public:
- Conc(RE l, RE r) { left = l; right = r; }
- string show() {
- string s("(");
- s.append(left->show());
- s.append(".");
- s.append(right->show());
- s.append(")");
- return s;
- }
-};
-
-// Kleene star.
-class Star : public RE_ {
- RE r;
-public:
- Star(RE s) { r = s; }
- string show() {
- string s("(");
- s.append(r->show());
- s.append("*)");
- return s;
- }
-};
-
-// Helpers
-
-// Point to note.
-// Co-variant subtyping here!
-// shared_ptr<Eps> <= shared_part<RE_>.
-RE mkEps() { return make_shared<Eps>(Eps()); } // SUBTYPING
-
-RE mkLetter(char x) { return make_shared<Letter>(Letter(x)); }
-
-RE mkAlt(RE l, RE r) { return make_shared<Alt>(Alt(l,r)); }
-
-RE mkConc(RE l, RE r) { return make_shared<Conc>(Conc(l,r)); }
-
-RE mkStar(RE r) { return make_shared<Star>(Star(r)); }
-
-
-/////////////////////////////////////
-// Tokenizer
-
// Kinds of token the tokenizer can produce.
enum Token_t {
    EOS,     // end of the input string
    LETTER,  // an alphabet symbol
    OPEN,    // '('
    CLOSE,   // ')'
    CONC,    // '.'
    ALT,     // '+'
    KLEENE   // '*'
};
-
-
-class Token {
-public:
- Token() {}
- Token(Token_t tt) {
- kind = tt;
- }
- Token(Token_t tt, char v) {
- kind = tt; val = v;
- }
- bool eos() {
- return kind == EOS;
- }
- Token_t kind;
- char val;
-} ;
-
-
-
-class Tokenize {
- string s;
- int pos;
-public:
- Tokenize(string s) {
- this->s = s;
- pos = 0;
- }
-
- Token next() {
- if(s.length() <= pos)
- return Token(EOS);
-
- while(1) {
-
- if(s.length() <= pos)
- return Token(EOS);
-
- switch(s[pos]) {
- case '(': pos++;
- return Token(OPEN);
- case ')': pos++;
- return Token(CLOSE);
- case '+': pos++;
- return Token(ALT);
- case '.': pos++;
- return Token(CONC);
- case '*': pos++;
- return Token(KLEENE);
- case ' ':
- pos++;
- break;
- default: char ch = s[pos];
- if(ch >= 'a' && ch <= 'z') {
- pos++;
- return Token(LETTER, ch);
- } else {
- cout << "Unerlaubtes Symbol";
- exit(1);
- }
- }
- }
- } // next
-
- vector<Token> scan() {
- vector<Token> v;
- Token t;
-
- do {
- t = next();
- v.push_back(t);
- }
- while(! t.eos());
-
- return v;
- } // scan
-};
-
-// Wrapper class, provide the (current) token.
-class Tokenizer : Tokenize {
-public:
- Token token;
- Tokenizer(string s) : Tokenize(s) { token = next(); }
- void nextToken() {
- token = next();
- }
-};
-
-
-/////////////////////////////////////
-// Top-Down Parser fuer
-//
-// R ::= x | R + R | R . R | R* | (R)
-
-// R ::= T R2
-// R2 ::= . T R2 |
-// T ::= F T2
-// T2 ::= + F T2 |
-// F ::= x | x* | (R) | (R)*
-
-// Beachte.
-// "+" bindet staerker als "."
-// "Kleene star" wird betrachtet im Fall von "letter" und geklammerten Ausdruecken.
-// Deshalb bindet "*" staerker als die anderen Operatoren.
-
-
// Newer C++ versions come with std::optional. This is a minimal home-grown
// variant modelled on Haskell's Maybe. T must be default-constructible.
template<typename T>
class Optional {
    bool b;  // true when a value is present
    T val;   // the value; value-initialized when absent
public:
    // Empty ("nothing"). val is value-initialized so that copying an empty
    // Optional never reads an indeterminate value.
    Optional() : b(false), val() {}

    // Holds v ("just v"). Initializers are listed in declaration order.
    Optional(T v) : b(true), val(v) {}

    bool isJust() const { return b; }
    bool isNothing() const { return !b; }

    // Returns a copy of the stored value. On an empty Optional this is the
    // value-initialized T, so callers should check isJust() first.
    T fromJust() const { return val; }
};
-
-template<typename T>
-Optional<T> nothing() {
- return Optional<T>();
-}
-
-template<typename T>
-Optional<T> just(T v) {
- return Optional<T>(v);
-}
-
-
-// Functional style top-down parser.
-
-Optional<RE> parseR(Tokenizer &t);
-Optional<RE> parseR2(Tokenizer &t, RE left);
-Optional<RE> parseT(Tokenizer &t);
-Optional<RE> parseT2(Tokenizer &t, RE left);
-Optional<RE> parseF(Tokenizer &t);
-
-
-// R ::= T R2
-Optional<RE> parseR(Tokenizer &t) {
- Optional<RE> left = parseT(t);
- if(left.isNothing())
- return left;
-
- return parseR2(t,left.fromJust());
-}
-
-// R2 ::= . T R2 |
-Optional<RE> parseR2(Tokenizer &t, RE left) {
-
- if(t.token.kind == CONC) {
- t.nextToken();
-
- Optional<RE> right = parseT(t);
- if(right.isNothing())
- return right;
-
- return parseR2(t, mkConc(left, right.fromJust()));
- }
-
- return just<RE>(left);
-}
-
-// T ::= F T2
-Optional<RE> parseT(Tokenizer &t) {
- Optional<RE> left = parseF(t);
- if(left.isNothing())
- return left;
-
- return parseT2(t,left.fromJust());
-}
-
-// T2 ::= + F T2 |
-Optional<RE> parseT2(Tokenizer &t, RE left) {
-
- if(t.token.kind == ALT) {
- t.nextToken();
-
- Optional<RE> right = parseF(t);
- if(right.isNothing())
- return right;
-
- return parseT2(t, mkAlt(left, right.fromJust()));
- }
-
- return just<RE>(left);
-}
-
-// F ::= x | x* | (R) | (R)*
-Optional<RE> parseF(Tokenizer &t) {
- switch(t.token.kind) {
- case LETTER: {
- char ch = t.token.val;
- t.nextToken();
-
- if(t.token.kind == KLEENE) {
- t.nextToken();
- return just<RE>(mkStar(mkLetter(ch)));
- }
- return just<RE>(mkLetter(ch));
- }
- case OPEN: {
- t.nextToken();
- Optional<RE> re = parseR(t);
- if (re.isNothing())
- return re;
-
- if (t.token.kind != CLOSE)
- return nothing<RE>();
-
- t.nextToken();
-
- if(t.token.kind == KLEENE) {
- t.nextToken();
- re = mkStar(re.fromJust());
- }
- return re;
- }
-
- default:
- return nothing<RE>();
- } // switch
-}
-
-
-
-// Beispiele
-
-
-void display(Optional<RE> r) {
- if(r.isNothing()) {
- cout << "nothing \n";
- } else {
- cout << (r.fromJust())->show() << "\n";
- }
- return;
-}
-
-
-void parse(string s) {
- Tokenizer t = Tokenizer(s);
-
- auto r = parseR(t);
-
- // Whole input shall be consumed.
- if (t.token.kind == EOS) {
- display(r);
- }
- else {
- display(nothing<RE>());
- }
-
-}
-
-
-void test() {
-
- // (x + eps) . (y*)
- RE r = mkConc(mkAlt(mkLetter('x'), mkEps()), mkStar(mkLetter('y')));
-
- cout << "\n" << r->show();
-
-}
-
-
-
-int main() {
-
- parse("x.y+z");
- parse("x* + y*");
- parse("x + y *");
- parse("(x+y)*");
-
- parse("x x");
-
-}