From 0954edacf3a66e1ef62668549ac1f46d32e7a770 Mon Sep 17 00:00:00 2001 From: sulzmann Date: Tue, 28 Nov 2023 15:56:43 +0100 Subject: [PATCH] Delete lec-cpp-advanced-syntax.html --- lec-cpp-advanced-syntax.html | 744 ----------------------------------- 1 file changed, 744 deletions(-) delete mode 100644 lec-cpp-advanced-syntax.html diff --git a/lec-cpp-advanced-syntax.html b/lec-cpp-advanced-syntax.html deleted file mode 100644 index 3ce3071..0000000 --- a/lec-cpp-advanced-syntax.html +++ /dev/null @@ -1,744 +0,0 @@ - - - - - - - - - Syntax Analyse in C++ - - - - - -
-

Syntax Analyse in C++

-

-Martin Sulzmann -

-
-
-

Syntax Analyse

-

Darstellung von Syntax

-

Konkrete versus abstrakte Syntax.

-

Darstellung in C++?

-

Parsing

-

Allgemeiner Parser. Siehe Cocke–Younger–Kasami -algorithm.

-

Kubische Komplexität.

-

Wir verwenden Top-Down -Parsing.

-

Deterministisches Parsen in linearer Zeit.

-

Am Beispiel von regulären Ausdrücken.

-

Weitere Beispiele und Details siehe hier.

-
-
-

Reguläre Ausdrücke

-
R ::= x           // Alphabet symbol
-    | R + R       // Alternative
-    | R . R       // Konkatenation
-    | R*          // Kleensche Hülle
-    | (R)         // Klammerung
-

beschrieben durch eine kontext-freie Grammatik in EBNF.

-

Beispiele

-
x*
-
-(x*) . (y*)
-
-(x + y) . z
-

Beachte:

- -

Darstellung in C++

-

Wir unterscheiden zwischen konkreter und abstrakter -Syntax Darstellung.

-

Wir verwenden

- -

Untenstehender Programmcode verwendet

- -
// Polymorphie am Beispiel  regulärer Ausdrücke.
-//
-// R ::= x | R + R | R . R | R* | (R)
-//
-// 1. Templates zwecks Emulation von parametrischer Polymorphie (aka "generics")
-// 2. Overloading
-// 3. Subtyping
-
-
-
-#include <iostream>
-#include <string>
-#include <memory>
-using namespace std;
-
-
-class RE_ {                      // Abstract base class for every regular-expression AST node.
-public:
-  virtual string show() = 0;     // Pretty-printer: returns the expression as text.
-  virtual bool nullable() = 0;   // Checks whether the regular expression
-                                 // contains (accepts) the empty word.
-  // NOTE(review): no virtual destructor is declared; add one if nodes are ever deleted via RE_*.
-};
-
-// Virtual method dispatch requires that objects are reached through a reference/pointer.
-// We use "smart pointers": RE is a shared_ptr to the abstract node type RE_.
-typedef shared_ptr<RE_> RE;                            // TEMPLATE
-
-
-// Epsilon, the empty word. The node carries no data.
-class Eps : public RE_ {
-public:
-  string show() { return "eps"; }     // Printed as the literal text "eps".
-  bool nullable() { return true; }    // Always reports true.
-};
-
-// Alphabet symbols = letters. The node stores a single character.
-class Letter : public RE_ {
-  char x;                                // The character this node stands for.
-public:
-  Letter(char y) { x = y; }
-  string show() { return string(1,x); }  // One-character string holding x.
-  bool nullable() { return false; }      // Always reports false.
-};
-
-// Alternatives: a choice between two sub-expressions, printed as "(left+right)".
-class Alt : public RE_ {
-  RE left;                                           // First alternative.
-  RE right;                                          // Second alternative.
-public:
-  Alt(RE l, RE r) { left = l; right = r; }
-  string show() {                                    // Builds "(" + left + "+" + right + ")".
-    string s("(");
-    s.append(left->show());                          // OVERLOADING
-    s.append("+");
-    s.append(right->show());
-    s.append(")");
-    return s;
-  }
-  bool nullable() { return (left->nullable() || right->nullable()); }  // True if either side is nullable.
-};
-
-
-// Concatenation: left followed by right, printed as "(left.right)".
-class Conc : public RE_ {
-  RE left;                                           // First part.
-  RE right;                                          // Second part.
-public:
-  Conc(RE l, RE r) { left = l; right = r; }
-  string show() {                                    // Builds "(" + left + "." + right + ")".
-    string s("(");
-    s.append(left->show());
-    s.append(".");
-    s.append(right->show());
-    s.append(")");
-    return s;
-  }
-  bool nullable() { return (left->nullable() && right->nullable()); }  // True only if both sides are nullable.
-};
-
-// Kleene star over a sub-expression, printed as "(r*)".
-class Star : public RE_ {
-  RE r;                                 // The starred sub-expression.
-public:
-  Star(RE s) { r = s; }
-  string show() {                       // Builds "(" + r + "*)".
-    string s("(");                      // Local s is the output buffer (unrelated to the constructor parameter s).
-    s.append(r->show());
-    s.append("*)");
-    return s;
-  }
-  bool nullable() { return true; }      // Always reports true, independent of r.
-};
-
-// Helpers: each builds one AST node and returns it as an RE (shared_ptr<RE_>).
-// Every helper passes a temporary node to make_shared, which copy-constructs the managed object.
-// Point to note.
-// Co-variant subtyping here!
-// shared_ptr<Eps> <= shared_ptr<RE_>.
-RE mkEps() { return make_shared<Eps>(Eps()); }                     // SUBTYPING
-// Builds a Letter node for the character x.
-RE mkLetter(char x) { return make_shared<Letter>(Letter(x)); }
-// Builds an Alt node with the alternatives l and r.
-RE mkAlt(RE l, RE r) { return make_shared<Alt>(Alt(l,r)); }
-// Builds a Conc node: l followed by r.
-RE mkConc(RE l, RE r) { return make_shared<Conc>(Conc(l,r)); }
-// Builds a Star node over r.
-RE mkStar(RE r) { return make_shared<Star>(Star(r)); }
-
-
-// Beispiele
-
-void test() {
-  // Example: builds one expression by hand, then prints its text form and its nullability.
-  // (x + eps) . (y*)
-  RE r = mkConc(mkAlt(mkLetter('x'), mkEps()), mkStar(mkLetter('y')));
-  // Each output is preceded by a newline.
-  cout << "\n" << r->show();
-  // The bool result is streamed with cout's default formatting.
-  cout << "\n" << r->nullable();
-
-}
-
-
-
-int main() {
-  // Program entry point: only runs the example above.
-  test();
-
-}
-
-
-/*
-
-Beachte.
-
-TEMPLATE (siehe oben).
-
-     shared_ptr sind "generisch" in dem zu verwaltenden Datentyp.
-
-OVERLOADING (siehe oben).
-
-      left->show() entspricht   (*left).show()
-
-      left ist vom Typ RE, RE ist gleich shared_ptr<RE_>
-
-      Der Dereferenzierungsoperator "*" ist überladen.
-
-SUBTYPING (siehe oben).
-
-      make_shared<Eps>(Eps())  ist vom Typ   shared_ptr<Eps>
-
-      Rückgabetyp ist shared_ptr<RE_>
-
-      Beachte,  shared_ptr<RE_> ist verschieden von shared_ptr<Eps>!
-      Wieso ist der Programmtext Typkorrekt!?
-
-     Es gilt hier Ko-variantes Subtyping.
-
-      1. Subtyping zwischen Basistypen
-
-      Aus der Klassendeklaration leiten wir ab  Eps <= RE_
-      D.h ein Objekt der Klasse Eps ist auch
-      vom Typ RE_
-
-
-     2. Subtyping zwischen Typen(konstruktoren) mit Typargument, z.B. shared_ptr<....>
-
-       shared_ptr<Eps> <= shared_ptr<RE_>  weil Eps <= RE_
-
-       D.h. Subtyping Relation zwischen shared_ptr Instanzen richtet sich nach
-       der Subtyping Relation zwischen den zugrunde liegenden Basistypen.
-
-       Die Richtung von "<=" bleibt erhalten. Die Subtype Relation verhaelt sich hier ko-variant.
-
-       Gibt es auch kontra-variant?
-       Ja, betrachte Coercive Subtyping Beispiel.
-
-int func(float x) {
-    if (x < 1.0) return 1;
-    return 0;
-}
-
-
-In mathematischer Schreibweise, func hat den Typ float -> int.
-Es gilt aber auch
-
-  float -> int  <=   int -> float
-
- weil int <= float
-
-  In diesem Fall verhaelt sich Subtyping Kontra-variant im Argument und Ko-variant im Resultat.
-
-
-
- */
-
-
-

Top-down parser für reguläre Ausdrücke

-
// g++ --std=c++11
-
-// Beispiel  regulärer Ausdrücke.
-//
-// R ::= x | R + R | R . R | R* | (R)
-//
-//   wobei x ein Kleinbuchstabe ist.
-
-
-
-#include <iostream>
-#include <string>
-#include <memory>
-#include <vector>
-using namespace std;
-
-
-// AST Darstellung.
-
-class RE_ {                      // Abstract base class for every regular-expression AST node.
-public:
-  virtual string show() = 0;     // Pretty-printer: returns the expression as text.
-};
-
-// Virtual method dispatch requires that objects are reached through a reference/pointer.
-// We use "smart pointers": RE is a shared_ptr to the abstract node type RE_.
-typedef shared_ptr<RE_> RE;                            // TEMPLATE
-
-
-// Epsilon, the empty word. The node carries no data.
-class Eps : public RE_ {
-public:
-  string show() { return "eps"; }     // Printed as the literal text "eps".
-};
-
-// Alphabet symbols = letters. The node stores a single character.
-class Letter : public RE_ {
-  char x;                                // The character this node stands for.
-public:
-  Letter(char y) { x = y; }
-  string show() { return string(1,x); }  // One-character string holding x.
-};
-
-// Alternatives: a choice between two sub-expressions, printed as "(left+right)".
-class Alt : public RE_ {
-  RE left;                                           // First alternative.
-  RE right;                                          // Second alternative.
-public:
-  Alt(RE l, RE r) { left = l; right = r; }
-  string show() {                                    // Builds "(" + left + "+" + right + ")".
-    string s("(");
-    s.append(left->show());                          // OVERLOADING
-    s.append("+");
-    s.append(right->show());
-    s.append(")");
-    return s;
-  }
-};
-
-
-// Concatenation: left followed by right, printed as "(left.right)".
-class Conc : public RE_ {
-  RE left;                                           // First part.
-  RE right;                                          // Second part.
-public:
-  Conc(RE l, RE r) { left = l; right = r; }
-  string show() {                                    // Builds "(" + left + "." + right + ")".
-    string s("(");
-    s.append(left->show());
-    s.append(".");
-    s.append(right->show());
-    s.append(")");
-    return s;
-  }
-};
-
-// Kleene star over a sub-expression, printed as "(r*)".
-class Star : public RE_ {
-  RE r;                                 // The starred sub-expression.
-public:
-  Star(RE s) { r = s; }
-  string show() {                       // Builds "(" + r + "*)".
-    string s("(");                      // Local s is the output buffer (unrelated to the constructor parameter s).
-    s.append(r->show());
-    s.append("*)");
-    return s;
-  }
-};
-
-// Helpers: each builds one AST node and returns it as an RE (shared_ptr<RE_>).
-// Every helper passes a temporary node to make_shared, which copy-constructs the managed object.
-// Point to note.
-// Co-variant subtyping here!
-// shared_ptr<Eps> <= shared_ptr<RE_>.
-RE mkEps() { return make_shared<Eps>(Eps()); }                     // SUBTYPING
-// Builds a Letter node for the character x.
-RE mkLetter(char x) { return make_shared<Letter>(Letter(x)); }
-// Builds an Alt node with the alternatives l and r.
-RE mkAlt(RE l, RE r) { return make_shared<Alt>(Alt(l,r)); }
-// Builds a Conc node: l followed by r.
-RE mkConc(RE l, RE r) { return make_shared<Conc>(Conc(l,r)); }
-// Builds a Star node over r.
-RE mkStar(RE r) { return make_shared<Star>(Star(r)); }
-
-
-/////////////////////////////////////
-// Tokenizer
-
-typedef enum {   // Token kinds produced by the tokenizer.
-  EOS,           // End of string
-  LETTER,        // A lowercase letter 'a'..'z'
-  OPEN,          // '('
-  CLOSE,         // ')'
-  CONC,          // '.'
-  ALT,           // '+'
-  KLEENE,        // '*'
-} Token_t;
-
-
-class Token {                   // One token: its kind plus, for LETTER tokens, the character read.
-public:
-  Token() {}                    // NOTE(review): leaves kind and val uninitialized.
-  Token(Token_t tt) {           // Token without a character; val is not set.
-    kind = tt;
-  }
-  Token(Token_t tt, char v) {   // Token that carries the character v.
-    kind = tt; val = v;
-  }
-  bool eos() {                  // True if this is the end-of-string token.
-    return kind == EOS;
-  }
-  Token_t kind;                 // Which kind of token this is.
-  char val;                     // Character payload; only set by the two-argument constructor.
-} ;
-
-
-
-class Tokenize {                // Turns the input string into tokens, one per call to next().
-  string s;                     // The input text.
-  int pos;                      // Index of the next unread character in s.
-public:
-  Tokenize(string s) {          // Stores the input and starts reading at index 0.
-         this->s = s;
-     pos = 0;
-  }
-  // Returns the next token; returns EOS once the end of the input is reached.
-  Token next() {
-    if(s.length() <= pos)       // NOTE(review): compares an unsigned length with the signed int pos.
-      return Token(EOS);
-    // The loop only repeats after a blank was skipped; every other case returns or exits.
-    while(1) {
-
-       if(s.length() <= pos)
-         return Token(EOS);
-
-       switch(s[pos]) {
-       case '(': pos++;
-     return Token(OPEN);
-       case ')': pos++;
-     return Token(CLOSE);
-       case '+': pos++;
-     return Token(ALT);
-       case '.': pos++;
-     return Token(CONC);
-       case '*': pos++;
-     return Token(KLEENE);
-       case ' ':                // Skip the blank, then leave the switch and loop again.
-     pos++;
-     break;
-       default:  char ch = s[pos];
-             if(ch >= 'a' && ch <= 'z') {   // Only lowercase letters are accepted as symbols.
-                     pos++;
-             return Token(LETTER, ch);
-         } else {               // Any other character: print a message and terminate the program.
-           cout << "Unerlaubtes Symbol";
-           exit(1);             // NOTE(review): <cstdlib> is not included directly in this program.
-         }
-       }
-    }
-  } // next
-  // Calls next() repeatedly and collects the tokens, including the final EOS token.
-  vector<Token> scan() {
-    vector<Token> v;
-    Token t;
-
-    do {
-      t = next();
-      v.push_back(t);
-    }
-    while(! t.eos());
-
-    return v;
-  } // scan
-};
-
-// Wrapper class, provides the (current) token in the public member `token`.
-class Tokenizer : Tokenize {    // No access specifier given, so the base class is inherited privately.
-public:
-  Token token;                  // The token the parser is currently looking at.
-  Tokenizer(string s) : Tokenize(s) { token = next(); }   // Reads the first token right away.
-  void nextToken() {            // Replaces token with the following one.
-    token = next();
-  }
-};
-
-
-/////////////////////////////////////
-// Top-Down Parser fuer
-//
-//  R ::= x | R + R | R . R | R* | (R)
-
-//  R ::= T R2
-//  R2 ::= . T R2 |
-//  T ::= F T2
-//  T2 ::= + F T2 |
-//  F ::= x | x* | (R) | (R)*
-
-// Beachte.
-// "+" bindet staerker als "."
-// "Kleene star" wird betrachtet im Fall von "letter" und geklammerten Ausdruecken.
-// Deshalb bindet "*" staerker als die anderen Operatoren.
-
-
-// Newer C++ versions come with "optional".
-// We simply build our own and follow the original idea as found in Haskell's Maybe.
-template<typename T>
-class Optional {
-  bool b;                              // True if a value is present.
-  T val;                               // The stored value; default-constructed when none is given.
-public:
-  Optional() : b(false) {}             // The "nothing" case.
-  Optional(T v) : val(v), b(true) {}   // The "just v" case. NOTE(review): initializer list is not in declaration order (b, val).
-  bool isJust() { return b; }
-  bool isNothing() { return !b; }
-  T fromJust() { return val; }         // Returns val without checking b.
-};
-
-template<typename T>
-Optional<T> nothing() {         // Returns an Optional<T> that holds no value.
-  return Optional<T>();
-}
-
-template<typename T>
-Optional<T> just(T v) {         // Returns an Optional<T> that holds v.
-  return Optional<T>(v);
-}
-
-
-// Functional style top-down parser.
-// Forward declarations: the parse functions call each other recursively.
-Optional<RE> parseR(Tokenizer &t);
-Optional<RE> parseR2(Tokenizer &t, RE left);
-Optional<RE> parseT(Tokenizer &t);
-Optional<RE> parseT2(Tokenizer &t, RE left);
-Optional<RE> parseF(Tokenizer &t);
-
-
-//  R ::= T R2      Parses a T, then lets parseR2 extend it; returns nothing if T fails.
-Optional<RE> parseR(Tokenizer &t) {
-  Optional<RE> left = parseT(t);
-  if(left.isNothing())
-    return left;
-  // The parsed T becomes the left operand of any following "." chain.
-  return parseR2(t,left.fromJust());
-}
-
-//  R2 ::= . T R2 |        (the second alternative is empty)
-Optional<RE> parseR2(Tokenizer &t, RE left) {
-    // `left` is the expression parsed so far.
-    if(t.token.kind == CONC) {
-        t.nextToken();
-    // A "." must be followed by a T; otherwise the failure is passed on.
-    Optional<RE> right = parseT(t);
-    if(right.isNothing())
-      return right;
-    // Combine both sides and continue with the combined node as the new left operand.
-    return parseR2(t, mkConc(left, right.fromJust()));
-    }
-    // Empty alternative: no "." follows, so the result is `left` unchanged.
-    return just<RE>(left);
-}
-
-//  T ::= F T2      Parses an F, then lets parseT2 extend it; returns nothing if F fails.
-Optional<RE> parseT(Tokenizer &t) {
-  Optional<RE> left = parseF(t);
-  if(left.isNothing())
-    return left;
-  // The parsed F becomes the left operand of any following "+" chain.
-  return parseT2(t,left.fromJust());
-}
-
-// T2 ::= + F T2 |         (the second alternative is empty)
-Optional<RE> parseT2(Tokenizer &t, RE left) {
-    // `left` is the expression parsed so far.
-    if(t.token.kind == ALT) {
-        t.nextToken();
-    // A "+" must be followed by an F; otherwise the failure is passed on.
-    Optional<RE> right = parseF(t);
-    if(right.isNothing())
-      return right;
-    // Combine both sides and continue with the combined node as the new left operand.
-    return parseT2(t, mkAlt(left, right.fromJust()));
-    }
-    // Empty alternative: no "+" follows, so the result is `left` unchanged.
-    return just<RE>(left);
-}
-
-//  F ::= x | x* | (R) | (R)*      Returns nothing for any other leading token.
-Optional<RE> parseF(Tokenizer &t) {
-  switch(t.token.kind) {
-  case LETTER: {
-    char ch = t.token.val;      // Remember the letter before moving to the next token.
-    t.nextToken();
-    // A directly following "*" applies to this letter.
-    if(t.token.kind == KLEENE) {
-      t.nextToken();
-      return just<RE>(mkStar(mkLetter(ch)));
-    }
-    return just<RE>(mkLetter(ch));
-  }
-  case OPEN: {
-    t.nextToken();
-    Optional<RE> re = parseR(t);
-    if (re.isNothing())
-      return re;
-    // The parenthesized expression must be closed by ")".
-    if (t.token.kind != CLOSE)
-      return nothing<RE>();
-
-    t.nextToken();
-    // A "*" after the ")" applies to the whole parenthesized expression.
-    if(t.token.kind == KLEENE) {
-      t.nextToken();
-      re = mkStar(re.fromJust());   // The RE is converted to Optional<RE> by the one-argument constructor.
-    }
-    return re;
-  }
-
-  default:
-    return nothing<RE>();
-  } // switch
-}
-
-
-
-// Beispiele
-
-
-void display(Optional<RE> r) {  // Prints a parse result followed by a newline.
-  if(r.isNothing()) {
-    cout << "nothing \n";       // Failed parse.
-  } else {
-    cout << (r.fromJust())->show() << "\n";   // Successful parse: pretty-printed expression.
-  }
-  return;
-}
-
-
-void parse(string s) {          // Tokenizes and parses s, then prints the result via display().
-  Tokenizer t = Tokenizer(s);
-  // Start at the grammar's top-level rule R.
-  auto r = parseR(t);
-
-  // Whole input shall be consumed.
-  if (t.token.kind == EOS) {
-    display(r);
-    }
-  else {                        // Leftover tokens: report the parse as failed.
-    display(nothing<RE>());
-  }
-
-}
-
-
-void test() {
-  // Example: builds one expression by hand (without the parser) and prints its text form.
-  // (x + eps) . (y*)
-  RE r = mkConc(mkAlt(mkLetter('x'), mkEps()), mkStar(mkLetter('y')));
-  // The output is preceded by a newline.
-  cout << "\n" << r->show();
-
-}
-
-
-
-int main() {
-  // Program entry point: parses a few sample inputs and prints each result.
-  parse("x.y+z");               // "+" binds tighter than "." in this grammar.
-  parse("x* + y*");             // Blanks between tokens are skipped by the tokenizer.
-  parse("x + y *");
-  parse("(x+y)*");
-  // Two letters with no operator in between: the second letter is left unconsumed.
-  parse("x x");
-
-}
-
- -