atualizando materiais de aula

if688 · Nov 28, 2023 · 0842cc5 · 0842cc5
1 parent 7d47244
commit 0842cc5
Show file tree

Hide file tree

Showing 27 changed files with 1,146 additions and 4 deletions.
diff --git a/2023-11-23.md b/2023-11-23.md
@@ -0,0 +1,31 @@
+# IF688 - Teoria e Implementação de Linguagens Computacionais
+
+## Introdução à Análise Semântica e _Abstract Syntax Trees_
+
+### Objetivo
+
+O objetivo desta aula é introduzir o conceito de análise semântica e apresentar árvores sintáticas abstratas (_abstract syntax trees_ - AST). 
+
+### Questões para Discussão
+
+- Quais as limitações de gramáticas livres de contexto?
+- Que tipo de erro ou problema não pode ser capturado pelas fases de análise léxica e sintática?
+- Qual a diferença entre árvores sintáticas concretas e abstratas?
+
+### Material usado em aula
+
+- [Slides (pdf)](https://drive.google.com/file/d/1aod-7wnQcyC3SQal8vdmguik3tRij2rA/view?usp=drive_web&authuser=0)
+- Código desenvolvido em sala de aula
+  - [versão para o início da aula (.zip com código incompleto)](https://drive.google.com/file/d/1O9EOYKHKFuTX7nPsNegCXYZKSGc0cHaK/view)
+  - [versão do código ao final da aula (.zip com código escrito durante a aula)](https://drive.google.com/file/d/1DeIfQtvuWHdZruoaJ7JJ-EFU2Mg7OQFG/view)
+
+### Vídeos
+
+- [Análise Semântica - Visão Geral](https://www.youtube.com/watch?v=VvLdrq-CKiI&list=PLHoVp5NAbKJYc5sSNRfOOjjxdp-WgJ8M4)
+- [Análise Semântica - Introdução a ASTs](https://www.youtube.com/watch?v=Wz4TSKOrBrM&list=PLHoVp5NAbKJYc5sSNRfOOjjxdp-WgJ8M4&index=5)
+
+### Links Relacionados
+
+- [Semantic Analysis](https://en.wikipedia.org/wiki/Semantic_analysis_(compilers))
+- [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree)
+- [Abstract Syntax Tree Implementation Idioms, by Joel Jones](http://www.hillside.net/plop/plop2003/Papers/Jones-ImplementingASTs.pdf)
diff --git a/2023-11-23/ast/1.txt b/2023-11-23/ast/1.txt
@@ -0,0 +1,2 @@
+42+(57+22);
+((12+3)+((4+5)+(7)));
diff --git a/2023-11-23/ast/2.txt b/2023-11-23/ast/2.txt
@@ -0,0 +1,7 @@
+22+4*10+(3+1);
+15+(12); 
+((((((15))+1+2)+3)));
+2*3;
+4/2;
+1+2;
+1-1;
diff --git a/2023-11-23/ast/3.txt b/2023-11-23/ast/3.txt
@@ -0,0 +1,3 @@
+1 + 3 - 4 * 5;
+1 + 3 - 4 * x;
+w + y - z * x;
diff --git a/2023-11-23/ast/astnodes.py b/2023-11-23/ast/astnodes.py
@@ -0,0 +1,42 @@
class Expr(object):
    """Base class shared by every expression node of the AST."""


class NumExpr(Expr):
    """Leaf node holding a numeric literal in ``valor``."""

    def __init__(self, valor):
        self.valor = valor

    def __str__(self):
        return "NumExpr(" + str(self.valor) + ")"


class IdExpr(Expr):
    """Leaf node holding an identifier name in ``nome``."""

    def __init__(self, nome):
        self.nome = nome

    def __str__(self):
        # The identifier is stored in ``nome``; this node has no ``valor``.
        return "IdExpr(" + str(self.nome) + ")"


class _BinaryExpr(Expr):
    """Common implementation of the two-operand nodes.

    Stores the left operand in ``esq`` and the right operand in ``dir``.
    The textual form is ``<ClassName>(<esq>,<dir>)``, so each concrete
    subclass only has to exist under the right name.
    """

    def __init__(self, e_esq, e_dir):
        self.esq = e_esq
        self.dir = e_dir

    def __str__(self):
        return "{}({},{})".format(type(self).__name__, self.esq, self.dir)


class SumExpr(_BinaryExpr):
    """Addition node: ``esq + dir``."""


class SubExpr(_BinaryExpr):
    """Subtraction node: ``esq - dir``."""


class MulExpr(_BinaryExpr):
    """Multiplication node: ``esq * dir``."""


class DivExpr(_BinaryExpr):
    """Division node: ``esq / dir``."""
diff --git a/2023-11-23/ast/astvisitor.py b/2023-11-23/ast/astvisitor.py
@@ -0,0 +1,9 @@
class NodeVisitor:
    """Dispatch a node to the ``visit_<ClassName>`` method of the visitor.

    Subclasses define one ``visit_<ClassName>`` method per node class they
    handle; any node without a matching method goes to ``generic_visit``.
    """

    def generic_visit(self, node):
        """Fallback used when no specific handler exists; always raises."""
        node_kind = type(node).__name__
        raise Exception('No visit_{} method'.format(node_kind))

    def visit(self, node):
        """Return the result of the handler selected by the node's class name."""
        handler_name = 'visit_{}'.format(type(node).__name__)
        handler = getattr(self, handler_name, self.generic_visit)
        return handler(node)
+
diff --git a/2023-11-23/ast/lexer.py b/2023-11-23/ast/lexer.py
@@ -0,0 +1,165 @@
+import enum
+import sys
+
class Lexer:
    """Character-level scanner that splits source text into ``Token`` objects.

    The scanner keeps a cursor (``curPos`` / ``curChar``) over ``source`` and
    returns one token per ``getToken`` call. Once the input is exhausted the
    cursor reads as ``'\\0'`` and every further call yields an EOF token.
    Lexical errors terminate the program through ``abort``.
    """

    def __init__(self, input):
        """Prepare to scan ``input``, the complete source text as a string."""
        # The appended newline guarantees that a comment on the last line
        # is always terminated.
        self.source = input + '\n'
        self.curChar = ''  # character currently under the cursor
        self.curPos = -1   # index of curChar inside source
        self.nextChar()

    def nextChar(self):
        """Advance the cursor by one character ('\\0' marks end of input)."""
        self.curPos = self.curPos + 1
        if self.curPos >= len(self.source):
            self.curChar = '\0'  # EOF
        else:
            self.curChar = self.source[self.curPos]

    def peek(self):
        """Return the character after the cursor without consuming it."""
        if self.curPos + 1 >= len(self.source):
            return '\0'
        return self.source[self.curPos + 1]

    def abort(self, message):
        """Report a lexical error and terminate via ``sys.exit``."""
        sys.exit("Erro léxico! " + message)

    def skipWhitespace(self):
        """Skip spaces, tabs, carriage returns and newlines."""
        while self.curChar in (' ', '\n', '\t', '\r'):
            self.nextChar()

    def skipComment(self):
        """Skip a ``#`` comment, leaving the cursor on the newline ending it."""
        if self.curChar == '#':
            while self.curChar != '\n' and self.curChar != '\0':
                self.nextChar()

    def getToken(self):
        """Return the next token and advance the cursor past it."""
        # A comment ends on a newline that must itself be skipped, and may be
        # followed by more blank space or further comments, so alternate
        # between the two until real input (or EOF) is reached.
        self.skipWhitespace()
        while self.curChar == '#':
            self.skipComment()
            self.skipWhitespace()

        ch = self.curChar
        token = None
        if ch == '+':
            token = Token(ch, TokenType.PLUS)
        elif ch == '-':
            token = Token(ch, TokenType.MINUS)
        elif ch == '*':
            token = Token(ch, TokenType.ASTERISK)
        elif ch == '/':
            token = Token(ch, TokenType.SLASH)
        elif ch == '(':
            token = Token(ch, TokenType.L_PAREN)
        elif ch == ')':
            token = Token(ch, TokenType.R_PAREN)
        elif ch == ';':
            token = Token(ch, TokenType.SEMICOLON)
        elif ch == '\0':
            token = Token(ch, TokenType.EOF)
        elif ch == '=':
            # '=' is EQ, '==' is EQEQ
            token = self._readOperator(TokenType.EQEQ, TokenType.EQ)
        elif ch == '!':
            # '!' is only valid as the first half of '!='
            token = self._readOperator(TokenType.NOTEQ, None)
        elif ch == '>':
            token = self._readOperator(TokenType.GTEQ, TokenType.GT)
        elif ch == '<':
            token = self._readOperator(TokenType.LTEQ, TokenType.LT)
        elif ch == '\"':
            token = self._readString()
        elif ch.isdigit():
            token = self._readNumber()
        elif ch.isalpha():
            token = self._readWord()
        else:
            # Unknown token
            self.abort("Token desconhecido: " + ch)

        # Every branch leaves the cursor on the last character of the token.
        self.nextChar()
        return token

    def _readOperator(self, longKind, shortKind):
        """Lex an operator that may be followed by '='.

        Returns a ``longKind`` token when the next character is '=' and a
        ``shortKind`` token otherwise; aborts when ``shortKind`` is None,
        meaning the one-character form is not a valid token.
        """
        if self.peek() == '=':
            first = self.curChar
            self.nextChar()
            return Token(first + self.curChar, longKind)
        if shortKind is None:
            self.abort("Esperava o símbolo de = e recebeu " + self.peek())
        return Token(self.curChar, shortKind)

    def _readString(self):
        """Lex a double-quoted string; the token text excludes the quotes."""
        self.nextChar()  # step over the opening quote
        startPos = self.curPos
        while self.curChar != '\"':
            if self.curChar == '\0':
                # Without this check an unterminated literal never leaves
                # the loop, because the cursor stays on '\0' at EOF.
                self.abort("String não terminada")
            if self.curChar in ('\\', '\t', '\r', '%'):
                self.abort("Caractere ilegal dentro de uma string")
            self.nextChar()
        return Token(self.source[startPos:self.curPos], TokenType.STRING)

    def _readNumber(self):
        """Lex digits with an optional fractional part (``digits.digits``)."""
        startPos = self.curPos
        while self.peek().isdigit():
            self.nextChar()
        if self.peek() == '.':  # decimals
            self.nextChar()
            if not self.peek().isdigit():
                self.abort("Caractere ilegal dentro de um número: " + self.peek())
            while self.peek().isdigit():
                self.nextChar()
        return Token(self.source[startPos:self.curPos + 1], TokenType.NUMBER)

    def _readWord(self):
        """Lex an alphanumeric word as either a keyword or an identifier."""
        startPos = self.curPos
        while self.peek().isalnum():
            self.nextChar()
        word = self.source[startPos:self.curPos + 1]
        keyword = Token.checkIfKeyword(word)
        if keyword is None:
            return Token(word, TokenType.IDENT)
        return Token(word, keyword)
+
class Token:
    """A lexeme paired with the ``TokenType`` it was classified as."""

    def __init__(self, tokenText, tokenKind):
        # text: the lexeme, i.e. the exact characters found in the source
        # kind: the classification (a TokenType member) assigned to it
        self.text, self.kind = tokenText, tokenKind

    @staticmethod
    def checkIfKeyword(word):
        """Return the keyword ``TokenType`` named like ``word``, else None.

        The comparison ignores case, and only members whose value lies
        strictly between 100 and 200 are considered.
        """
        wanted = word.upper()
        return next(
            (kind for kind in TokenType
             if 100 < kind.value < 200 and kind.name == wanted),
            None,
        )
+
@enum.unique
class TokenType(enum.Enum):
    """Every token category the lexer can produce.

    Values are grouped by range: values strictly between 100 and 200 are the
    ones ``Token.checkIfKeyword`` treats as reserved words; operators start
    at 201.
    """

    # End of input, literals and punctuation
    EOF = -1
    NUMBER = 1
    IDENT = 2
    STRING = 3
    SEMICOLON = 4
    L_PAREN = 5
    R_PAREN = 6

    # Reserved words
    PRINT = 101
    TRUE = 102
    FALSE = 103

    # Operators
    EQ = 201
    PLUS = 202
    MINUS = 203
    ASTERISK = 204
    SLASH = 205
    EQEQ = 206
    NOTEQ = 207
    LT = 208
    LTEQ = 209
    GT = 210
    GTEQ = 211
diff --git a/2023-11-23/ast/main.py b/2023-11-23/ast/main.py
@@ -0,0 +1,17 @@
+from lexer import *
+from parse import *
+import sys
+
def main():
    """Parse the file named on the command line and print each expression.

    Exits with an error message unless exactly one argument (the path of the
    source file) is given. Each parsed expression is printed on its own
    line using its ``str`` form.
    """
    if len(sys.argv) != 2:
        sys.exit("Erro: Precisamos de um arquivo como argumento.")
    with open(sys.argv[1], 'r') as inputFile:
        # Named ``source`` so the builtin ``input`` is not shadowed.
        source = inputFile.read()

    parser = Parser(Lexer(source))
    for exp in parser.parse():
        print(exp)


# Run only when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()
diff --git a/2023-11-23/ast/parse.py b/2023-11-23/ast/parse.py
@@ -0,0 +1,101 @@
+import sys
+from lexer import *
+from astnodes import *
+
class Parser:
    """Recursive-descent parser that builds one AST per expression.

    Grammar:

        S' ::= S$
        S  ::= (E;)*
        E  ::= T (("+"|"-") T)*
        T  ::= F (("*"|"/") F)*
        F  ::= num | id | "(" E ")"

    The parser keeps the current token and one token of lookahead, both
    obtained from the lexer passed to the constructor. Syntax errors
    terminate the program through ``abort``.
    """

    def __init__(self, lexer):
        """Store ``lexer`` and prime the current and lookahead tokens."""
        self.lexer = lexer
        self.curToken = None
        self.peekToken = None
        # Two calls are needed: the first fills peekToken, the second moves
        # it into curToken and fetches the next lookahead.
        self.nextToken()
        self.nextToken()

    def checkToken(self, kind):
        """Return True if the **current** token is of type ``kind``."""
        return kind == self.curToken.kind

    def checkPeek(self, kind):
        """Return True if the **lookahead** token is of type ``kind``."""
        return kind == self.peekToken.kind

    def match(self, kind):
        """Consume the current token, aborting if it is not of type ``kind``."""
        if not self.checkToken(kind):
            self.abort("Esperava por " + kind.name + ", apareceu " + self.curToken.kind.name)
        self.nextToken()

    def nextToken(self):
        """Shift the lookahead into the current token and read a new lookahead."""
        self.curToken = self.peekToken
        self.peekToken = self.lexer.getToken()

    def abort(self, msg):
        """Report a syntax error and terminate via ``sys.exit``."""
        sys.exit("Erro sintático: " + msg)

    # S' ::= S$
    def parse(self):
        """Parse the whole input and return the list of expression ASTs."""
        exps = self.S()
        self.match(TokenType.EOF)
        return exps

    # S ::= (E;)*
    def S(self):
        """Parse semicolon-terminated expressions until EOF; return them as a list."""
        explist = []
        while not self.checkToken(TokenType.EOF):
            explist.append(self.E())
            self.match(TokenType.SEMICOLON)
        return explist

    # E ::= T (("+"|"-") T)*
    def E(self):
        """Parse a left-associative chain of additions and subtractions."""
        e = self.T()
        while True:
            if self.checkToken(TokenType.PLUS):
                self.match(TokenType.PLUS)
                e = SumExpr(e, self.T())
            elif self.checkToken(TokenType.MINUS):
                self.match(TokenType.MINUS)
                e = SubExpr(e, self.T())
            else:
                return e

    # T ::= F (("*"|"/") F)*
    def T(self):
        """Parse a left-associative chain of multiplications and divisions."""
        e = self.F()
        while True:
            if self.checkToken(TokenType.ASTERISK):
                self.match(TokenType.ASTERISK)
                e = MulExpr(e, self.F())
            elif self.checkToken(TokenType.SLASH):
                self.match(TokenType.SLASH)
                e = DivExpr(e, self.F())
            else:
                return e

    # F ::= num | id | "(" E ")"
    def F(self):
        """Parse a number, an identifier, or a parenthesised expression."""
        e = None
        if self.checkToken(TokenType.NUMBER):
            text = self.curToken.text
            # NUMBER lexemes may carry a fractional part ("1.5"), which
            # int() rejects; only integer lexemes are converted with int().
            value = float(text) if '.' in text else int(text)
            e = NumExpr(value)
            self.match(TokenType.NUMBER)
        elif self.checkToken(TokenType.IDENT):
            e = IdExpr(self.curToken.text)
            self.match(TokenType.IDENT)
        elif self.checkToken(TokenType.L_PAREN):
            self.match(TokenType.L_PAREN)
            e = self.E()
            self.match(TokenType.R_PAREN)
        else:
            self.abort("Token inesperado, esperava um número, um identificador, ou um abre parênteses, recebeu: " + self.curToken.text)
        return e