Commit 0842cc5 (1 parent: 7d47244)
Showing 27 changed files with 1,146 additions and 4 deletions.
@@ -0,0 +1,31 @@
# IF688 - Teoria e Implementação de Linguagens Computacionais

## Introduction to Semantic Analysis and _Abstract Syntax Trees_

### Goal

The goal of this lecture is to introduce the concept of semantic analysis and to present abstract syntax trees (_abstract syntax trees_ - AST).

### Discussion Questions

- What are the limitations of context-free grammars?
- What kinds of errors or problems cannot be caught by the lexical and syntactic analysis phases?
- What is the difference between concrete and abstract syntax trees? (A small sketch follows this file's listing below.)

### Material Used in Class

- [Slides (pdf)](https://drive.google.com/file/d/1aod-7wnQcyC3SQal8vdmguik3tRij2rA/view?usp=drive_web&authuser=0)
- Code developed during class
  - [version from the start of class (.zip with incomplete code)](https://drive.google.com/file/d/1O9EOYKHKFuTX7nPsNegCXYZKSGc0cHaK/view)
  - [version of the code at the end of class (.zip with the code written during class)](https://drive.google.com/file/d/1DeIfQtvuWHdZruoaJ7JJ-EFU2Mg7OQFG/view)

### Videos

- [Semantic Analysis - Overview](https://www.youtube.com/watch?v=VvLdrq-CKiI&list=PLHoVp5NAbKJYc5sSNRfOOjjxdp-WgJ8M4)
- [Semantic Analysis - Introduction to ASTs](https://www.youtube.com/watch?v=Wz4TSKOrBrM&list=PLHoVp5NAbKJYc5sSNRfOOjjxdp-WgJ8M4&index=5)

### Related Links

- [Semantic Analysis](https://en.wikipedia.org/wiki/Semantic_analysis_(compilers))
- [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree)
- [Abstract Syntax Tree Implementation Idioms, by Joel Jones](http://www.hillside.net/plop/plop2003/Papers/Jones-ImplementingASTs.pdf)
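To make the last discussion question concrete, here is a minimal sketch (not part of the commit) contrasting the two kinds of tree for the input `1+2*3;`, using the expression grammar and the AST classes added later in this commit (`NumExpr`, `SumExpr`, `MulExpr` from `astnodes.py`). The concrete tree is only described in comments; the code builds the abstract one.

```python
# Hypothetical illustration, assuming the astnodes module added in this commit.
from astnodes import NumExpr, SumExpr, MulExpr

# Concrete (parse) tree for "1+2*3;" under E ::= T (("+"|"-") T)*, T ::= F (("*"|"/") F)*:
# E expands to T "+" T, the second T expands to F "*" F, and every grammar symbol,
# token, and the trailing ";" appears as a node.
#
# Abstract syntax tree for the same input: only operators and operands survive,
# with precedence encoded directly in the tree shape.
ast = SumExpr(NumExpr(1), MulExpr(NumExpr(2), NumExpr(3)))
print(ast)  # SumExpr(NumExpr(1),MulExpr(NumExpr(2),NumExpr(3)))
```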
@@ -0,0 +1,2 @@
42+(57+22);
((12+3)+((4+5)+(7)));
@@ -0,0 +1,7 @@
22+4*10+(3+1);
15+(12);
((((((15))+1+2)+3)));
2*3;
4/2;
1+2;
1-1;
@@ -0,0 +1,3 @@
1 + 3 - 4 * 5;
1 + 3 - 4 * x;
w + y - z * x;
@@ -0,0 +1,42 @@
# AST node classes for arithmetic expressions.
class Expr(object):
    pass

class NumExpr(Expr):
    # Numeric literal.
    def __init__(self, valor):
        self.valor = valor
    def __str__(self):
        return "NumExpr("+str(self.valor)+")"

class IdExpr(Expr):
    # Identifier (variable name).
    def __init__(self, nome):
        self.nome = nome
    def __str__(self):
        return "IdExpr("+str(self.nome)+")"

class SumExpr(Expr):
    # Addition: esq + dir.
    def __init__(self, e_esq, e_dir):
        self.esq = e_esq
        self.dir = e_dir
    def __str__(self):
        return "SumExpr("+str(self.esq)+","+str(self.dir)+")"

class SubExpr(Expr):
    # Subtraction: esq - dir.
    def __init__(self, e_esq, e_dir):
        self.esq = e_esq
        self.dir = e_dir
    def __str__(self):
        return "SubExpr("+str(self.esq)+","+str(self.dir)+")"

class MulExpr(Expr):
    # Multiplication: esq * dir.
    def __init__(self, e_esq, e_dir):
        self.esq = e_esq
        self.dir = e_dir
    def __str__(self):
        return "MulExpr("+str(self.esq)+","+str(self.dir)+")"

class DivExpr(Expr):
    # Division: esq / dir.
    def __init__(self, e_esq, e_dir):
        self.esq = e_esq
        self.dir = e_dir
    def __str__(self):
        return "DivExpr("+str(self.esq)+","+str(self.dir)+")"
@@ -0,0 +1,9 @@
# Generic visitor: dispatches to visit_<ClassName> based on the node's runtime type.
class NodeVisitor(object):
    def visit(self, node):
        method_name = 'visit_' + type(node).__name__
        visitor = getattr(self, method_name, self.generic_visit)
        return visitor(node)

    def generic_visit(self, node):
        # Fallback when no specific visit_<ClassName> method is defined.
        raise Exception('No visit_{} method'.format(type(node).__name__))
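A concrete subclass is not included in this commit, but the dispatch convention above suggests how one would look. Below is a minimal, hypothetical evaluator for the AST classes from `astnodes.py`; the subclass name, the `visitor` module name, and the method bodies are assumptions for illustration only.

```python
# Hypothetical evaluator built on the NodeVisitor above (not part of the commit).
from astnodes import NumExpr, SumExpr, SubExpr, MulExpr, DivExpr
from visitor import NodeVisitor  # assumed module name for the class above

class EvalVisitor(NodeVisitor):
    def visit_NumExpr(self, node):
        return node.valor
    def visit_SumExpr(self, node):
        return self.visit(node.esq) + self.visit(node.dir)
    def visit_SubExpr(self, node):
        return self.visit(node.esq) - self.visit(node.dir)
    def visit_MulExpr(self, node):
        return self.visit(node.esq) * self.visit(node.dir)
    def visit_DivExpr(self, node):
        return self.visit(node.esq) / self.visit(node.dir)

# Example: 2*3 + 1 evaluates to 7.
print(EvalVisitor().visit(SumExpr(MulExpr(NumExpr(2), NumExpr(3)), NumExpr(1))))
```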
@@ -0,0 +1,165 @@
import enum
import sys

class Lexer:
    def __init__(self, input):
        self.source = input + '\n'  # source code (input)
        self.curChar = ''           # current character within the source code
        self.curPos = -1
        self.nextChar()

    # Process the next character.
    def nextChar(self):
        self.curPos = self.curPos + 1
        if self.curPos >= len(self.source):
            self.curChar = '\0'  # EOF
        else:
            self.curChar = self.source[self.curPos]

    # Return the lookahead character (not yet consumed).
    def peek(self):
        if self.curPos+1 >= len(self.source):
            return '\0'
        else:
            return self.source[self.curPos+1]

    # Invalid token found; print an error message and exit.
    def abort(self, message):
        sys.exit("Erro léxico! " + message)

    # Skip whitespace (including newlines; ';' is the statement separator here).
    def skipWhitespace(self):
        while self.curChar == ' ' or self.curChar == '\n' or self.curChar == '\t' or self.curChar == '\r':
            self.nextChar()

    # Skip a comment (runs to the end of the line).
    def skipComment(self):
        if self.curChar == '#':
            while self.curChar != '\n':
                self.nextChar()

    # Return the next token.
    def getToken(self):
        self.skipWhitespace()
        # Skip comments (and the whitespace after them) before matching a token.
        while self.curChar == '#':
            self.skipComment()
            self.skipWhitespace()
        token = None
        if self.curChar == '+':
            token = Token(self.curChar, TokenType.PLUS)
        elif self.curChar == '-':
            token = Token(self.curChar, TokenType.MINUS)
        elif self.curChar == '*':
            token = Token(self.curChar, TokenType.ASTERISK)
        elif self.curChar == '/':
            token = Token(self.curChar, TokenType.SLASH)
        elif self.curChar == '(':
            token = Token(self.curChar, TokenType.L_PAREN)
        elif self.curChar == ')':
            token = Token(self.curChar, TokenType.R_PAREN)
        elif self.curChar == ';':
            token = Token(self.curChar, TokenType.SEMICOLON)
        elif self.curChar == '\0':
            token = Token(self.curChar, TokenType.EOF)
        # A single '=' is EQ; '==' is EQEQ.
        elif self.curChar == '=':
            if self.peek() == '=':
                c = self.curChar
                self.nextChar()
                token = Token(c + self.curChar, TokenType.EQEQ)
            else:
                token = Token(self.curChar, TokenType.EQ)
        elif self.curChar == '!':
            if self.peek() == '=':
                c = self.curChar
                self.nextChar()
                token = Token(c + self.curChar, TokenType.NOTEQ)
            else:
                self.abort("Esperava o símbolo de = e recebeu "+self.peek())
        elif self.curChar == '>':
            if self.peek() == '=':
                c = self.curChar
                self.nextChar()
                token = Token(c + self.curChar, TokenType.GTEQ)
            else:
                token = Token(self.curChar, TokenType.GT)
        elif self.curChar == '<':
            if self.peek() == '=':
                c = self.curChar
                self.nextChar()
                token = Token(c + self.curChar, TokenType.LTEQ)
            else:
                token = Token(self.curChar, TokenType.LT)
        elif self.curChar == '\"':
            self.nextChar()
            startPos = self.curPos
            while self.curChar != '\"':
                if self.curChar == '\\' or self.curChar == '\t' or self.curChar == '\r' or self.curChar == '%':
                    self.abort("Caractere ilegal dentro de uma string")
                self.nextChar()
            stringText = self.source[startPos : self.curPos]
            token = Token(stringText, TokenType.STRING)
        elif self.curChar.isdigit():
            startPos = self.curPos
            while self.peek().isdigit():
                self.nextChar()
            if self.peek() == '.':  # decimals
                self.nextChar()
                if not self.peek().isdigit():
                    self.abort("Caractere ilegal dentro de um número: "+ self.peek())
                while self.peek().isdigit():
                    self.nextChar()
            number = self.source[startPos : self.curPos + 1]
            token = Token(number, TokenType.NUMBER)
        elif self.curChar.isalpha():
            startPos = self.curPos
            while self.peek().isalnum():
                self.nextChar()
            word = self.source[startPos : self.curPos + 1]
            keyword = Token.checkIfKeyword(word)
            if keyword is None:
                token = Token(word, TokenType.IDENT)
            else:
                token = Token(word, keyword)
        else:
            # Unknown token.
            self.abort("Token desconhecido: "+self.curChar)

        self.nextChar()
        return token

class Token:
    def __init__(self, tokenText, tokenKind):
        self.text = tokenText  # lexeme, the specific instance found
        self.kind = tokenKind  # the token's classified type (TokenType)

    @staticmethod
    def checkIfKeyword(word):
        for kind in TokenType:
            if kind.name == word.upper() and kind.value > 100 and kind.value < 200:
                return kind
        return None

class TokenType(enum.Enum):
    EOF = -1
    NUMBER = 1
    IDENT = 2
    STRING = 3
    SEMICOLON = 4
    L_PAREN = 5
    R_PAREN = 6
    # RESERVED WORDS
    PRINT = 101
    TRUE = 102
    FALSE = 103
    # OPERATORS
    EQ = 201
    PLUS = 202
    MINUS = 203
    ASTERISK = 204
    SLASH = 205
    EQEQ = 206
    NOTEQ = 207
    LT = 208
    LTEQ = 209
    GT = 210
    GTEQ = 211
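The lexer can be exercised on its own before wiring it to the parser. This is a hypothetical snippet, assuming the file above is saved as `lexer.py` (the name imported by the driver below): feed it a string and pull tokens until EOF.

```python
# Hypothetical usage sketch for the lexer above (assumed saved as lexer.py).
from lexer import Lexer, TokenType

lexer = Lexer("15 + (12) * x;")
token = lexer.getToken()
while token.kind != TokenType.EOF:
    print(token.kind.name, repr(token.text))
    token = lexer.getToken()
# Expected kinds: NUMBER, PLUS, L_PAREN, NUMBER, R_PAREN, ASTERISK, IDENT, SEMICOLON
```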
@@ -0,0 +1,17 @@
from lexer import *
from parse import *
import sys

# Read the source file given on the command line, parse it, and print the AST of each expression.
def main():
    if len(sys.argv) != 2:
        sys.exit("Erro: Precisamos de um arquivo como argumento.")
    with open(sys.argv[1], 'r') as inputFile:
        input = inputFile.read()

    lexer = Lexer(input)
    parser = Parser(lexer)
    exps = parser.parse()
    for exp in exps:
        print(exp)

main()
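For a quick check without creating an input file, the same pipeline can be driven from an in-memory string. This is a hypothetical snippet, assuming the modules are named `lexer.py`, `parse.py`, and `astnodes.py`, as the imports above suggest.

```python
# Hypothetical quick check of the lexer + parser pipeline on an in-memory string.
from lexer import Lexer
from parse import Parser

for exp in Parser(Lexer("1+2*3; (4-1)/3;")).parse():
    print(exp)
# SumExpr(NumExpr(1),MulExpr(NumExpr(2),NumExpr(3)))
# DivExpr(SubExpr(NumExpr(4),NumExpr(1)),NumExpr(3))
```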
@@ -0,0 +1,101 @@
import sys
from lexer import *
from astnodes import *

class Parser:
    def __init__(self, lexer):
        self.lexer = lexer
        self.curToken = None
        self.peekToken = None
        self.nextToken()
        self.nextToken()

    # Return true if the **current** token matches kind.
    def checkToken(self, kind):
        return kind == self.curToken.kind

    # Return true if the next token **(peek)** matches kind.
    def checkPeek(self, kind):
        return kind == self.peekToken.kind

    # Consume the current token if it matches kind; otherwise report a syntax error.
    def match(self, kind):
        if not self.checkToken(kind):
            self.abort("Esperava por " + kind.name + ", apareceu " + self.curToken.kind.name)
        self.nextToken()

    # Advance the token pointers (current and peek).
    def nextToken(self):
        self.curToken = self.peekToken
        self.peekToken = self.lexer.getToken()

    def abort(self, msg):
        sys.exit("Erro sintático: "+msg)

    # Grammar:
    # S' ::= S$
    # S  ::= (E;)*
    # E  ::= T (("+"|"-") T)*
    # T  ::= F (("*"|"/") F)*
    # F  ::= num | id | "(" E ")"

    # S' ::= S$
    def parse(self):
        exps = self.S()
        self.match(TokenType.EOF)
        return exps

    # S ::= (E;)*
    def S(self):
        explist = []
        while not(self.checkToken(TokenType.EOF)):
            explist.append(self.E())
            self.match(TokenType.SEMICOLON)
        return explist

    # E ::= T (("+"|"-") T)*
    #   E -> T
    #   E -> T + T
    #   E -> T - T
    def E(self):
        e = self.T()
        while self.checkToken(TokenType.PLUS) or self.checkToken(TokenType.MINUS):
            if self.checkToken(TokenType.PLUS):
                self.match(TokenType.PLUS)
                t = self.T()
                e = SumExpr(e, t)
            elif self.checkToken(TokenType.MINUS):
                self.match(TokenType.MINUS)
                t = self.T()
                e = SubExpr(e, t)
        return e

    # T ::= F (("*"|"/") F)*
    def T(self):
        e = self.F()
        while self.checkToken(TokenType.ASTERISK) or self.checkToken(TokenType.SLASH):
            if self.checkToken(TokenType.ASTERISK):
                self.match(TokenType.ASTERISK)
                f = self.F()
                e = MulExpr(e, f)
            elif self.checkToken(TokenType.SLASH):
                self.match(TokenType.SLASH)
                f = self.F()
                e = DivExpr(e, f)
        return e

    # F ::= num | id | "(" E ")"
    def F(self):
        e = None
        if self.checkToken(TokenType.NUMBER):
            valorTokenAtual = self.curToken.text
            valorNumerico = int(valorTokenAtual)  # note: integer literals only; a decimal NUMBER token would need float()
            e = NumExpr(valorNumerico)
            self.match(TokenType.NUMBER)
        elif self.checkToken(TokenType.IDENT):
            e = IdExpr(self.curToken.text)
            self.match(TokenType.IDENT)
        elif self.checkToken(TokenType.L_PAREN):
            self.match(TokenType.L_PAREN)
            e = self.E()
            self.match(TokenType.R_PAREN)
        else:
            self.abort("Token inesperado, esperava um número, um identificador, ou um abre parênteses, recebeu: " + self.curToken.text)
        return e
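One design point worth making explicit: because `E` and `T` fold each new operand into the accumulator on the left, the resulting ASTs are left-associative, which matters for `-` and `/`. A small hypothetical check, using the module names assumed above:

```python
# Hypothetical check that the parser builds left-associative trees (not part of the commit).
from lexer import Lexer
from parse import Parser

print(Parser(Lexer("10-4-3;")).parse()[0])
# SubExpr(SubExpr(NumExpr(10),NumExpr(4)),NumExpr(3))  i.e. (10-4)-3, not 10-(4-3)
```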