forked from bartlomiejlagosz/M-Lang
-
Notifications
You must be signed in to change notification settings - Fork 0
/
scanner.py
86 lines (66 loc) · 1.67 KB
/
scanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import ply.lex as lex
# Single-character tokens that the lexer reports as themselves.
literals = "+-*/()=<>:;',{}[]"

# Keyword lexeme -> token type. Every type is simply the upper-cased keyword.
reserved = {
    keyword: keyword.upper()
    for keyword in (
        'if', 'then', 'else', 'while', 'for', 'break', 'continue',
        'return', 'eye', 'zeros', 'ones', 'print',
    )
}

# Token types known to the lexer: operator and value tokens first, followed
# by the keyword token types taken from `reserved`.
tokens = [
    'DIVASSIGN', 'ADDASSIGN', 'MULASSIGN', 'SUBASSIGN',
    'STRING', 'FLOAT', 'INTNUM',
    'GE', 'LE', 'EQ', 'NEQ',
    'DOTADD', 'DOTSUB', 'DOTMUL', 'DOTDIV',
    'TRANSPOSE', 'ID', 'AND', 'OR',
]
tokens.extend(reserved.values())
# Plain-string token patterns: each `t_<NAME>` holds the regular expression
# for the token type <NAME> listed in `tokens`.
# NOTE(review): "'" is also present in `literals`; confirm which of the two
# the grammar is meant to see for the transpose operator.
t_TRANSPOSE = r"'"
# Compound assignment operators.
t_DIVASSIGN = r'/='
t_ADDASSIGN = r'\+='
t_MULASSIGN = r'\*='
t_SUBASSIGN = r'-='
# Comparison operators ('<' and '>' on their own are in `literals`).
t_GE = r'>='
t_LE = r'<='
t_EQ = r'=='
t_NEQ = r'!='
# Dot-prefixed arithmetic operators.
t_DOTADD = r'\.\+'
t_DOTSUB = r'\.-'
t_DOTMUL = r'\.\*'
t_DOTDIV = r'\./'
# Nothing here converts the matched digits, unlike t_FLOAT which calls
# float() on its lexeme.
t_INTNUM = r'\d+'
# Logical operators.
t_AND = r'\&\&'
t_OR = r'\|\|'
# Characters the lexer skips between tokens: space and tab.
t_ignore = ' \t'
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    # Identifiers and keywords share this pattern: a lexeme found in the
    # `reserved` table takes that keyword's token type, anything else is ID.
    lexeme = t.value
    if lexeme in reserved:
        t.type = reserved[lexeme]
    else:
        t.type = 'ID'
    return t
def t_FLOAT(t):
    r'[0-9]*(\.[0-9]|[0-9]\.)[0-9]*([eE][-+]?[0-9]+)?'
    # Swap the matched text for its numeric value before handing the token on.
    lexeme = t.value
    t.value = float(lexeme)
    return t
def t_comment(t):
    r"\#.*"
    # A comment runs from '#' up to, but not including, the end of the line
    # ('.' stops at the newline), so the line counter is not touched here.
    # Returning nothing means no token is produced for the comment.
    return None
def t_newline(t):
    r'\n+'
    # One match may cover a whole run of newlines; advance the line counter
    # by the length of that run. Nothing is returned, so no token is emitted.
    run_length = len(t.value)
    t.lexer.lineno = t.lexer.lineno + run_length
def t_error(t):
    # Report the first character of the unmatched input together with the
    # current line, then step over that single character so scanning resumes.
    offending_char = t.value[0]
    line_number = t.lexer.lineno
    print('Illegal character: ', offending_char, 'at', line_number, 'line')
    t.lexer.skip(1)
def t_STRING(t):
    r'"[^"]*"'
    # The pattern allows a literal to span several lines, because `[^"]` also
    # matches '\n'. Those newlines are consumed here and never reach
    # t_newline, so count them now; otherwise every later token would report
    # a line number that is too small. The token value keeps its quotes.
    t.lexer.lineno += t.value.count('\n')
    return t
# Module-level lexer instance; find_tok_column reads its `lexdata`.
lexer = lex.lex()
def find_tok_column(token):
    """Return the 1-based column at which *token* starts on its line.

    The column is measured against the text held in the module-level
    ``lexer`` (``lexer.lexdata``), using the token's ``lexpos`` offset.
    The first character of every line, including the first line of the
    input, is column 1.
    """
    # rfind yields -1 when no newline precedes the token, so adding 1 gives
    # offset 0 for the first line and "just past the newline" for the rest.
    line_start = lexer.lexdata.rfind('\n', 0, token.lexpos) + 1
    return token.lexpos - line_start + 1
# fh = open('code.txt', "r")
# lexer.input(fh.read())
# for token in lexer:
# print("line %d: %s(%s)" % (token.lineno, token.type, token.value))