-
Notifications
You must be signed in to change notification settings - Fork 0
/
rule_parser.py
130 lines (115 loc) · 4.84 KB
/
rule_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import re
class Lexer:
    """Split a rule-DSL string into ``(token_type, token_value)`` tuples.

    Recognised tokens are the keywords ``IF``, ``ELSE``, ``THEN``, ``AND``,
    ``OR`` and ``NOT``, parentheses, commas, identifiers, unsigned integer
    literals, double-quoted strings and comparison operators.  Whitespace
    separates tokens and is discarded.

    Comparison operators use their own text as the token type (``'>='``,
    ``'=='``, ...); every other token uses the name of its pattern as the
    type.  Token values are always the matched source text, so ``STRING``
    values keep their quotes and ``NUMBER`` values stay strings.
    """

    # Alternatives are tried left to right, so keywords must precede
    # IDENTIFIER.  The trailing \b stops a keyword from matching the prefix
    # of a longer identifier (e.g. "ORDER" must not lex as OR + "DER").
    # MISMATCH is a catch-all so that no character is skipped silently.
    _TOKEN_SPEC = (
        ('IF', r'IF\b'),
        ('ELSE', r'ELSE\b'),
        ('THEN', r'THEN\b'),
        ('AND', r'AND\b'),
        ('OR', r'OR\b'),
        ('NOT', r'NOT\b'),
        ('LPAREN', r'\('),
        ('RPAREN', r'\)'),
        ('COMMA', r','),
        ('IDENTIFIER', r'[a-zA-Z_]\w*'),
        ('NUMBER', r'\d+'),
        ('STRING', r'"[^"]*"'),
        ('WHITESPACE', r'\s+'),
        ('COMPARISON', r'[<>!=]=?'),
        ('MISMATCH', r'.'),
    )
    _TOKEN_PATTERN = re.compile(
        '|'.join('(?P<%s>%s)' % spec for spec in _TOKEN_SPEC)
    )

    def __init__(self, rule_dsl):
        """Store the DSL text; ``tokens`` stays empty until ``tokenize()``."""
        self.rule_dsl = rule_dsl
        self.tokens = []

    def tokenize(self):
        """Scan ``rule_dsl`` and store the resulting tokens in ``self.tokens``.

        Each call replaces any tokens from a previous call, so calling it
        repeatedly does not accumulate duplicates.

        Raises:
            ValueError: if the text contains a character that is not part
                of any token (for example ``$`` or an unterminated ``"``).
        """
        tokens = []
        for match in self._TOKEN_PATTERN.finditer(self.rule_dsl):
            token_type = match.lastgroup
            token_value = match.group()
            if token_type == 'WHITESPACE':
                continue
            if token_type == 'MISMATCH':
                raise ValueError(
                    'Unexpected character %r at position %d'
                    % (token_value, match.start())
                )
            if token_type == 'COMPARISON':
                # The parser dispatches on the operator text itself.
                token_type = token_value
            tokens.append((token_type, token_value))
        self.tokens = tokens
class Parser:
    """Recursive-descent parser that turns lexer tokens into a tuple AST.

    ``tokens`` is a sequence of ``(token_type, token_value)`` pairs.  The
    grammar implemented by the methods below is::

        statement  := 'IF' expression 'THEN' expression | expression
        expression := term (('AND' | 'OR') term)*
        term       := factor (comparison factor)*
        factor     := IDENTIFIER ['(' [expression (',' expression)*] ')']
                    | 'NOT' factor
                    | '(' expression ')'
                    | NUMBER
                    | STRING

    ``AND`` and ``OR`` share one precedence level and associate to the
    left.  No rule consumes an ``ELSE`` token, so one is rejected as an
    unexpected token.

    AST nodes are plain tuples: ``('IF', condition, then)``,
    ``(operator, left, right)``, ``('NOT', operand)``,
    ``('FUNCTION', name, [args])``, ``('IDENTIFIER', name)``,
    ``('NUMBER', int)`` and ``('STRING', text_without_quotes)``.

    All syntax errors are reported as ``ValueError``.
    """

    _LOGICAL_OPERATORS = ('AND', 'OR')
    _COMPARISON_OPERATORS = ('>', '<', '==', '!=', '>=', '<=', '=')

    def __init__(self, tokens):
        """Store the token sequence and start reading at its first token."""
        self.tokens = tokens
        self.current_token_index = 0

    def parse(self):
        """Parse the whole token sequence and return its AST.

        Raises:
            ValueError: on any syntax error, including tokens left over
                after a complete statement and empty input.
        """
        ast = self.parse_if_statement()
        if self._peek() is not None:
            raise ValueError('Unexpected token')
        return ast

    def parse_if_statement(self):
        """Parse ``IF cond THEN expr``, or a bare expression if no ``IF``.

        The leading token is only consumed when it really is ``IF``, so a
        bare expression keeps its first token.
        """
        token = self._peek()
        if token is None or token[0] != 'IF':
            return self.parse_expression()
        self.current_token_index += 1
        condition = self.parse_expression()
        then_token = self.get_next_token()
        if then_token is None or then_token[0] != 'THEN':
            raise ValueError('Expected THEN')
        then_expression = self.parse_expression()
        return ('IF', condition, then_expression)

    def parse_expression(self):
        """Parse terms joined by ``AND`` / ``OR`` (left-associative)."""
        return self._parse_binary(self.parse_term, self._LOGICAL_OPERATORS)

    def parse_term(self):
        """Parse factors joined by comparison operators (left-associative)."""
        return self._parse_binary(self.parse_factor, self._COMPARISON_OPERATORS)

    def parse_factor(self):
        """Parse one operand: identifier, call, ``NOT``, group or literal.

        Raises:
            ValueError: if the input ends here, the token cannot start an
                operand, or a closing parenthesis is missing.
        """
        token = self._peek()
        if token is None:
            raise ValueError('Unexpected end of input')
        kind = token[0]
        if kind == 'IDENTIFIER':
            self.current_token_index += 1
            following = self._peek()
            if following is not None and following[0] == 'LPAREN':
                # An identifier directly followed by '(' is a function call.
                self.current_token_index += 1
                args = self.parse_argument_list()
                self._expect_rparen()
                return ('FUNCTION', token[1], args)
            return ('IDENTIFIER', token[1])
        if kind == 'NOT':
            self.current_token_index += 1
            return ('NOT', self.parse_factor())
        if kind == 'LPAREN':
            self.current_token_index += 1
            expression = self.parse_expression()
            self._expect_rparen()
            return expression
        if kind == 'NUMBER':
            self.current_token_index += 1
            return ('NUMBER', int(token[1]))
        if kind == 'STRING':
            self.current_token_index += 1
            # Drop the surrounding quote characters from the token text.
            return ('STRING', token[1][1:-1])
        raise ValueError('Unexpected token')

    def parse_argument_list(self):
        """Parse zero or more ``COMMA``-separated expressions.

        Stops in front of the closing parenthesis without consuming it.
        """
        args = []
        token = self._peek()
        if token is None or token[0] == 'RPAREN':
            return args
        args.append(self.parse_expression())
        while True:
            token = self._peek()
            if token is None or token[0] != 'COMMA':
                return args
            self.current_token_index += 1
            args.append(self.parse_expression())

    def get_next_token(self):
        """Consume and return the current token, or ``None`` at the end."""
        token = self._peek()
        if token is not None:
            self.current_token_index += 1
        return token

    def _peek(self):
        """Return the current token without consuming it, or ``None``."""
        if self.current_token_index >= len(self.tokens):
            return None
        return self.tokens[self.current_token_index]

    def _expect_rparen(self):
        """Consume a closing parenthesis or raise ``ValueError``."""
        token = self._peek()
        if token is None or token[0] != 'RPAREN':
            raise ValueError('Expected )')
        self.current_token_index += 1

    def _parse_binary(self, parse_operand, operators):
        """Parse ``operand (op operand)*`` into left-nested ``(op, l, r)``."""
        left = parse_operand()
        while True:
            token = self._peek()
            if token is None or token[0] not in operators:
                return left
            self.current_token_index += 1
            left = (token[0], left, parse_operand())
class RuleParser:
    """Facade that runs the lexer and the parser over one rule string.

    After ``parse()`` succeeds, ``tokens`` holds the lexer output and
    ``ast`` holds the value returned by ``Parser.parse()``.
    """

    def __init__(self, rule_dsl):
        """Remember the rule text; nothing is tokenized or parsed yet."""
        self.ast = None
        self.tokens = []
        self.rule_dsl = rule_dsl

    def parse(self):
        """Tokenize ``rule_dsl``, parse the tokens and store both results.

        The tokens are stored before parsing starts, so they remain
        available on the instance if the parser raises.  Returns ``None``.
        """
        tokenizer = Lexer(self.rule_dsl)
        tokenizer.tokenize()
        self.tokens = tokenizer.tokens
        self.ast = Parser(self.tokens).parse()