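# vecparser.py
#
# Vectorizes MATLAB/CVX for-loops: reads a (possibly nested) for-loop from
# loop_editor.m, rewrites each scalar statement inside it as an equivalent
# array expression built from repmat/permute and element-wise operators, and
# appends the vectorized code back to loop_editor.m.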
import re
from pymatlabparser.matlab_lexer import MatlabLexer
from sly.lex import Token
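
# A Variable pairs a list of lexer tokens with `index`, the ordered loop
# variables the expression ranges over (e.g. ['n', 'm']); index == [] marks
# a constant. start_parsing() classifies the tokens as a constant, an indexed
# atom such as x(n,m), or a composite expression handed to expression_parser().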
class Variable:
    # e.g. "x(n,m,k)": a NAME token indexed by loop variables
    def __init__(self, token_list: list[Token], index: list[str] = None, is_constant=False) -> None:
        self.token_list = token_list
        self.index = index
        self.is_constant = is_constant
        if index == []:
            self.is_constant = True

    def start_parsing(self):
        if self.index is not None:  # already has an index, nothing to parse
            return self
        elif len(self.token_list) == 1:
            self.is_constant = True
            self.index = []
            return self
        elif self.is_atom:
            self.index = self.atom_index
            self.token_list = [self.token_list[0]]
            return self
        else:  # composite expression
            return expression_parser(self)

    @property
    def name(self):
        name = ''
        for token in self.token_list:
            name += token.value
        return name

    @property
    def is_atom(self):
        if self.token_list[0].type == 'NAME' and self.token_list[1].value == '(' and self.token_list[-1].value == ')':
            paren_count = 0
            for token in self.token_list:
                # more than one '(' means nested calls, so not an atom
                if token.value == '(':
                    paren_count += 1
            if paren_count == 1:
                return True
        return False

    @property
    def atom_index(self):
        index = []
        for token in self.token_list[2:]:
            if token.type == 'NAME':
                index.append(token.value)
        return index

    def __str__(self):
        return self.name
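
# expand(x, target_order): broadcast x over every loop dimension in
# target_order that it does not already vary over, by wrapping its tokens in
# a MATLAB repmat(...) call sized from the global loop_bounds dict.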
def expand(x: Variable, target_order: list[str] = None) -> Variable:
    if x.is_constant:
        return x
    if len(x.index) == len(target_order):
        return x
    len_of_original = len(x.index)
    idx_max = []
    for target_idx in target_order:
        if target_idx not in x.index:
            idx_max.append(loop_bounds[target_idx])  # loop_bounds is set by the main script
            x.index.append(target_idx)
    fake_x = 'x'  # placeholder, replaced by the real token list below
    result = f"repmat({fake_x}" + ',1' * len_of_original
    for im in idx_max:
        result += f",{im}"
    result += ")"
    tokens = MatlabLexer().tokenize(result)
    token_list = [token for token in tokens]
    # splice the real expression in place of the 'x' placeholder token
    x.token_list = token_list[0:2] + x.token_list + token_list[3:]
    return x
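
# permute(x, target_order): reorder x's dimensions to match target_order by
# wrapping its tokens in a MATLAB permute(x,[...]) call.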
def permute(x: Variable, target_order: list[str] = None) -> Variable:
    if x.is_constant:
        return x
    idx = x.index  # e.g. ['n', 'm', 'k', ...]
    if len(idx) != len(target_order):
        raise Exception("permute failed: len(idx) != len(order)")
    if idx == target_order:
        return x
    pos_dict = dict(zip(idx, range(1, len(idx) + 1)))
    pos = []
    for target_pos in target_order:
        pos.append(pos_dict[target_pos])
    # build the permute(...) wrapper around a placeholder
    fake_x = 'x'
    result = f"permute({fake_x},["
    for p in pos:
        result += f"{p},"
    result = result[:-1] + "])"
    tokens = MatlabLexer().tokenize(result)
    token_list = [token for token in tokens]
    x.token_list = token_list[0:2] + x.token_list + token_list[3:]
    x.index = target_order
    return x
MIDDLE_OPERATORS = {'+': 5, '-': 5, '*': 10, '/': 10, '.*': 10, './': 10, '^': 11,
                    '>=': 3, '=': 3, '<=': 3, '==': 3, '<': 3, '>': 3, '~=': 3,
                    '&': 2, '|': 1, '&&': 0, '||': -1}
LEFT_OPERATORS = {'f': 13, '-': 5, '(': -2}  # 'f' is the priority of a function call
VECTORIZED_OPERATORS = {'*': '.*', '/': './', '^': '.^', '+': '+', '-': '-',
                        '>=': '>=', '=': '=', '<=': '<=', '==': '==', '<': '<', '>': '>',
                        '&': '&', '|': '|', '&&': '&', '||': '|', '~=': '~='}
# RIGHT_OPERATORS = ['^']
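
# apply_operator combines already-vectorized operands: they are expanded and
# permuted onto the union of their loop indices, then joined with the
# element-wise form of the operator taken from VECTORIZED_OPERATORS.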
def apply_operator(operator: Token, *args: Variable) -> Variable:
    def binary_extend_list(x: Variable, op: Token, y: Variable) -> list[Token]:
        op.value = VECTORIZED_OPERATORS[op.value]
        x.token_list.append(op)
        return x.token_list + y.token_list

    def unary_extend_list(op: Token, x: Variable) -> list[Token]:
        if op.value in VECTORIZED_OPERATORS:
            op.value = VECTORIZED_OPERATORS[op.value]
        x.token_list.insert(0, op)
        return x.token_list

    if operator.value in MIDDLE_OPERATORS and len(args) == 2:
        if operator.value == '=':  # '=' keeps the lhs index, to tell cvx apart from matlab
            index = args[0].index
        else:
            index = list(set.union(*[set(x.index) for x in args]))
        result_variables = [permute(expand(x, index), index) for x in args]
        return Variable(binary_extend_list(result_variables[0], operator, result_variables[1]), index=index)
    if operator.value in LEFT_OPERATORS and len(args) == 1:  # unary prefix operator
        index = args[0].index
        result_variables = [permute(expand(x, index), index) for x in args]
        return Variable(unary_extend_list(operator, result_variables[0]), index=index)
    if operator.type == 'NAME' and len(args) == 1:  # function application, e.g. log(x)
        index = args[0].index
        result_variables = [permute(expand(x, index), index) for x in args]
        return Variable(unary_extend_list(operator, result_variables[0]), index=index)
    return None
## input parser
def extract_loop_bounds(input_string):
    matches = re.findall(r'for (\w+)=(\d+):(\w+)', input_string, re.MULTILINE)
    num_dict = {}
    if matches:
        for n_end in matches:
            num_dict[n_end[0]] = n_end[2]  # loop variable -> upper bound
        return num_dict
    else:
        raise Exception("no loop bound")
def extract_expressions(input_block):
    result = []
    for line in input_block:
        matches = re.findall(r'.+=.+;', line.replace(" ", ""), re.MULTILINE)
        result.append(matches[0][:-1])
    return result

def extract_condition(block):
    match = re.search(r'[^\w]*if ', block[0])
    if match:
        return block[0][match.end():]
    return None
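
# expression_parser runs a shunting-yard style evaluation: double_lexer()
# first groups raw tokens into Variables and operator tokens, then
# parse_symbols() reduces them by operator priority, calling apply_operator()
# each time an operator is popped from the stack.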
def expression_parser(variable) -> Variable:
    def expression_parens(token_list: list[Token]) -> dict[int, int]:
        # map the position of each '(' to the position of its matching ')'
        lis = []
        end_index = {}
        for i, tok in enumerate(token_list):
            if tok.type == 'LPAREN':
                lis.append(i)
            elif tok.type == 'RPAREN':
                end_index[lis.pop()] = i
        return end_index

    def double_lexer(token_list: list[Token]):
        # the returned list holds only two kinds of items: operator tokens and Variables
        end_index = expression_parens(token_list)
        lis = []
        i = 0
        while i < len(token_list):
            first_token = token_list[i]
            if first_token.type == 'NAME' and (i + 1) < len(token_list) and token_list[i + 1].type == 'LPAREN':
                if token_list[i + 2].value in loop_bounds:  # an array indexed by loop variables
                    v = Variable(token_list[i:end_index[i + 1] + 1]).start_parsing()
                    lis.append(v)
                    i = end_index[i + 1] + 1
                    continue
                else:  # a function call: keep the name as an operator token
                    lis.append(first_token)
                    i = i + 1
                    continue
            elif first_token.type == 'NAME' and first_token.value in loop_bounds:
                # a bare loop variable becomes the column vector (1:bound)'
                temp = first_token.value
                first_token.value = f'(1:{loop_bounds[first_token.value]})\''
                v = Variable([first_token], index=[temp]).start_parsing()
                lis.append(v)
                i = i + 1
                continue
            elif first_token.type == 'NAME' and first_token.value == cached_condition_name:
                v = Variable([first_token], index=cached_condition_index).start_parsing()
                lis.append(v)
                i = i + 1
                continue
            elif first_token.type == 'NAME' or first_token.type == 'NUMBER':
                v = Variable([first_token]).start_parsing()
                lis.append(v)
                i = i + 1
                continue
            else:  # operators and parentheses pass through unchanged
                lis.append(first_token)
                i = i + 1
                continue
        return lis
    def parse_symbols(symbols):
        def use_one_operator_from_stack(symbols):
            right_value = symbols.pop()
            op = symbols.pop()  # the operator
            if op.value in LEFT_OPERATORS and op.value not in MIDDLE_OPERATORS:  # purely unary operator
                symbols.append(apply_operator(op, right_value))
            elif op.value in LEFT_OPERATORS and op.value in MIDDLE_OPERATORS and (symbols == [] or (type(symbols[-1]).__name__ == 'Token' and symbols[-1].value == "(") or (type(symbols[-1]).__name__ == 'Token' and symbols[-1].value in MIDDLE_OPERATORS)):
                # '-' with no usable left operand is treated as unary
                symbols.append(apply_operator(op, right_value))
            elif op.type == 'NAME':  # function / unary operator
                symbols.append(apply_operator(op, right_value))
            else:  # binary operator
                left_value = symbols.pop()
                symbols.append(apply_operator(op, left_value, right_value))

        def priority(token: Token):
            if token.value in MIDDLE_OPERATORS:
                return MIDDLE_OPERATORS[token.value]
            elif token.value in LEFT_OPERATORS:
                return LEFT_OPERATORS[token.value]
            elif token.type == 'NAME':
                return LEFT_OPERATORS['f']
            else:
                raise Exception(f'no priority for {token.value}')

        def is_symbol_operator(symbol, value):
            return type(symbol).__name__ == 'Token' and symbol.value == value

        # one mixed stack of operands and operator tokens
        result_symbols = []
        for symbol in symbols:
            if type(symbol).__name__ == 'Token' and symbol.value not in "()":  # an operator
                while len(result_symbols) >= 2 and not is_symbol_operator(result_symbols[-1], '(') and type(result_symbols[-2]).__name__ == 'Token' and priority(result_symbols[-2]) >= priority(symbol):
                    use_one_operator_from_stack(result_symbols)
                result_symbols.append(symbol)
            elif is_symbol_operator(symbol, '('):
                result_symbols.append(symbol)
            elif is_symbol_operator(symbol, ')'):
                while len(result_symbols) >= 2:
                    if is_symbol_operator(result_symbols[-2], '('):
                        variable = result_symbols.pop()
                        left_paren = result_symbols.pop()
                        break
                    else:
                        use_one_operator_from_stack(result_symbols)
                variable.token_list = [left_paren] + variable.token_list + [symbol]
                result_symbols.append(variable)
            else:  # an operand (Variable)
                result_symbols.append(symbol)
        # apply the remaining operators
        while len(result_symbols) >= 2:
            use_one_operator_from_stack(result_symbols)
        return result_symbols[0]

    symbols = double_lexer(variable.token_list)
    result_variable = parse_symbols(symbols)
    return result_variable
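
# lhs_rhs_for_cvx splits a cvx constraint at its top-level relational
# operator (==, >= or <=), skipping operators nested inside parentheses.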
def lhs_rhs_for_cvx(expression):
    lis = []
    i = 0
    start = 0
    while i < len(expression):
        if expression[i] == '(':
            lis.append(expression[i])
        elif expression[i:i + 2] == '==' or expression[i:i + 2] == '>=' or expression[i:i + 2] == '<=':
            if not lis:  # not nested inside parentheses
                start = i
                break
            i += 2
            continue
        elif expression[i] == ')':
            lis.pop()
        i += 1
    return expression[start:start + 2], expression[:start], expression[start + 2:]
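
# string_cached_stack tracks the conjunction of all currently open `if`
# conditions. Every append/pop re-emits the current conjunction as a 0/1 mask
# assigned to cached_condition_name, which guarded statements then reference.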
class string_cached_stack:
    def __init__(self):
        self.stack = []
        self.cached_condition = None

    def pop(self):
        global cached_condition_name
        global cached_condition_index
        global new_content
        self.stack.pop()
        if self.stack:
            # truncate back to the conjunction of the still-open conditions
            self.cached_condition = self.cached_condition[:self.stack[-1]]
            tokens = MatlabLexer().tokenize(self.cached_condition)
            condition_v = Variable([to for to in tokens]).start_parsing()
            print(f'{cached_condition_name}=({condition_v.name});')
            new_content += f'{cached_condition_name}=({condition_v.name});\n\n'
            cached_condition_index = condition_v.index
        else:
            self.cached_condition = None

    def append(self, c):
        global cached_condition_name
        global cached_condition_index
        global new_content
        if self.cached_condition:
            self.cached_condition = f'{self.cached_condition}&&({c})'
        else:
            self.cached_condition = f'({c})'
        self.stack.append(len(self.cached_condition))
        tokens = MatlabLexer().tokenize(self.cached_condition)
        condition_v = Variable([to for to in tokens]).start_parsing()
        print(f'{cached_condition_name}=({condition_v.name});')
        new_content += f'{cached_condition_name}=({condition_v.name});\n\n'
        cached_condition_index = condition_v.index
def iscondition(exp):
    return re.search(r'[^\w]*if ', exp) is not None

def extract_condition(line):
    # single-line version; overrides the block version defined above
    match = re.search(r'[^\w]*if ', line)
    return line[match.end():]

def isfor(exp):
    return re.search(r'[^\w]*for ', exp) is not None

def isend(exp):
    return re.search(r'[^\w]*end[^\w]*', exp) is not None

def extract_expression(exp):
    matches = re.findall(r'.+=.+;', exp.replace(" ", ""), re.MULTILINE)
    return matches[0][:-1]  # drop the trailing ';'
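
# add_condition rewrites a guarded statement using the cached 0/1 mask c:
# a matlab assignment x=rhs becomes x=(c)*(rhs)+(1-(c))*x, while a cvx
# constraint lhs OP rhs becomes 0 OP (-(lhs)+(rhs))*(c).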
def add_condition(expression):
    global cached_condition_name
    if condition_stack.stack:
        a = re.search(r'[^=<>]=[^=<>]', expression)
        if a:  # matlab assignment: blend old and new values with the mask
            lhs, rhs = expression[:a.start() + 1], expression[a.end() - 1:]
            conditional_expression = f'{lhs}=({cached_condition_name})*({rhs}) + (1-({cached_condition_name}))*{lhs}'
        else:  # cvx constraint: scale the residual by the mask
            op, lhs, rhs = lhs_rhs_for_cvx(expression)
            conditional_expression = f'0{op}(-({lhs})+({rhs}))*({cached_condition_name})'
        return conditional_expression
    else:
        return expression
def vectorize_expression(exp):
    global new_content
    tokens = MatlabLexer().tokenize(add_condition(exp))
    parser_result = Variable([to for to in tokens]).start_parsing()
    print(parser_result, end="")
    print(';')
    new_content += f'{parser_result.name};\n\n'
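
# ---- script entry point: extract the for-loop from loop_editor.m and
# ---- vectorize it line by line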
input_string=""
with open('loop_editor.m', 'r') as file:
content = file.read()
pattern = r'for .*\nend'
match = re.search(pattern, content, re.DOTALL)
if match:
input_string = match.group(0)
print('\n---------------------------The extracted original for-loop is:----------------------------------')
print(input_string)
else:
raise Exception("No for-loop found.")
print("-----------------------------vectorized by Vecparser as----------------------------------------\n")
new_content="\n\n%-------------------------vectorized by Vecparser as-----------------------\n\n"
loop_bounds = extract_loop_bounds(input_string)
line_list=input_string.splitlines()
cached_condition_name ='cached_condition_for_this'
cached_condition_index=None
keyword_stack=[]
condition_stack =string_cached_stack()
for exp in line_list:
if iscondition(exp):
condition_stack.append(extract_condition(exp))
keyword_stack.append('if')
elif isfor(exp):
keyword_stack.append('for')
elif isend(exp):
if keyword_stack.pop()=='if':
condition_stack.pop()
else:
vectorize_expression(extract_expression(exp))
print("\n---------Those results have been writen to the file \"loop_editor.m\", please refresh it.---------")
new_content+="%-----Please clear this file each time before you write a new loop on------"
with open('loop_editor.m', "a") as target_file:
target_file.write(new_content)
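
# A minimal (hypothetical) example of what loop_editor.m might contain:
#
#   for n=1:N
#   for m=1:M
#   if n>m
#   x(n,m)=a(n)*b(m);
#   end
#   end
#   end
#
# The script prints and appends the vectorized form: a mask assignment for
# the if-condition followed by a repmat/permute expression for the statement.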