compile.py
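"""Generate and compile per-layer benchmark models for a transformer
(GPT-style or LLaMA-style) in either the initiation ('init') or generation
('gen') phase, optionally with ORCA-style per-request sequence lengths.
Drives model_generator and model_compile inside codelets_src, then moves the
generated ONNX files and compiler outputs into a compiled_result/ tree under
the original working directory."""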
import os
import shutil

from benchmarks.model_generator import model_generator
from tools.benchmark_compilation import model_compile

def compile_model(model_name, batch, seq, init_or_gen, arch_config, half=False, gen_ORCA=None, init_ORCA=None):
    cwd = os.getcwd()
    parent = None
    # move the working directory into codelets_src, remembering the parent
    # so results can be copied back out at the end
    codelet = os.path.join(cwd, 'codelets_src')
    if 'codelets_src' not in cwd:
        parent = cwd
        cwd = codelet
        os.chdir(codelet)
    benchmarks = os.path.join(cwd, 'benchmarks')
    tools = os.path.join(cwd, 'tools')
    arch = arch_config.replace(".json", "").replace('_', '')
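    # model_compile() leaves each submodel's output under
    # tools/compilation_output/<submodel>_<arch>_b<batch>_s<seq>_<phase>/.
    # The blocks below move the generated ONNX (and any external weight files)
    # into that folder and then, if the script was launched from outside
    # codelets_src, relocate the whole folder into compiled_result/ under the
    # original working directory.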
    # initiation phase
    if init_or_gen == 'init':
        os.chdir(benchmarks)
        model_generator(model_name, batch, seq, init_or_gen, half, gen_ORCA, init_ORCA)
        os.chdir(tools)
        model_compile(model_name, batch, seq, init_or_gen, arch_config, gen_ORCA, init_ORCA)
        os.chdir(cwd)
        # pick the per-layer submodel suffixes emitted by model_generator
        if gen_ORCA is None and init_ORCA is None:
            if 'llama' in model_name:
                models = ['-embd-opt', '-rms-opt', '-rattn-opt', '-proj-opt', '-linswi-opt']
            else:
                models = ['-embd-opt', '-ln-opt', '-attn-opt', '-proj-opt', '-linear1-opt', '-linear2-opt']
        else:
            if 'llama' in model_name:
                models = ['-embd-opt', '-rms-opt', '-qkv-opt', '-proj-opt', '-linswi-opt']
            else:
                models = ['-embd-opt', '-ln-opt', '-qkv-opt', '-proj-opt', '-linear1-opt', '-linear2-opt']
        for model in models:
            model = model_name + model
            model_path = os.path.join(cwd, f"benchmarks/models/{model}.onnx")
            store_path = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}/{model}.onnx")
            shutil.move(model_path, store_path)
            # also move external weight files; extreme cases (175b, or 30b with
            # a large seq length) export some tensors outside the .onnx file
            if '175b' in model_name:
                if 'embd-opt' in model:
                    model_path = os.path.join(cwd, "benchmarks/models/wte.weight")
                    store_path = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}/wte.weight")
                    shutil.move(model_path, store_path)
                    model_path = os.path.join(cwd, "benchmarks/models/_Constant_attr__value")
                    store_path = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}/_Constant_attr__value")
                    shutil.move(model_path, store_path)
                elif 'linear1-opt' in model:
                    model_path = os.path.join(cwd, "benchmarks/models/c_fc.bias")
                    store_path = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}/c_fc.bias")
                    shutil.move(model_path, store_path)
                    model_path = os.path.join(cwd, "benchmarks/models/c_fc.weight")
                    store_path = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}/c_fc.weight")
                    shutil.move(model_path, store_path)
                elif 'linear2-opt' in model:
                    model_path = os.path.join(cwd, "benchmarks/models/c_proj.bias")
                    store_path = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}/c_proj.bias")
                    shutil.move(model_path, store_path)
                    model_path = os.path.join(cwd, "benchmarks/models/c_proj.weight")
                    store_path = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}/c_proj.weight")
                    shutil.move(model_path, store_path)
            elif '30b' in model_name and int(seq) >= 16384:  # the embedding weight goes external when the seq length is large
                if 'embd-opt' in model:
                    model_path = os.path.join(cwd, "benchmarks/models/wte.weight")
                    store_path = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}/wte.weight")
                    shutil.move(model_path, store_path)
                    model_path = os.path.join(cwd, "benchmarks/models/_Constant_attr__value")
                    store_path = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}/_Constant_attr__value")
                    shutil.move(model_path, store_path)
            # move the finished output to the compiled_result folder
            if parent is not None:
                result_folder = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}")
                if gen_ORCA is None and init_ORCA is None:
                    compiled_result = os.path.join(parent, f"compiled_result/{model_name}/b{batch}_s{seq}_{init_or_gen}/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}")
                else:
                    compiled_result = os.path.join(parent, f"compiled_result/{model_name}-orca/b{batch}_s{seq}_{init_or_gen}/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}")
                if os.path.exists(compiled_result):
                    shutil.rmtree(compiled_result)
                shutil.move(result_folder, compiled_result)
    # generation phase
    else:
        os.chdir(benchmarks)
        model_generator(model_name, batch, seq, init_or_gen, half, gen_ORCA, init_ORCA)
        os.chdir(tools)
        model_compile(model_name, batch, seq, init_or_gen, arch_config, gen_ORCA, init_ORCA)
        os.chdir(cwd)
        if gen_ORCA is None and init_ORCA is None:
            if 'llama' in model_name:
                models = ['-rgen-opt']
            else:
                models = ['-gen-opt']
            model_type = model_name
            for model in models:
                model = model_name + model
                model_path = os.path.join(cwd, f"benchmarks/models/{model}.onnx")
                store_path = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}/{model}.onnx")
                shutil.move(model_path, store_path)
                if parent is not None:
                    result_folder = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}")
                    compiled_result = os.path.join(parent, f"compiled_result/{model_type}/b{batch}_s{seq}_{init_or_gen}/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}")
                    if os.path.exists(compiled_result):
                        shutil.rmtree(compiled_result)
                    shutil.move(result_folder, compiled_result)
        else:
            # ORCA mode: compile one submodel per requested sequence length,
            # covering both generation and initiation requests in the batch
            models = []
            gen_len = 0
            if gen_ORCA is not None:
                for i in gen_ORCA:
                    if 'llama' in model_name:
                        models.append(f'-rgen-{i}-opt')
                    else:
                        models.append(f'-gen-{i}-opt')
                    gen_len += 1
            if init_ORCA is not None:
                for i in init_ORCA:
                    if 'llama' in model_name:
                        models.append(f'-rinit-{i}-opt')
                    else:
                        models.append(f'-init-{i}-opt')
            model_type = f'{model_name}-orca'
            for i, model in enumerate(models):
                model = model_name + model
                # gen entries come first in models, so they index gen_ORCA
                # directly; init entries are offset by gen_len
                if 'gen' in model:
                    seq = gen_ORCA[i]
                    init_or_gen = 'gen'
                else:
                    seq = init_ORCA[i - gen_len]
                    init_or_gen = 'init'
                model_path = os.path.join(cwd, f"benchmarks/models/{model}.onnx")
                store_path = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}/{model}.onnx")
                shutil.move(model_path, store_path)
                if parent is not None:
                    result_folder = os.path.join(cwd, f"tools/compilation_output/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}")
                    compiled_result = os.path.join(parent, f"compiled_result/{model_type}/attn-{init_or_gen}/{model}_{arch}_b{batch}_s{seq}_{init_or_gen}")
                    if os.path.exists(compiled_result):
                        shutil.rmtree(compiled_result)
                    shutil.move(result_folder, compiled_result)
if __name__ == "__main__":
    compile_model('gpt2', 16, 256, 'init', 'benchmark_128x128.json')
    # compile_model('gpt2', 16, 256, 'gen', 'benchmark_128x128.json', 512)
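    # Note: in the commented call above, the sixth positional argument binds
    # to `half` under the current signature. A hypothetical ORCA-style
    # invocation (the sequence lengths below are illustrative only) would
    # instead pass the per-request lengths by keyword:
    # compile_model('gpt2', 16, 256, 'gen', 'benchmark_128x128.json',
    #               gen_ORCA=[128, 256], init_ORCA=[512])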