import fire
from llama import Llama
import warnings
import json
import os
from langchain_openai import OpenAI
"""
This file is to run large language model.
The running instructions have been generated in file f'{experiment name}.sh'
Please run the following command:
nohup bash {experiment name}.sh > output_name.out
or: bash {experiment name}.sh
"""

# If you want to use OpenAI's models, set your API key here.
os.environ['OPENAI_API_KEY'] = 'YOUR API KEY'
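
# Example direct invocation (illustrative only; the generated {experiment name}.sh is
# the intended entry point, and Llama checkpoints may additionally require a
# distributed launcher such as torchrun):
#   python run_language_model.py --ckpt_dir gpt --path my_experiment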


def main(
        ckpt_dir: str,  # LLM model name
        path: str,  # experiment directory for inputs and outputs
        tokenizer_path: str = 'tokenizer.model',
        temperature: float = 0.6,
        top_p: float = 0.9,
        max_seq_len: int = 4096,
        max_gen_len: int = 256,
        max_batch_size: int = 1,
):
"""
Entry point of the program for generating text using a pretrained model
:param
ckpt_dir: The directory containing checkpoint files for the pretrained model.
If you want to use gpt, please trans the true name of a gpt model.
If you just input 'gpt', we provide default model: 'gpt-3.5-turbo-instruct'.
For security reasons, please type your openai API inside this file
path: path to the experiment
tokenizer_path: The path to the tokenizer model used for text encoding/decoding.
temperature: The temperature value for controlling randomness in generation.
Defaults to 0.6.
top_p: The top-p sampling parameter for controlling diversity in generation.
Defaults to 0.9.
max_seq_len: The maximum sequence length for input prompts. Defaults to 4096.
max_gen_len: The maximum length of generated sequences. Defaults to 256.
max_batch_size: The maximum batch size for generating sequences. Defaults to 1.
:return:
the output will be saved at
f"./Inputs&Outputs/{path}/outputs-{ckpt_dir}-{temperature}-{top_p}-{max_seq_len}-{max_gen_len}.json"
"""
    print(path)
    generator = None
    llm = None

    # Summary stage: if set.json exists, the retrieved contexts are first condensed
    # by the summarization model before the final answers are generated.
    if os.path.exists(f'./Inputs&Outputs/{path}/set.json'):
        flag_llm = 'llama'
        print('summarizing now')
        # need to summarize
        with open(f'./Inputs&Outputs/{path}/set.json', "r") as file:
            settings = json.load(file)
        summary_model = settings['infor']
        para_flag = False
        # A '-para' suffix marks the paraphrase variant of the summarization prompt.
        if summary_model.endswith('-para'):
            para_flag = True
            summary_model = summary_model[:-len('-para')]
        if summary_model.find('gpt') != -1:
            flag_llm = 'gpt'
            if summary_model == 'gpt':
                summary_model = 'gpt-3.5-turbo-instruct'
            llm = OpenAI(model=summary_model, temperature=temperature, top_p=top_p, max_tokens=max_gen_len)
        else:
            generator = Llama.build(
                ckpt_dir='Model/' + summary_model,
                tokenizer_path='Model/' + tokenizer_path,
                max_seq_len=max_seq_len,
                max_batch_size=max_batch_size,
            )
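        # Prompt-template pieces from set.json: suffix strings and the separators
        # ("adhesives") used below to join contexts and prompt parts.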
        suf = settings['suffix']
        adh_1 = settings['adhesive_con']
        adh_2 = settings['adhesive_prompt']
        with open(f"./Inputs&Outputs/{path}/question.json", 'r', encoding='utf-8') as f_que:
            questions = json.loads(f_que.read())
        with open(f"./Inputs&Outputs/{path}/context.txt", 'r', encoding='utf-8') as f_con:
            contexts = json.loads(f_con.read())
su_1 = "Given the following question and context, extract any part of the context" \
+ " *AS IS* that is relevant to answer the question. If none of the context is relevant" \
+ " return NO_OUTPUT.\n\nRemember, *DO NOT* edit the extracted parts of the context.\n\n> Question: "
if para_flag:
su_1 = "Given the following question and context, extract any part of the context" \
+ " *AS IS* that is relevant to answer the question. If none of the context is relevant" \
+ " return NO_OUTPUT.\n\n> Question: "
su_2 = "\n> Context:\n>>>\n"
su_3 = "\n>>>\nExtracted relevant parts:"
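        # For every (question, context) pair, build an extraction prompt of the form
        # su_1 + question + su_2 + context + su_3 and let the model condense the context.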
        prompt_ge_contexts = []
        summarize_contexts = []
        for i in range(len(questions)):
            ques = questions[i]
            k_contexts = contexts[i]
            ge_contexts = []
            sum_contexts = []
            for j in range(len(k_contexts)):
                context = k_contexts[j]
                prompt_ge_context = su_1 + ques + su_2 + context + su_3
                ge_contexts.append(prompt_ge_context)
                if flag_llm == 'gpt':
                    ans = llm.invoke(prompt_ge_context)
                else:
                    results = generator.text_completion(
                        [prompt_ge_context],
                        max_gen_len=max_gen_len,
                        temperature=temperature,
                        top_p=top_p,
                    )
                    ans = results[0]['generation']
                sum_contexts.append(ans)
            summarize_contexts.append(sum_contexts)
            prompt_ge_contexts.append(ge_contexts)
with open(f"./Inputs&Outputs/{path}/summarize_contexts.json", 'w', encoding='utf-8') as f_c:
f_c.write(json.dumps(summarize_contexts))
with open(f"./Inputs&Outputs/{path}/generate_summarize_prompt.json", 'w', encoding='utf-8') as f_g:
f_g.write(json.dumps(prompt_ge_contexts))
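        # Assemble the final QA prompts from the summarized contexts, using the suffix
        # and adhesive strings read from set.json.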
        prompts = []
        for i in range(len(questions)):
            con_u = adh_1.join(summarize_contexts[i])
            prompt = suf[0] + con_u + adh_2 + suf[1] + questions[i] + adh_2 + suf[2]
            prompts.append(prompt)
        # Write to prompts.json, which is the file the generation stage below reads.
        with open(f"./Inputs&Outputs/{path}/prompts.json", 'w', encoding='utf-8') as f_p:
            f_p.write(json.dumps(prompts))

    flag_llm = 'llama'
    if ckpt_dir.find('gpt') != -1:
        # The OpenAI API key is set at the top of this file.
        if ckpt_dir == 'gpt':
            ckpt_dir = 'gpt-3.5-turbo-instruct'
        llm = OpenAI(model=ckpt_dir, temperature=temperature, top_p=top_p, max_tokens=max_gen_len)
        flag_llm = 'gpt'
    else:
        generator = Llama.build(
            ckpt_dir='Model/' + ckpt_dir,
            tokenizer_path='Model/' + tokenizer_path,
            max_seq_len=max_seq_len,
            max_batch_size=max_batch_size,
        )

    # generate output
    print('generating output')
    with open(f"./Inputs&Outputs/{path}/prompts.json", 'r', encoding='utf-8') as f:
        all_prompts = json.loads(f.read())
    answer = []
    for i in range(len(all_prompts)):
        if flag_llm == 'gpt':
            ans = llm.invoke(all_prompts[i])
        else:
            results = generator.text_completion(
                [all_prompts[i]],
                max_gen_len=max_gen_len,
                temperature=temperature,
                top_p=top_p,
            )
            ans = results[0]['generation']
        answer.append(ans)
    with open(
            f"./Inputs&Outputs/{path}/outputs-{ckpt_dir}-{temperature}-{top_p}-{max_seq_len}-{max_gen_len}.json",
            'w', encoding='utf-8') as file:
        file.write(json.dumps(answer))
if __name__ == "__main__":
warnings.filterwarnings("ignore")
fire.Fire(main)