test_model.py
import json
from typing import List

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


def load_few_shot_prompt(prompt_path: str) -> str:
    """
    Load the few-shot prompt from a text file.

    Args:
        prompt_path (str): Path to the few-shot prompt file.

    Returns:
        str: The few-shot prompt as a single string.
    """
    with open(prompt_path, 'r', encoding='utf-8') as f:
        few_shot = f.read()
    # Ensure proper formatting: the prompt must end with a blank line so the
    # appended question starts cleanly.
    if not few_shot.endswith('\n\n'):
        few_shot += '\n\n'
    return few_shot
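
# NOTE (assumption): the contents of the few-shot prompt file are not shown in
# this snippet. Given how construct_prompt() below appends
# "Question: ...\nAnswer:", the file presumably holds worked examples in the
# same shape, e.g.:
#
#   Question: <example question>
#   Answer: <example solution / equation>
#
#   Question: <another example question>
#   Answer: <another example solution>
#
# separated by blank lines, which is why load_few_shot_prompt() ensures the
# text ends with '\n\n'.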


def load_test_questions(test_path: str, num_samples: int = 4) -> List[str]:
    """
    Load a specified number of questions from the test.jsonl file.

    Args:
        test_path (str): Path to the test.jsonl file.
        num_samples (int, optional): Number of questions to load. Defaults to 4.

    Returns:
        List[str]: A list of question strings.
    """
    questions = []
    with open(test_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Skip blank lines (e.g., a trailing newline at the end of the file).
                continue
            data = json.loads(line)
            questions.append(data['question'])
            if len(questions) >= num_samples:
                break
    return questions
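
# NOTE (assumption): each line of test.jsonl is expected to be a standalone JSON
# object with at least a "question" field; any other fields are ignored here.
# A hypothetical line might look like:
#
#   {"question": "A train travels 90 km in 1.5 hours. What is its average speed?", "answer": "..."}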


def construct_prompt(few_shot: str, question: str, prompt_type: str = "equation_only") -> str:
    """
    Construct the prompt by combining the few-shot examples with the new question.

    Args:
        few_shot (str): The few-shot prompt.
        question (str): The new question to be answered.
        prompt_type (str, optional): Type of prompt. Defaults to "equation_only".

    Returns:
        str: The complete prompt to be sent to the model.
    """
    if prompt_type == "equation_only":
        prompt = f"{few_shot}Question: {question}\nAnswer:"
    else:
        prompt = f"{few_shot}Question: {question}\nLet's think step by step\nAnswer:"
    return prompt


def perform_inference(model, tokenizer, prompts: List[str], max_gen_len: int = 100) -> List[str]:
    """
    Generate predictions for a list of prompts using the model.

    Args:
        model: The language model.
        tokenizer: The tokenizer corresponding to the model.
        prompts (List[str]): List of prompt strings.
        max_gen_len (int, optional): Maximum number of tokens to generate. Defaults to 100.

    Returns:
        List[str]: List of generated prediction strings.
    """
    # Assign pad_token to eos_token if not already set. This must happen before
    # tokenization, otherwise padding the batch below would fail.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        print("Pad token not found. Setting pad_token to eos_token.")

    # Tokenize the prompts as a single padded batch (padding side is configured in main()).
    inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True)

    # Move inputs and model to the same device.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    inputs = {key: value.to(device) for key, value in inputs.items()}
    model.to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_gen_len,
            do_sample=True,            # Enable sampling
            temperature=0.7,           # Sampling temperature
            top_p=0.9,                 # Top-p (nucleus) sampling
            no_repeat_ngram_size=3,    # Prevent repeating n-grams
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    predictions = [tokenizer.decode(output, skip_special_tokens=True).strip() for output in outputs]
    return predictions
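
# NOTE: tokenizer.decode() above is applied to the full output sequences, so each
# returned prediction still contains the prompt text. If only the generated
# continuation is wanted, one option (a sketch, not what this script does) is to
# slice off the prompt tokens before decoding:
#
#   gen_only = outputs[:, inputs["input_ids"].shape[1]:]
#   predictions = [tokenizer.decode(o, skip_special_tokens=True).strip() for o in gen_only]
#
# With left padding, every prompt in the batch occupies the same number of
# positions, so this slice keeps exactly the newly generated tokens.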


def main():
    # Hardcoded paths
    few_shot_path = "data/prompts/equation_only.txt"  # Path to the few-shot prompt
    test_path = "data/test.jsonl"                     # Path to the test data

    # Model configuration
    model_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"      # Hugging Face model name
    tokenizer_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"  # Corresponding tokenizer

    # Number of samples to test
    num_samples = 4
    # Prompt type: "equation_only" or any other type handled by construct_prompt()
    prompt_type = "equation_only"
    # Maximum number of tokens to generate
    max_gen_len = 700

    # Load the few-shot prompt
    few_shot = load_few_shot_prompt(few_shot_path)
    print("Few-shot prompt loaded.")

    # Load a small set of test questions
    test_questions = load_test_questions(test_path, num_samples=num_samples)
    print(f"Loaded {len(test_questions)} test questions.")

    # Initialize the tokenizer and model
    print("Loading tokenizer and model...")
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    # Assign pad_token to eos_token if not set
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        print("Pad token not found. Setting pad_token to eos_token.")

    # Set padding side to 'left' for decoder-only models
    tokenizer.padding_side = 'left'
    print(f"Tokenizer padding side set to: {tokenizer.padding_side}")

    model = AutoModelForCausalLM.from_pretrained(model_name)
    model.eval()
    print("Model and tokenizer loaded.")

    # Construct prompts for each question
    prompts = [construct_prompt(few_shot, q, prompt_type=prompt_type) for q in test_questions]
    for idx, prompt in enumerate(prompts):
        print(f"\n--- Prompt {idx + 1} ---\n{prompt}\n")

    # Perform inference
    print("Starting inference...")
    predictions = perform_inference(model, tokenizer, prompts, max_gen_len=max_gen_len)
    print("Inference completed.\n")

    # Display and save predictions
    print("--- Predictions ---")
    for idx, pred in enumerate(predictions):
        print(f"\nPrediction {idx + 1}:\n{pred}\n")
        # Save each prediction to a separate file for detailed analysis
        with open(f"prediction_{idx + 1}.txt", "w", encoding="utf-8") as f:
            f.write(pred)


if __name__ == "__main__":
    main()
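
# Usage sketch (assuming the hardcoded data/ paths exist and the
# Qwen/Qwen2.5-Math-1.5B-Instruct weights can be downloaded):
#
#   python test_model.py
#
# Predictions are printed and also written to prediction_1.txt ... prediction_4.txt
# in the current working directory.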