prompt_compose_helpers.py
"""
Author: Qianxi Li
Date: June 2, 2024
Description:
This module provides two main functions for constructing prompts:
1. construct_answer_prompts: Composes prompts for answer generation
based on the given dataset, examples, and iteration parameters.
2. construct_feedback_prompts: Composes prompts for feedback generation
using model predictions and existing examples.
Both functions return datasets in dictionary form that can be serialized
to JSON for further processing.
"""
import json
import os
import copy
import logging
# Configure logging to show INFO level messages and above with a standard format.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
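# Reading aid only (not used by the code): the overall layout of each answer
# prompt assembled by construct_answer_prompts below.
#
#   ### Context:
#   <context sentence(s)>
#
#   ### Instruction:
#   <task definition + emphasis & caution>
#
#   ### Examples:                 (only when examples are requested)
#   ### Task: ... / ### Answer: ...
#
#   ### Task:
#   <instance input>
#
#   ### Answer: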
def construct_answer_prompts(base_dataset_path,
per_task_data_row_amount,
example_source,
prompt_example_dict,
pos_example_amount=None,
neg_example_amount=None):
"""
construct_answer_prompts():
Composes and returns a dictionary containing answer-generation prompts
for each JSON file in the specified directory and a dictionary of
current examples used during prompt composition.
Args:
base_dataset_path (str): Path to the directory containing the dataset JSON files.
per_task_data_row_amount (int): Maximum number of data rows to sample from each task.
example_source (str): Source of examples, either 'human' or 'llm'.
prompt_example_dict (dict): Dictionary of previously used examples per task.
pos_example_amount (int, optional): Number of positive examples to include. Defaults to None.
neg_example_amount (int, optional): Number of negative examples to include. Defaults to None.
Returns:
tuple: A tuple containing:
- dataset_dict (dict): A dictionary of prompt datasets for each task.
- current_examples_dict (dict): A dictionary of the current examples used per task.
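Expected task file layout (illustrative only, inferred from the keys this
function reads; all values are made up):
    {
        "Definition": "task definition text",
        "Emphasis & Caution": "extra guidance text",
        "Examples": {
            "Positive Examples": [{"input": "...", "output": "..."}],
            "Negative Examples": [{"input": "...", "output": "..."}]
        },
        "Instances": [{"input": "...", "output": "..."}]
    }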
"""
# Define an internal helper function to compose example prompts.
def compose_examples(example_source,
task_examples,
pos_example_amount,
negative_example_amount):
"""
compose_examples():
Helper function that concatenates prompts from
positive/negative or general examples for a specific task.
Args:
example_source (str): Source of examples, 'human' or 'llm'.
task_examples (dict or list): Examples from either the dataset or previously loaded dictionary.
pos_example_amount (int): Number of positive examples to include if present.
negative_example_amount (int): Number of negative examples to include if present.
Returns:
tuple: A string containing the concatenated prompt and
a list of example entries used in the prompt.
"""
# Initialize the prompt as an empty string.
prompt = ""
# Initialize a list to hold the chosen examples.
examples = []
# Check if the example source is 'human'.
if example_source == "human":
# Include the requested number of positive examples, if any.
if pos_example_amount:
# Slice the positive examples based on the desired amount.
pos_example = task_examples["Positive Examples"][:pos_example_amount]
# Add these positive examples to the master list.
examples += pos_example
# Construct the prompt snippet for each positive example.
for each_example in pos_example:
prompt += (
f"""### Task:\n{each_example["input"]}\n\n### Answer:\n{each_example["output"]}\n\n"""
)
# If negative examples are specified, handle them similarly.
if negative_example_amount:
neg_example = task_examples["Negative Examples"][:negative_example_amount]
examples += neg_example
# Only build prompt text when the negative examples are real entries rather than a '-' placeholder.
if '-' not in neg_example:
for each_example in neg_example:
prompt += (
f"""### Task:\n{each_example["input"]}\n\n### Answer:\n{each_example["output"]}\n\n"""
)
else:
# If the example source is 'llm', we assume 'task_examples' is already a list.
examples = task_examples
# Construct the prompt snippet for each existing example.
for each_example in task_examples:
prompt += (
f"""### Task:\n{each_example["input"]}\n\n### Answer:\n{each_example["output"]}\n\n"""
)
# Return the combined prompt and the list of examples used.
return prompt, examples
# Initialize the final dataset dictionary.
dataset_dict = {}
# Initialize the dictionary to store the current iteration examples.
current_examples_dict = {}
# Iterate over each JSON file in the dataset directory.
for each_json in os.listdir(base_dataset_path):
# Process only files with a .json extension.
if ".json" in each_json:
# Initialize a list to hold prompts for a single task.
per_task_prompt_list = []
# Build the full path to the current JSON file.
full_path = os.path.join(base_dataset_path, each_json)
# Open and load the JSON file.
with open(full_path) as obj:
file = json.load(obj)
# Create a deep copy to avoid modifying the original file in memory.
per_task_dict = copy.deepcopy(file)
# Extract various parts of the dataset: definition, caution, instances, etc.
task_definition = file["Definition"]
caution = file["Emphasis & Caution"]
# Determine how many instances to sample up to 'per_task_data_row_amount'.
full_length = min(per_task_data_row_amount, len(file["Instances"]))
# Slice the instances to the determined length.
instances = file["Instances"][:full_length]
# Replace the original instances with the sliced subset.
per_task_dict["Instances"] = instances
# If we have any positive or negative examples to include, compose them.
if pos_example_amount or neg_example_amount:
if example_source == "human":
example_prompt, per_task_examples = compose_examples(
example_source,
file["Examples"],
pos_example_amount,
neg_example_amount
)
else:
example_prompt, per_task_examples = compose_examples(
example_source,
prompt_example_dict[each_json],
pos_example_amount,
neg_example_amount
)
# Construct the instruction portion by combining definition and caution.
instruction = f"""{task_definition} {caution}\n\n"""
# Provide a context string for the model.
context = (
f"""Please refer to the instruction and task information and """
f"""give your answers."""
)
# If using examples, inform the model that these examples exist.
if pos_example_amount or neg_example_amount:
context += " You need to follow the examples we provided."
# Initialize a placeholder for the examples section.
example_str = ""
# If examples are present, incorporate them in a separate section.
if pos_example_amount or neg_example_amount:
example_str = f"### Examples:\n{example_prompt}"
# Construct final prompts for each instance in the dataset.
for instance in instances:
full_prompt = (
f"""### Context:\n{context}\n\n### Instruction:\n{instruction}"""
f"""{example_str}### Task:\n{instance['input']}\n\n### Answer:\n"""
)
per_task_prompt_list.append(full_prompt)
# Attach the newly composed prompts to the task dictionary.
per_task_dict["Answer Prediction Prompt Dataset"] = per_task_prompt_list
# If we used some examples, store them in both the per-task dict and current_examples_dict.
if pos_example_amount or neg_example_amount:
per_task_dict["Current Examples"] = per_task_examples
current_examples_dict[each_json] = per_task_examples
# Assign the per-task dictionary to the main dataset.
dataset_dict[each_json] = per_task_dict
# Remove the reference to free memory (optional, but sometimes useful).
del per_task_dict
# Return both the final dataset dictionary and the current examples used.
return dataset_dict, current_examples_dict
def construct_feedback_prompts(loaded_examples,
answer_pred_dataset):
"""
construct_feedback_prompts():
Composes and returns a dictionary of feedback-generation prompts
that incorporate model-generated answers (from 'answer_pred_dataset')
and optionally some existing examples ('loaded_examples').
Args:
loaded_examples (dict): Dictionary of examples that can be used to guide feedback.
answer_pred_dataset (dict): Dataset of tasks and model predictions.
Returns:
dict: A dictionary mapping each task to its feedback prompt dataset, plus an
'all_feedback_input_list' entry that aggregates the no-example feedback prompts across tasks.
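Expected input shapes (illustrative only, inferred from the keys accessed
below; all values are made up):
    loaded_examples = {
        "task001.json": [
            {"input": "...", "output": "...", "reason": "why this answer is correct"}
        ]
    }
    answer_pred_dataset = {
        "task001.json": {
            "Definition": "...",
            "Emphasis & Caution": "...",
            "Instances": [
                {"input": "...", "output": "...", "answer_prediction": "..."}
            ]
        }
    }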
"""
# Define an internal helper function to build feedback examples in a prompt format.
def compose_feedback_examples(examples):
"""
compose_feedback_examples():
Builds a single concatenated string of feedback examples.
Args:
examples (list): List of example entries that contain
input, output, and a reason for correction.
Returns:
str: A concatenated string of feedback examples.
"""
# Start with an empty string for the prompt.
prompt = ""
# Loop through each example and construct the snippet.
for each_example in examples:
single_prompt = (
f"""### Task:\n{each_example["input"]}\n\n"""
f"""### Predicted Answer:\n{each_example["output"]}\n\n"""
f"""###Feedback:\n{each_example["reason"]}. """
f"""So the answer should be {each_example["output"]}\n\n"""
)
# Concatenate to the overall prompt string.
prompt += single_prompt
# Return the fully composed string of feedback examples.
return prompt
# Initialize the dictionary that will hold feedback data per task.
dataset_dict = {}
# Initialize lists for storing aggregated prompts if needed.
all_task_feedback_input_data = []
all_task_feedback_gen_prompt_data = []  # Aggregates the full prompts; collected but not added to the returned dictionary.
# Iterate through each task and its data in the answer prediction dataset.
for task_name, task_dict in answer_pred_dataset.items():
# Prepare a list to hold new feedback prompts for this task.
per_task_prompt_list = []
# Prepare a list to hold feedback prompts without examples.
per_task_no_example_input_list = []
# Deep copy the task dictionary to avoid mutating the original.
per_task_dict = copy.deepcopy(task_dict)
# Extract task definition, caution, and instances.
task_definition = per_task_dict["Definition"]
caution = per_task_dict["Emphasis & Caution"]
instances = per_task_dict["Instances"]
# If we have loaded examples, compose the feedback examples prompt.
if loaded_examples:
example_prompt = compose_feedback_examples(loaded_examples[task_name])
# Provide a feedback prompt guiding the model to analyze predicted answers.
feedback_prompt = (
"Please refer to the instruction and task information, and "
"provide your feedback on whether the predicted answer is proper, "
"the reasons, and what the correct answer is. "
"You need to follow the examples we provided."
)
# Combine feedback prompt with the actual instruction text for a full context.
context = f"""{feedback_prompt}\n\n### Instruction:\n{task_definition} {caution} \n\n"""
# Create a shorter prompt when no examples are available.
feedback_input_no_examples = (
"Please refer to the instruction and task information, and "
"provide your feedback on whether the predicted answer is proper, "
"the reasons, and what the correct answer is."
)
no_example_context = f"""{feedback_input_no_examples}\n\n### Instruction:\n{task_definition} {caution} \n\n"""
# Build feedback prompts for each instance in the dataset.
for instance in instances:
# Extract the reference output (a list in some datasets); note that it is
# not currently used when composing the feedback prompts below.
standard_answer = instance['output']
if isinstance(standard_answer, list):
standard_answer = standard_answer[0]
# Initialize an empty example_str that might hold example prompts.
example_str = ""
# If examples exist, add them into the prompt.
if loaded_examples:
example_str = f"### Examples:\n{example_prompt}"
# Construct the full feedback prompt with examples included.
full_prompt = (
f"""### Context:\n{context}{example_str}"""
f"""### Task:\n{instance['input']}\n\n"""
f"""### Predicted Answer:\n{instance['answer_prediction']}\n\n"""
f"""### Feedback:\n"""
)
# Construct a fallback prompt without examples.
no_example_full_prompt = (
f"""### Context:\n{no_example_context}"""
f"""### Task:\n{instance['input']}\n\n"""
f"""### Predicted Answer:\n{instance['answer_prediction']}\n\n"""
f"""### Feedback:\n"""
)
# If no examples are loaded, use the no_example prompt list.
if not loaded_examples:
per_task_prompt_list.append(no_example_full_prompt)
else:
# Otherwise, use the full prompt containing examples.
per_task_prompt_list.append(full_prompt)
# Keep a copy of the no-example prompts if needed later.
per_task_no_example_input_list.append(no_example_full_prompt)
# Store the newly built feedback prompts in the task dictionary.
per_task_dict["Feedback Prediction Prompt Dataset"] = per_task_prompt_list
# Assign the per-task dictionary back into the main dataset.
dataset_dict[task_name] = per_task_dict
# Add the no-example prompts to the aggregated list.
all_task_feedback_input_data += per_task_no_example_input_list
# Add the full prompts to the aggregated list.
all_task_feedback_gen_prompt_data += per_task_prompt_list
# Store all feedback input prompts in the dataset dictionary.
dataset_dict["all_feedback_input_list"] = all_task_feedback_input_data
# Return the final dictionary of feedback prompts.
return dataset_dict
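# ---------------------------------------------------------------------------
# Minimal usage sketch, assuming a directory of task JSON files shaped as
# described in the docstrings above. The paths, sampling sizes, and the step
# that fills in 'answer_prediction' are hypothetical; they are not defined by
# this module.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # First pass: build answer-generation prompts from human-written examples.
    answer_prompts, current_examples = construct_answer_prompts(
        base_dataset_path="data/tasks",      # hypothetical task directory
        per_task_data_row_amount=10,         # sample at most 10 instances per task
        example_source="human",              # or "llm" with a prompt_example_dict
        prompt_example_dict={},              # empty on the first iteration
        pos_example_amount=2,
        neg_example_amount=1,
    )
    with open("answer_prompts.json", "w") as out_file:
        json.dump(answer_prompts, out_file, indent=2)
    # 'current_examples' can be passed back as prompt_example_dict on a later
    # iteration that uses example_source="llm".

    # Second pass: once every instance carries an 'answer_prediction' field
    # (added by whatever model runs the prompts), build feedback prompts.
    feedback_prompts = construct_feedback_prompts(
        loaded_examples={},                  # no feedback examples on the first pass
        answer_pred_dataset=answer_prompts,  # assumes predictions were filled in
    )
    with open("feedback_prompts.json", "w") as out_file:
        json.dump(feedback_prompts, out_file, indent=2)
    logging.info("Composed prompts for %d tasks.", len(answer_prompts))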