-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
116 lines (93 loc) · 7.07 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import json
import random
class NeQA:
    """Load two-choice NeQA items from a JSON-lines file and build prompts.

    Every item is a dict that must provide a ``"question"`` string and a
    ``"choices"`` list of exactly two options; the ``prompt_*`` methods read
    nothing else. ``apply`` stores the rendered prompt back on each item
    under the ``"prompt"`` key.
    """

    # Shared pieces of the prompt text. They are concatenated by the
    # ``prompt_*`` methods; the emitted prompts must stay byte-identical, so
    # edit these with care.
    _INTRO = (
        "The following are multiple choice questions (with answers) "
        "about common sense."
    )
    _HINT_ZEROSHOT = (
        " Note that if there is a negation in the question, we should choose "
        "the wrong answer to the original question."
    )
    _HINT_FEWSHOT = (
        " Note that if there is a negation in the question, we should choose "
        "the wrong answer to the non-negated question."
    )
    _FEWSHOT_EXAMPLES = (
        "Question: If a cat has a body temp that is below average, it isn't in\n"
        "A. danger\nB. safe ranges\nAnswer: B\n\n"
        "Question: As the barometer reading goes lower there is not a greater chance of\n"
        "A. sunshine\nB. getting wet\nAnswer: A\n\n"
        "Question: Coral is a type of living organism which cannot be identified in\n"
        "A. saltwater locations that are open\nB. any where with liquid\nAnswer: B\n\n"
    )
    _FEWSHOT_COT_EXAMPLES = (
        "Question: If a cat has a body temp that is below average, it isn't in\n"
        "A. danger\nB. safe ranges\n"
        "Answer: Let's think step-by-step.\n"
        'First, let\'s answer non-negated question: "If a cat has a body temp that is below average, it is in?"\n'
        "For this non-negated question, we have A.\n"
        "Then, to answer the negated question, take the other answer, which would be B.\n"
        "So the answer is B.\n\n"
        "Question: As the barometer reading goes lower there is not a greater chance of\n"
        "A. sunshine\nB. getting wet\n"
        "Answer: Let's think step-by-step.\n"
        'First, let\'s answer non-negated question: "As the barometer reading goes lower there is a greater chance of?"\n'
        "For this non-negated question, we have B.\n"
        "Then, to answer the negated question, take the other answer, which would be A.\n"
        "So the answer is A.\n\n"
        "Question: Coral is a type of living organism which cannot be identified in\n"
        "A. saltwater locations that are open\nB. any where with liquid\n"
        "Answer: Let's think step-by-step.\n"
        'First, let\'s answer non-negated question: "Coral is a type of living organism which can be identified in?"\n'
        "For this non-negated question, we have A.\n"
        "Then, to answer the negated question, take the other answer, which would be B.\n"
        "So the answer is B.\n\n"
    )

    def __init__(self, max_instances: int):  # max_instances: -1 for all
        """Read ``neqa_100.jsonl`` from the working directory.

        Args:
            max_instances: ``-1`` keeps every item in file order. Any other
                value reseeds the global ``random`` module with 42, shuffles
                the items and keeps ``data[:max_instances]``.
        """
        self.path = "neqa_100.jsonl"
        # Alternative input kept from the original code:
        # self.path = "neqa_100_nonnegated.jsonl"

        # Close the file deterministically and tolerate blank lines (for
        # example a trailing newline at the end of the file).
        with open(self.path, encoding="utf-8") as handle:
            self.data = [json.loads(line) for line in handle if line.strip()]

        if max_instances != -1:
            # Fixed seed so the selected subset is reproducible across runs.
            # NOTE: this reseeds the process-wide RNG, as the original did.
            random.seed(42)
            random.shuffle(self.data)
            self.data = self.data[:max_instances]

    def apply(self, prompt_fn: callable):
        """Render ``prompt_fn(item)`` for every item and store it in place.

        Args:
            prompt_fn: Callable taking one item dict and returning the prompt
                string, typically one of this class's ``prompt_*`` methods.
        """
        for item in self.data:
            item["prompt"] = prompt_fn(item)

    @staticmethod
    def _format_question(item: dict) -> str:
        """Render the target question, its two lettered options and the cue."""
        assert len(item["choices"]) == 2
        return (
            f"Question: {item['question']}\n"
            f"A. {item['choices'][0]}\n"
            f"B. {item['choices'][1]}\n"
            "Answer:"
        )

    def prompt_zeroshot(self, item: dict) -> str:
        """Return the zero-shot prompt: header followed by the question."""
        return f"{self._INTRO}\n\n{self._format_question(item)}"

    def prompt_zeroshot_hint(self, item: dict) -> str:
        """Return the zero-shot prompt with the negation hint in the header."""
        return (
            f"{self._INTRO}{self._HINT_ZEROSHOT}\n\n"
            f"{self._format_question(item)}"
        )

    def prompt_fewshot(self, item: dict) -> str:
        """Return the prompt with three worked examples before the question."""
        return (
            f"{self._INTRO}\n\n"
            f"{self._FEWSHOT_EXAMPLES}{self._format_question(item)}"
        )

    def prompt_fewshot_hint(self, item: dict) -> str:
        """Return the few-shot prompt with the negation hint in the header."""
        return (
            f"{self._INTRO}{self._HINT_FEWSHOT}\n\n"
            f"{self._FEWSHOT_EXAMPLES}{self._format_question(item)}"
        )

    def prompt_fewshot_cot(self, item: dict) -> str:
        """Return the few-shot prompt whose examples spell out their reasoning."""
        return (
            f"{self._INTRO}\n\n"
            f"{self._FEWSHOT_COT_EXAMPLES}{self._format_question(item)}"
        )
class Task2:
    """Build "same or different?" prompts from paired JSON-lines items.

    Two files are read line-for-line in parallel: one holding the original
    items and one holding their negated counterparts. Each original item must
    provide ``"question"``, ``"choices"`` and an integer ``"answer"`` index
    into ``"choices"``; only ``"question"`` is read from the negated item.
    """

    _HINT = "Negated sentences are different from original sentences.\n\n"
    # Option block as rendered by the prompt methods, and the swapped variant
    # that ``apply`` substitutes on every second item.
    _OPTIONS = "A. the same\nB. different"
    _OPTIONS_SWAPPED = "A. different\nB. the same"

    def __init__(self, max_instances: int):  # max_instances: -1 for all
        """Load both files from the working directory.

        Args:
            max_instances: A positive value keeps only the first
                ``max_instances`` pairs; zero or negative keeps every pair.
        """
        self.path_original = "neqa_100_normalized_nonnegated.jsonl"
        self.path_negated = "neqa_100_normalized.jsonl"
        self.data_original = self._read_jsonl(self.path_original)
        self.data_negated = self._read_jsonl(self.path_negated)
        # The two files are paired by position, so they must line up.
        assert len(self.data_original) == len(self.data_negated)
        if max_instances > 0:
            self.data_original = self.data_original[:max_instances]
            self.data_negated = self.data_negated[:max_instances]
        # Filled by ``apply``; defined here so the attribute always exists.
        self.data = []

    @staticmethod
    def _read_jsonl(path: str) -> list:
        """Parse one JSON document per non-blank line of ``path``."""
        with open(path, encoding="utf-8") as handle:
            return [json.loads(line) for line in handle if line.strip()]

    def apply(self, prompt_fn: callable):
        """Rebuild ``self.data`` with one prompt/answer record per pair.

        Even-indexed pairs keep the option order produced by ``prompt_fn``
        and get answer index 1 ("B. different"). Odd-indexed pairs have the
        two options swapped and get answer index 0, so the expected option
        text is "different" in both cases while its letter alternates.

        Args:
            prompt_fn: Callable taking ``(item_original, item_negated)`` and
                returning the prompt string.
        """
        self.data = []
        pairs = zip(self.data_original, self.data_negated)
        for index, (item_original, item_negated) in enumerate(pairs):
            prompt = prompt_fn(item_original, item_negated)
            if index % 2 == 0:
                self.data.append({"prompt": prompt, "answer": 1})
            else:
                swapped = prompt.replace(self._OPTIONS, self._OPTIONS_SWAPPED)
                self.data.append({"prompt": swapped, "answer": 0})

    def _sentence_pair(self, item_original: dict, item_negated: dict) -> str:
        """Render both sentences, the question and the two options.

        Both sentences are completed with the choice selected by the original
        item's ``"answer"`` index; the negated item supplies only its question.
        """
        question_original = item_original["question"]
        question_negated = item_negated["question"]
        answer_original = item_original["choices"][item_original["answer"]]
        return (
            f'Sentence 1: "{question_original} {answer_original}."\n'
            f'Sentence 2: "{question_negated} {answer_original}."\n'
            "Question: The above two sentences are?\n"
            f"{self._OPTIONS}\n"
            "Answer:"
        )

    def prompt_zeroshot(self, item_original: dict, item_negated: dict) -> str:
        """Return the bare comparison prompt for one pair."""
        return self._sentence_pair(item_original, item_negated)

    def prompt_zeroshot_hint(self, item_original: dict, item_negated: dict) -> str:
        """Return the comparison prompt preceded by the negation hint."""
        return self._HINT + self._sentence_pair(item_original, item_negated)
if __name__ == "__main__":
    # Smoke-test script: for each dataset, render every listed prompt style
    # and print the first rendered prompt under a ruled heading.
    separator = "-" * 80
    demo_plan = (
        (
            NeQA,
            (
                "prompt_zeroshot",
                "prompt_zeroshot_hint",
                "prompt_fewshot",
                "prompt_fewshot_cot",
            ),
        ),
        (Task2, ("prompt_zeroshot", "prompt_zeroshot_hint")),
    )
    for dataset_cls, prompt_names in demo_plan:
        data = dataset_cls(-1)  # -1 loads every instance
        for prompt_name in prompt_names:
            print(separator)
            print(prompt_name)
            data.apply(getattr(data, prompt_name))
            print(data.data[0]["prompt"])