-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathvalidate_wordnet.py
executable file
·91 lines (68 loc) · 2.72 KB
/
validate_wordnet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import json
from tqdm import tqdm
from execute import Engine
from rouge_score import rouge_scorer
from wordnet import wordnet
import csv
# Path handed to Engine as ``model_path``.
# NOTE(review): the name suggests a BART checkpoint trained on enwiki
# keyword/summary data — confirm against the training scripts.
MODEL_PATH = "./training/bart_enwiki-kw_summary-2d8df:ROUTINE::1:10000"
# Build the engine once at module level; it is used further down through
# e.execute(title, context, num_beams=..., min_length=...).
e = Engine(model_path=MODEL_PATH)
# Read the validation records from disk into one flat list.
validation_data_originals = []
print("Caching originals data...")
# range(4, 5) yields only the index 4, so exactly one data file is read.
for file_index in tqdm(range(4, 5)):
    filename = f"./data/enwiki-parsed-long-oc-MD{file_index}.json"
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default text encoding.
    with open(filename, "r", encoding="utf-8") as df:
        # extend() grows the list in place. The previous `a = a + b` form
        # re-copied everything accumulated so far for each file.
        validation_data_originals.extend(json.load(df))

# Reshape every record into [(title, context), target], the layout the
# rest of the script indexes into.
pairs = [
    [(record["title"], record["context"]), record["target"]]
    for record in validation_data_originals
]
# Keep only the samples for whose title wordnet.get_word_definition returns
# at least one definition. Each kept entry has the layout
# [(title, context), (target, definitions)].
collected_pairs = []
for source, target in pairs:
    # source is the (title, context) tuple; the title is the word looked up.
    definitions = wordnet.get_word_definition(source[0])
    if len(definitions) == 0:
        continue
    collected_pairs.append([source, (target, definitions)])
print(f"Validating upon {len(collected_pairs)} collected pairs!")
# Per-sample ROUGE-1 accumulators: precision, recall, F-measure.
rouge1_prec, rouge1_recc, rouge1_fm = [], [], []
# Per-sample ROUGE-L accumulators, in the same order.
rougel_prec, rougel_recc, rougel_fm = [], [], []
# Spot check: run the model on one fixed sample and print what it generates.
SPOT_CHECK_INDEX = 97
if len(collected_pairs) > SPOT_CHECK_INDEX:
    sample = collected_pairs[SPOT_CHECK_INDEX]
    # sample[0] is the (title, context) tuple passed to the engine.
    output = e.execute(sample[0][0], sample[0][1], num_beams=2, min_length=10)
    # The bare `sample` expression that stood here only echoes a value in an
    # interactive session; in a script it does nothing, so it was removed.
    print(output)
else:
    # Indexing unconditionally raised IndexError on small validation sets.
    print(f"Skipping spot check: fewer than {SPOT_CHECK_INDEX + 1} collected pairs.")
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)


def _mean_score(score):
    """Return the mean of a score's precision, recall and F-measure."""
    return (score.precision + score.recall + score.fmeasure) / 3


for sample in tqdm(collected_pairs):
    # sample is [(title, context), (target, definitions)].
    # Model generation is disabled in this loop: each definition is scored
    # against the dataset target, not against generated output.
    target, definitions = sample[1]
    results = [scorer.score(definition, target) for definition in definitions]

    # Pick the definition with the highest mean score for each metric.
    # reversed() makes ties resolve to the last candidate, which is what a
    # stable ascending sort followed by [-1] selects.
    rouge1 = max(reversed([r["rouge1"] for r in results]), key=_mean_score)
    rougel = max(reversed([r["rougeL"] for r in results]), key=_mean_score)

    # Record the best score of this sample for every tracked statistic.
    rouge1_prec.append(rouge1.precision)
    rouge1_recc.append(rouge1.recall)
    rouge1_fm.append(rouge1.fmeasure)
    rougel_prec.append(rougel.precision)
    rougel_recc.append(rougel.recall)
    rougel_fm.append(rougel.fmeasure)
def _mean(values):
    """Return the arithmetic mean of *values*, or ``None`` when it is empty."""
    return sum(values) / len(values) if values else None


# Report the average of every accumulator. The bare expressions that stood
# here computed the averages and discarded them, so a script run showed
# nothing, and an empty accumulator raised ZeroDivisionError.
# Values noted in the original inline comments, kept for reference:
#   rouge1: precision 0.21870, recall 0.27794, fmeasure 0.22116
#   rougeL: precision 0.18330, recall 0.24114, fmeasure 0.18879
for label, values in (
    ("rouge1 precision", rouge1_prec),
    ("rouge1 recall", rouge1_recc),
    ("rouge1 fmeasure", rouge1_fm),
    ("rougeL precision", rougel_prec),
    ("rougeL recall", rougel_recc),
    ("rougeL fmeasure", rougel_fm),
):
    average = _mean(values)
    shown = "n/a (no samples scored)" if average is None else f"{average:.5f}"
    print(f"{label}: {shown}")
# title.append(i["title"])
# model_output.append(output)
# desired_output.append(i["target"])
# rouge1_prec.append(results["rouge1"].precision)
# rouge1_recc.append(results["rouge1"].recall)
# rouge1_fm.append(results["rouge1"].fmeasure)
# rougel_prec.append(results["rougeL"].precision)
# rougel_recc.append(results["rougeL"].recall)
# rougel_fm.append(results["rougeL"].fmeasure)