-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgenerate_examples_table.py
105 lines (85 loc) · 3.29 KB
/
generate_examples_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from evaluation.fix_spans import _contiguous_ranges
import pandas as pd
import numpy as np
from ast import literal_eval
import os
import sys
from evaluation.metrics import f1
def get_spans_from_offsets(text, offsets):
text_spans = []
ranges = _contiguous_ranges(offsets)
for _range in ranges:
text_spans.append(text[_range[0] : _range[1] + 1])
return text_spans
test_file = "./data/tsd_test_spans.csv"
test_df = pd.read_csv(test_file)
test_df["spans"] = test_df["spans"].apply(lambda x: literal_eval(x))
output_file = "./example_table.txt"
all_preds = []
for predictions_file in sorted(os.listdir("results/test_predictions")):
with open(os.path.join("results/test_predictions", predictions_file), "r") as f:
all_preds.append(f.readlines())
# best_example_id = -1
# best_f1_std = 0
# second_example_id = -1
# second_f1_std = 0
# third_example_id = -1
# third_f1_std = 0
example_f1_variance = {}
for example_id in range(2000):
# example_id = int(sys.argv[1]) # something between 0 and 1999
f1_scores = []
example_text = test_df.iloc[example_id]["text"]
gold = test_df.iloc[example_id]["spans"]
for idx, predictions_file in enumerate(
sorted(os.listdir("results/test_predictions"))
):
# print(predictions_file)
pred = all_preds[idx][example_id]
pred = sorted(np.unique(literal_eval(pred.split("\t")[1])))
f1_scores.append(f1(pred, gold))
example_f1_variance[example_id] = np.std(f1_scores)
# if np.std(f1_scores) > best_f1_std:
# third_example_id = second_example_id
# third_f1_std = second_f1_std
# second_example_id = best_example_id
# second_f1_std = best_f1_std
# best_example_id = example_id
# best_f1_std = np.std(f1_scores)
# elif np.std(f1_scores) > second_f1_std:
# third_example_id = second_example_id
# third_f1_std = second_f1_std
# second_example_id = example_id
# second_f1_std = np.std(f1_scores)
# elif np.std(f1_scores) > third_f1_std:
# third_example_id = example_id
# third_f1_std = np.std(f1_scores)
# from scipy.stats import mode
val = np.median(np.array(list(example_f1_variance.values())))
print(val)
for k, v in example_f1_variance.items():
if v == val:
example_id = k
break
# sorted_example_f1_variance = [
# k for k, v in sorted(example_f1_variance.items(), key=lambda x: x[1], reverse=True)
# ]
# example_id = sorted_example_f1_variance[15]
example_text = test_df.iloc[example_id]["text"]
gold = test_df.iloc[example_id]["spans"]
example_spans = get_spans_from_offsets(example_text, gold)
file_names = ["text_" + str(example_id), "ground"]
spans = [example_text, example_spans]
for idx, predictions_file in enumerate(sorted(os.listdir("results/test_predictions"))):
pred = all_preds[idx][example_id]
pred = sorted(np.unique(literal_eval(pred.split("\t")[1])))
text_spans = get_spans_from_offsets(example_text, pred)
file_names.append(
predictions_file.replace("_spans-pred.txt", "").replace("original_test-", "")
)
spans.append(text_spans)
df = pd.DataFrame.from_dict({"file_names": file_names, "spans": spans})
df.to_latex(output_file, index=False)
s = df.to_markdown(tablefmt="github")
with open("example_markdown_table.md", "w") as f:
f.write(s)