forked from clarkkev/deep-coref
-
Notifications
You must be signed in to change notification settings - Fork 0
/
evaluation.py
104 lines (80 loc) · 3.01 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import numpy as np
from collections import Counter
from sklearn.utils.linear_assignment_ import linear_assignment
def f1(p_num, p_den, r_num, r_den, beta=1):
p = 0 if p_den == 0 else p_num / float(p_den)
r = 0 if r_den == 0 else r_num / float(r_den)
return 0 if p + r == 0 else (1 + beta * beta) * p * r / (beta * beta * p + r)
class Evaluator:
def __init__(self, metric, beta=1):
self.p_num = 0
self.p_den = 0
self.r_num = 0
self.r_den = 0
self.metric = metric
self.beta = beta
def update(self, document):
if self.metric == ceafe:
pn, pd, rn, rd = self.metric(document.clusters, document.gold)
else:
pn, pd = self.metric(document.clusters, document.mention_to_gold)
rn, rd = self.metric(document.gold, document.mention_to_cluster)
self.p_num += pn
self.p_den += pd
self.r_num += rn
self.r_den += rd
def get_f1(self):
return f1(self.p_num, self.p_den, self.r_num, self.r_den, beta=self.beta)
def get_recall(self):
return 0 if self.r_num == 0 else self.r_num / float(self.r_den)
def get_precision(self):
return 0 if self.p_num == 0 else self.p_num / float(self.p_den)
def prf(self):
return self.get_precision(), self.get_recall(), self.get_f1()
def get_counts(self):
return self.p_num, self.p_den, self.r_num, self.r_den
def evaluate_documents(documents, metric, beta=1):
evaluator = Evaluator(metric, beta=beta)
for document in documents:
evaluator.update(document)
return evaluator.get_precision(), evaluator.get_recall(), evaluator.get_f1()
def b_cubed(clusters, mention_to_gold):
num, dem = 0, 0
for c in clusters:
if len(c) == 1:
continue
gold_counts = Counter()
correct = 0
for m in c:
if m in mention_to_gold:
gold_counts[tuple(mention_to_gold[m])] += 1
for c2, count in gold_counts.iteritems():
if len(c2) != 1:
correct += count * count
num += correct / float(len(c))
dem += len(c)
return num, dem
def muc(clusters, mention_to_gold):
tp, p = 0, 0
for c in clusters:
p += len(c) - 1
tp += len(c)
linked = set()
for m in c:
if m in mention_to_gold:
linked.add(mention_to_gold[m])
else:
tp -= 1
tp -= len(linked)
return tp, p
def phi4(c1, c2):
return 2 * len([m for m in c1 if m in c2]) / float(len(c1) + len(c2))
def ceafe(clusters, gold_clusters):
clusters = [c for c in clusters if len(c) != 1]
scores = np.zeros((len(gold_clusters), len(clusters)))
for i in range(len(gold_clusters)):
for j in range(len(clusters)):
scores[i, j] = phi4(gold_clusters[i], clusters[j])
matching = linear_assignment(-scores)
similarity = sum(scores[matching[:, 0], matching[:, 1]])
return similarity, len(clusters), similarity, len(gold_clusters)