eval.py
import torch
from sklearn.metrics import precision_recall_fscore_support, classification_report, confusion_matrix, accuracy_score
import numpy as np
from utils import tensor_dict_to_gpu, tensor_dict_to_cpu


def calc_classification_metrics(y_true, y_predicted, labels):
    # Aggregate scores across all labels (macro/micro/weighted averages)
    # plus per-label precision, recall and F1.
    macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(y_true, y_predicted, average='macro')
    micro_precision, micro_recall, micro_f1, _ = precision_recall_fscore_support(y_true, y_predicted, average='micro')
    weighted_precision, weighted_recall, weighted_f1, _ = precision_recall_fscore_support(y_true, y_predicted, average='weighted')
    per_label_precision, per_label_recall, per_label_f1, _ = precision_recall_fscore_support(y_true, y_predicted, average=None, labels=labels)
    acc = accuracy_score(y_true, y_predicted)
    class_report = classification_report(y_true, y_predicted, digits=4)
    confusion_abs = confusion_matrix(y_true, y_predicted, labels=labels)
    # Normalize each row of the confusion matrix to percentages
    # (every row for a label that occurs in y_true sums to 100).
    confusion = np.around(confusion_abs.astype('float') / confusion_abs.sum(axis=1)[:, np.newaxis] * 100, 2)
    return {"acc": acc,
            "macro-f1": macro_f1,
            "macro-precision": macro_precision,
            "macro-recall": macro_recall,
            "micro-f1": micro_f1,
            "micro-precision": micro_precision,
            "micro-recall": micro_recall,
            "weighted-f1": weighted_f1,
            "weighted-precision": weighted_precision,
            "weighted-recall": weighted_recall,
            "labels": labels,
            "per-label-f1": per_label_f1.tolist(),
            "per-label-precision": per_label_precision.tolist(),
            "per-label-recall": per_label_recall.tolist(),
            "confusion_abs": confusion_abs.tolist()
            }, \
        confusion.tolist(), \
        class_report
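
# A minimal usage sketch (the label names below are illustrative only, not
# tied to any dataset used by this repository):
#
#     metrics, confusion, class_report = calc_classification_metrics(
#         y_true=["background", "method", "result"],
#         y_predicted=["background", "background", "result"],
#         labels=["background", "method", "result"])
#     print(metrics["acc"], metrics["macro-f1"])
#
# `metrics` is a JSON-serializable dict, `confusion` the row-normalized
# percentage matrix as nested lists, and `class_report` the formatted string
# from sklearn's classification_report.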

def eval_model(model, eval_batches, device, task):
    model.eval()
    true_labels = []
    labels_dict = {}
    predicted_labels = []
    docwise_predicted_labels = []
    docwise_true_labels = []
    doc_name_list = []
    with torch.no_grad():
        for batch in eval_batches:
            # Skip batches from other tasks before moving anything to the
            # GPU, so skipped batches never occupy device memory.
            if batch["task"] != task.task_name:
                continue
            # Move the batch tensors to the GPU for the forward pass.
            tensor_dict_to_gpu(batch, device)
            output = model(batch=batch)
            true_labels_batch, predicted_labels_batch = \
                clear_and_map_padded_values(batch["label_ids"].view(-1),
                                            output["predicted_label"].view(-1),
                                            task.labels)
            docwise_true_labels.append(true_labels_batch)
            docwise_predicted_labels.append(predicted_labels_batch)
            doc_name_list.append(batch['doc_name'][0])
            true_labels.extend(true_labels_batch)
            predicted_labels.extend(predicted_labels_batch)
            tensor_dict_to_cpu(batch)
    labels_dict['y_true'] = true_labels
    labels_dict['y_predicted'] = predicted_labels
    labels_dict['labels'] = task.labels
    labels_dict['docwise_y_true'] = docwise_true_labels
    labels_dict['docwise_y_predicted'] = docwise_predicted_labels
    labels_dict['doc_names'] = doc_name_list
    metrics, confusion, class_report = \
        calc_classification_metrics(y_true=true_labels,
                                    y_predicted=predicted_labels,
                                    labels=task.labels)
    return metrics, confusion, labels_dict, class_report
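
# A minimal call sketch, assuming a trained `model`, an iterable of batch
# dicts `eval_batches` (each carrying the "task", "label_ids", and "doc_name"
# entries consumed above), and a `task` object exposing `task_name` and
# `labels` attributes:
#
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     metrics, confusion, labels_dict, class_report = eval_model(
#         model, eval_batches, device, task)
#     print(class_report)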

def clear_and_map_padded_values(true_labels, predicted_labels, labels):
    assert len(true_labels) == len(predicted_labels)
    cleared_predicted = []
    cleared_true = []
    for true_label, predicted_label in zip(true_labels, predicted_labels):
        # Label id 0 marks padded positions; drop them and map the
        # remaining ids to their label names.
        if true_label > 0:
            cleared_true.append(labels[true_label])
            cleared_predicted.append(labels[predicted_label])
    return cleared_true, cleared_predicted
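
# Behavior sketch: with labels = ["<pad>", "A", "B"] (index 0 reserved for
# padding, an assumption consistent with the `true_label > 0` check above),
# true ids [0, 1, 2] and predicted ids [2, 1, 1] yield
# (["A", "B"], ["A", "A"]): the padded first position is dropped and the
# surviving ids are mapped to their names.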