-
Notifications
You must be signed in to change notification settings - Fork 0
/
evaluate_cams.py
117 lines (99 loc) · 4.54 KB
/
evaluate_cams.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import itertools
import os

import numpy as np
import six
import torchvision

from utils.utils import init_params, seed_reproducer, mkdir
def calc_semantic_segmentation_confusion(pred_labels, gt_labels):
    r"""Collect a confusion matrix.

    The number of classes :math:`n\_class` is
    :math:`max(pred\_labels, gt\_labels) + 1`, which is
    the maximum class id of the inputs added by one.

    Pixels whose ground-truth label is negative are ignored.

    Args:
        pred_labels (iterable of numpy.ndarray): See the table in
            :func:`chainercv.evaluations.eval_semantic_segmentation`.
            Every item must be a two-dimensional integer label map.
        gt_labels (iterable of numpy.ndarray): See the table in
            :func:`chainercv.evaluations.eval_semantic_segmentation`.
            Every item must have the same shape as the prediction it is
            paired with.

    Returns:
        numpy.ndarray:
        A confusion matrix. Its shape is :math:`(n\_class, n\_class)`.
        The :math:`(i, j)` th element corresponds to the number of pixels
        that are labeled as class :math:`i` by the ground truth and
        class :math:`j` by the prediction.

    Raises:
        ValueError: If a label map is not two-dimensional, if a prediction
            and its ground truth differ in shape, or if the two iterables
            yield a different number of items.

    """
    # A unique fill value lets us detect that one iterable ran out before
    # the other. Plain ``zip`` silently consumes one extra prediction, so a
    # prediction list that is longer by exactly one item would go unnoticed.
    missing = object()

    n_class = 0
    confusion = np.zeros((n_class, n_class), dtype=np.int64)
    for pred_label, gt_label in itertools.zip_longest(
            pred_labels, gt_labels, fillvalue=missing):
        if pred_label is missing or gt_label is missing:
            raise ValueError('Length of input iterables need to be same')
        if pred_label.ndim != 2 or gt_label.ndim != 2:
            raise ValueError('ndim of labels should be two.')
        if pred_label.shape != gt_label.shape:
            raise ValueError('Shape of ground truth and prediction should'
                             ' be same.')
        pred_label = pred_label.flatten()
        gt_label = gt_label.flatten()

        # Dynamically expand the confusion matrix if necessary.
        lb_max = np.max((pred_label, gt_label))
        if lb_max >= n_class:
            expanded_confusion = np.zeros(
                (lb_max + 1, lb_max + 1), dtype=np.int64)
            expanded_confusion[0:n_class, 0:n_class] = confusion

            n_class = lb_max + 1
            confusion = expanded_confusion

        # Count statistics from valid pixels. Each (gt, pred) pair is
        # encoded as the flat index ``gt * n_class + pred`` so that a single
        # bincount yields the whole matrix for this image.
        mask = gt_label >= 0
        confusion += np.bincount(
            n_class * gt_label[mask].astype(int) + pred_label[mask],
            minlength=n_class ** 2).reshape((n_class, n_class))

    return confusion
def calc_semantic_segmentation_iou(confusion):
    r"""Calculate Intersection over Union with a given confusion matrix.

    The definition of Intersection over Union (IoU) is as follows,
    where :math:`N_{ij}` is the number of pixels
    that are labeled as class :math:`i` by the ground truth and
    class :math:`j` by the prediction.

    * :math:`\text{IoU of the i-th class} =
      \frac{N_{ii}}{\sum_{j=1}^k N_{ij} + \sum_{j=1}^k N_{ji} - N_{ii}}`

    Args:
        confusion (numpy.ndarray): A confusion matrix. Its shape is
            :math:`(n\_class, n\_class)`.
            The :math:`(i, j)` th element corresponds to the number of pixels
            that are labeled as class :math:`i` by the ground truth and
            class :math:`j` by the prediction.

    Returns:
        numpy.ndarray:
        An array of IoUs for the :math:`n\_class` classes. Its shape is
        :math:`(n\_class,)`. A class whose row and column are both all
        zero has a zero denominator and its IoU is ``nan``.

    """
    true_positive = np.diag(confusion)
    iou_denominator = (
        confusion.sum(axis=1) + confusion.sum(axis=0) - true_positive)
    # A class that never occurs gives 0 / 0. The resulting nan is the
    # intended "undefined" marker, so silence the floating-point warning
    # instead of letting it surface on every evaluation run.
    with np.errstate(divide='ignore', invalid='ignore'):
        iou = true_positive / iou_denominator
    return iou
if __name__ == "__main__":
    # Evaluate stored class activation maps (CAMs) against the PASCAL VOC
    # 2012 "train" segmentation masks and print the mean IoU.

    # Make experiment reproducible
    seed_reproducer(2020)

    dataset = torchvision.datasets.VOCSegmentation('data',
                                                   year='2012',
                                                   image_set='train',
                                                   download=False)
    hparams = init_params()

    # Ground truth: label 255 is remapped to -1 so that the confusion-matrix
    # routine (which only counts pixels with gt >= 0) skips those pixels.
    labels = []
    for _, label in dataset:
        # Cast to a signed dtype first so that writing -1 does not depend on
        # NumPy's scalar promotion rules (the mask is presumably uint8).
        label = np.array(label).astype(np.int16)
        labels.append(np.where(label == 255, -1, label))

    # Predictions: one file per image in ``hparams.cam_dir``, visited in
    # sorted filename order.
    # NOTE(review): this assumes sorted filenames line up with the dataset
    # iteration order above - confirm against how the CAM files are written.
    preds = []
    for cam_file in sorted(os.listdir(hparams.cam_dir)):
        cam_dict = np.load(os.path.join(hparams.cam_dir, cam_file),
                           allow_pickle=True).item()
        cams = cam_dict['high_res']
        # Prepend one constant-valued map along axis 0. It wins the argmax
        # wherever no CAM exceeds ``cam_eval_thres``.
        cams = np.pad(cams, ((1, 0), (0, 0), (0, 0)), mode='constant',
                      constant_values=hparams.cam_eval_thres)
        # Shift the stored keys by one and prepend 0 so that the prepended
        # map corresponds to label 0.
        keys = np.pad(cam_dict['keys'] + 1, (1, 0), mode='constant')
        # Fancy indexing already returns a fresh array, no extra copy needed.
        preds.append(keys[np.argmax(cams, axis=0)])

    confusion = calc_semantic_segmentation_confusion(preds, labels)
    iou = calc_semantic_segmentation_iou(confusion)
    print({'miou': np.nanmean(iou)})