-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcluster_evaluation.py
80 lines (65 loc) · 2.45 KB
/
cluster_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import numpy as np
import cv2
from sklearn import metrics
import candidate_selection as cand_selec
# def pairwise_distance(f_name, img_feats, query_feats):
# if f_name == "BOVW":
# diq = np.sqrt(np.sum((img_feats - query_feats) ** 2))
# elif f_name == "MPEG7":
# diq = np.sqrt(np.sum((img_feats[0] - query_feats[0]) ** 2)) + np.sqrt(
# np.sum((img_feats[1] - query_feats[1]) ** 2)) + np.sqrt(
# np.sum((img_feats[2] - query_feats[2]) ** 2))
# elif f_name == "SIFT":
# bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True)
# matches = bf.match(img_feats, query_feats)
# matches = sorted(matches, key=lambda x: x.distance)
# matches = len(matches)
# diq = matches
#
# return diq
def my_v_measure_score(labels_true, labels_pred):
    """Return the V-measure of a predicted clustering against reference labels.

    Thin wrapper: both label sequences are forwarded unchanged to
    ``sklearn.metrics.v_measure_score`` and its result is returned as-is.
    """
    score = metrics.v_measure_score(labels_true, labels_pred)
    return score
def my_calinski_harabasz_score(fname, dataset, labels):
    """Return the Calinski-Harabasz score of ``dataset`` under ``labels``.

    For the "MPEG7" and "SIFT" feature types the dataset is converted to a
    NumPy array and its second and third axes are merged, so every sample
    becomes one flat row before scoring. For any other ``fname`` the
    dataset is passed to scikit-learn untouched.
    """
    if fname in ("MPEG7", "SIFT"):
        stacked = np.array(dataset)
        n_samples = stacked.shape[0]
        # Merge axis 1 and axis 2 into a single feature axis per sample.
        row_width = stacked.shape[1] * stacked.shape[2]
        dataset = stacked.reshape(n_samples, row_width)
    return metrics.calinski_harabasz_score(dataset, labels)
# def my_silhouette_score(fname, dataset, labels):
# if (fname == "SIFT"):
# dataset = np.array(dataset)
# dataset = dataset.reshape(
# dataset.shape[0], (dataset.shape[1] * dataset.shape[2]))
# return metrics.silhouette_score(dataset, labels, metric='euclidean')
# else:
# return metrics.silhouette_score(dataset, labels, metric='euclidean')
def silhouette_score(f_name, data_points, labels, k, dist_matrix):
    """Return a silhouette-style score averaged over ``k`` clusters.

    This is a simplified variant of the textbook silhouette coefficient:

    * ``a`` is the sum of distances from a point to every member of its own
      cluster (itself included) divided by the cluster size.
    * ``b`` is the mean distance from the point to *all* points outside its
      cluster, not the mean distance to the nearest other cluster.

    The per-point value is ``(b - a) / max(b, a)``. Per-point values are
    averaged inside each cluster ``0 .. k-1`` and those cluster means are
    averaged over ``k``. A cluster id with no members contributes 0 but
    still counts towards the divisor ``k``; labels outside ``range(k)`` are
    ignored in the final average.

    Args:
        f_name: Unused; kept for interface compatibility.
        data_points: Unused; kept for interface compatibility.
        labels: Iterable of cluster labels, one per point.
        k: Number of cluster ids (``0 .. k-1``) to average over.
        dist_matrix: Pairwise distances, indexable as ``dist_matrix[i][j]``.

    Returns:
        The mean of the per-cluster mean silhouette values.

    Raises:
        ZeroDivisionError: If ``k`` is 0.
    """
    # Accept any iterable of labels (e.g. a NumPy array), not only lists.
    labels = list(labels)
    num = len(labels)

    silh_points = []
    for i, clu in enumerate(labels):
        row = dist_matrix[i]
        same = 0.0
        diff = 0.0
        same_count = 0
        for j, other in enumerate(labels):
            if other == clu:
                same += row[j]
                same_count += 1
            else:
                diff += row[j]

        other_count = num - same_count
        if other_count == 0:
            # No point lies outside this cluster, so ``b`` is undefined;
            # use the neutral value instead of dividing by zero.
            silh_points.append(0.0)
            continue

        a = same / same_count
        b = diff / other_count
        denom = max(b, a)
        # Both means are zero (coincident points): treat as neutral.
        silh_points.append((b - a) / denom if denom != 0 else 0.0)

    final_score = 0.0
    for cluster in range(k):
        total = 0.0
        members = 0
        for label, value in zip(labels, silh_points):
            if cluster == label:
                total += value
                members += 1
        if members:
            final_score += total / members
    return final_score / k