forked from bnsreenu/python_for_microscopists
-
Notifications
You must be signed in to change notification settings - Fork 0
/
069b-Validate_BOVW_V1.0.py
130 lines (98 loc) · 4.26 KB
/
069b-Validate_BOVW_V1.0.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python
__author__ = "Sreenivas Bhattiprolu"
__license__ = "Feel free to copy, I appreciate if you acknowledge Python for Microscopists"
# https://www.youtube.com/watch?v=PRceoMWcv1U
"""
All cell images resized to 128 x 128
Images used for testing are completely different from the ones used for training.
136 images for testing, each parasitized and uninfected (136 x 2)
104 images for training, each parasitized and uninfected (104 x 2)
"""
import cv2
import numpy as np
import os
import pylab as pl
from sklearn.metrics import confusion_matrix, accuracy_score #sreeni
from sklearn.externals import joblib
# Load the artifacts stored by the training step in the pickle file "bovw.pkl".
# The file unpacks into five objects: the classifier (clf), the class names
# (classes_names), the scaler (stdSlr), the number of clusters (k) and the
# vocabulary (voc).
# NOTE(review): joblib.load unpickles arbitrary Python objects, so only load a
# "bovw.pkl" that you generated yourself or otherwise trust.
# NOTE(review): `sklearn.externals.joblib` (imported above) is believed to have
# been removed in scikit-learn 0.23; newer installs would need `import joblib`
# instead -- confirm against the installed version.
clf, classes_names, stdSlr, k, voc = joblib.load("bovw.pkl")
# Root folder of the testing images; every entry inside it is handled as one
# class folder further down.
# Alternative dataset kept for reference:
#test_path = 'dataset/test' # Names are Aeroplane, Bicycle, Car
test_path = 'cell_images/test' # Folder Names are Parasitized and Uninfected
# Author's note: pointing test_path at the train folder instead of test gives
# great accuracy (presumably because the classifier has already seen those
# images -- so it is not a meaningful validation).
testing_names = os.listdir(test_path)
# Placeholders filled further down: image_paths collects the path of every
# test image and image_classes the integer class ID of the image at the same
# index. class_id is the starting class ID.
image_paths = []
image_classes = []
class_id = 0
#To make it easy to list all file names in a directory let us define a function
#
def imglist(path):
    """Return the full paths of all entries in the directory *path*.

    Every name reported by ``os.listdir`` is joined onto *path*. The result
    is sorted because ``os.listdir`` returns names in arbitrary order;
    sorting keeps the image order -- and with it the printed label and
    prediction lists -- reproducible from run to run and across machines.

    Args:
        path: Directory to list.

    Returns:
        Sorted list of ``os.path.join(path, name)`` strings, one per
        directory entry (empty list for an empty directory).

    Raises:
        OSError: If *path* does not exist or is not a readable directory.
    """
    return sorted(os.path.join(path, name) for name in os.listdir(path))
# Fill the placeholder lists with every test image path and its integer
# class ID.
# The ID is looked up by folder name in classes_names (the class order that
# was stored with the classifier) instead of counting folders: os.listdir
# returns names in arbitrary order, so a running counter can silently attach
# the wrong label when the test folders are listed in a different order than
# the classes were stored.
# NOTE(review): assumes classes_names holds the class folder names used at
# training time -- confirm against the training script.
known_classes = list(classes_names)
for testing_name in testing_names:
    if testing_name not in known_classes:
        # Fail early with a clear message instead of mislabelling images.
        raise ValueError(
            "Test folder %r does not match any trained class %r"
            % (testing_name, known_classes))
    class_id = known_classes.index(testing_name)
    # Named class_dir rather than `dir`, which would shadow the builtin.
    class_dir = os.path.join(test_path, testing_name)
    class_path = imglist(class_dir)
    image_paths += class_path
    image_classes += [class_id] * len(class_path)
# Extract keypoint descriptors from every test image and turn them into one
# visual-word histogram per image using the vocabulary loaded from the pickle.

# vq assigns codes from a code book to observations.
from scipy.cluster.vq import vq

# Create the feature extraction / keypoint detector object.
# SIFT is not available anymore in OpenCV. BRISK is a good replacement;
# ORB also works but didn't work well for this example.
brisk = cv2.BRISK_create(30)

# des_list stores one (image_path, descriptors) pair per image, in the same
# order as image_paths, so index i refers to the same image in both lists.
des_list = []
for image_path in image_paths:
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; stop with a clear message rather than failing later.
        raise IOError("Could not read image: %s" % image_path)
    _, des = brisk.detectAndCompute(im, None)
    des_list.append((image_path, des))

# Stack all the descriptors vertically in a numpy array.
# One vstack call avoids re-copying the growing array for every image and
# fixes the earlier loop, which iterated des_list[0:] and therefore stacked
# the first image's descriptors twice. Images without descriptors are
# skipped. `descriptors` is not referenced again in the visible code.
found_descriptors = [d for _, d in des_list if d is not None]
descriptors = np.vstack(found_descriptors) if found_descriptors else None

# Calculate the histogram of features: row i counts how often each of the
# k vocabulary words is the nearest code for a descriptor of image i.
test_features = np.zeros((len(image_paths), k), "float32")
for i, (_, des) in enumerate(des_list):
    if des is None:
        # No descriptors for this image: its histogram stays all zeros
        # instead of crashing vq.
        continue
    words, _ = vq(des, voc)
    # np.add.at accumulates repeated indices, exactly like `+= 1` per word.
    np.add.at(test_features[i], words, 1)
# Tf-Idf step: for every visual word count the images in which it occurs at
# least once, then compute log((N + 1) / (occurrences + 1)) where N is the
# number of test images.
# NOTE(review): idf is computed here but not applied to test_features in the
# visible code -- confirm this mirrors the training script.
word_present = (test_features > 0).astype(int)
nbr_occurences = word_present.sum(axis=0)
n_images = float(len(image_paths))
idf = np.log((n_images + 1) / (nbr_occurences + 1.0)).astype('float32')

# Scale the features with stdSlr, the scaler that comes from the pickled file.
# Per the original notes it standardizes features by removing the mean and
# scaling to unit variance.
test_features = stdSlr.transform(test_features)
#######Until here most of the above code is similar to Train except for kmeans clustering####
# Translate the integer class IDs of the test images into class names so they
# can be compared with the predicted class names.
true_class = [classes_names[label_id] for label_id in image_classes]
# Let the classifier predict one class ID per feature row and translate those
# IDs into class names as well.
predicted_ids = clf.predict(test_features)
predictions = [classes_names[label_id] for label_id in predicted_ids]
# Print the true class names followed by the predicted ones.
print("true_class =", true_class, sep="")
print("prediction =", predictions, sep="")
###############################################
#To make it easy to understand the accuracy let us print the confusion matrix
def showconfusionmatrix(cm):
    """Display the confusion matrix *cm* as an image plot.

    Draws the matrix with pylab's ``matshow``, sets the title
    'Confusion matrix', adds a colorbar for the drawn image and finally
    calls ``pl.show()``.

    Args:
        cm: The matrix to draw (here: the output of ``confusion_matrix``).
    """
    image = pl.matshow(cm)
    pl.title('Confusion matrix')
    # Pass the image explicitly; it is the current image right after matshow.
    pl.colorbar(image)
    pl.show()
# Score the predicted class names against the true class names and print the
# result.
accuracy = accuracy_score(y_true=true_class, y_pred=predictions)
print("accuracy = ", accuracy)
# Build the confusion matrix from the same two lists, print it and plot it.
cm = confusion_matrix(y_true=true_class, y_pred=predictions)
print(cm)
showconfusionmatrix(cm)
################# sreeni ###########################
"""
#For classification of unknown files we can print the predictions
#Print the Predictions
print ("Image =", image_paths)
print ("prediction =" + str(predictions))
#np.transpose to save data into columns, otherwise saving as rows
np.savetxt ('mydata.csv', np.transpose([image_paths, predictions]),fmt='%s', delimiter=',', newline='\n')
"""