# detect.py
import os
import pickle

import numpy as np
import cv2
import pandas as pd
from scipy import ndimage as nd
from skimage.filters import roberts, sobel, scharr, prewitt

image_dataset = pd.DataFrame()  # Dataframe to capture image features
img_path = "images/train_images/"
for image in os.listdir(img_path):  # Iterate through each file
    print(image)

    df = pd.DataFrame()  # Temporary dataframe to capture information for each loop.
                         # Reset dataframe to blank after each loop.
    input_img = cv2.imread(img_path + image)  # Read the image
    if input_img is None:  # Skip files that cv2.imread cannot decode
        continue

    # Check if the input image is RGB or grayscale and convert to grayscale if RGB
    if input_img.ndim == 3 and input_img.shape[-1] == 3:
        img = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
    elif input_img.ndim == 2:
        img = input_img
    else:
        raise Exception("The module works only with grayscale and RGB images!")

    # Add pixel values to the data frame
    pixel_values = img.reshape(-1)
    df['Pixel_Value'] = pixel_values  # Pixel value itself as a feature
    df['Image_Name'] = image          # Capture image name as we read multiple images
    # Generate Gabor features
    num = 1  # To count numbers up in order to give Gabor features a label in the data frame
    kernels = []
    for theta in range(2):  # Define number of thetas
        theta = theta / 4. * np.pi
        for sigma in (1, 3):  # Sigma with 1 and 3
            # Range of wavelengths; note that lamda=0 yields a degenerate kernel
            for lamda in np.arange(0, np.pi, np.pi / 4):
                for gamma in (0.05, 0.5):  # Gamma values of 0.05 and 0.5
                    gabor_label = 'Gabor' + str(num)  # Label Gabor columns as Gabor1, Gabor2, etc.
                    ksize = 9
                    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                    kernels.append(kernel)
                    # Now filter the image and add values to a new column
                    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
                    filtered_img = fimg.reshape(-1)
                    df[gabor_label] = filtered_img  # Labels columns as Gabor1, Gabor2, etc.
                    print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
                    num += 1  # Increment for Gabor column label
    # CANNY EDGE
    edges = cv2.Canny(img, 100, 200)  # Image, min and max threshold values
    edges1 = edges.reshape(-1)
    df['Canny Edge'] = edges1  # Add column to original dataframe

    # ROBERTS EDGE
    edge_roberts = roberts(img)
    edge_roberts1 = edge_roberts.reshape(-1)
    df['Roberts'] = edge_roberts1

    # SOBEL
    edge_sobel = sobel(img)
    edge_sobel1 = edge_sobel.reshape(-1)
    df['Sobel'] = edge_sobel1

    # SCHARR
    edge_scharr = scharr(img)
    edge_scharr1 = edge_scharr.reshape(-1)
    df['Scharr'] = edge_scharr1

    # PREWITT
    edge_prewitt = prewitt(img)
    edge_prewitt1 = edge_prewitt.reshape(-1)
    df['Prewitt'] = edge_prewitt1

    # GAUSSIAN with sigma=3
    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian_img1 = gaussian_img.reshape(-1)
    df['Gaussian s3'] = gaussian_img1

    # GAUSSIAN with sigma=7
    gaussian_img2 = nd.gaussian_filter(img, sigma=7)
    gaussian_img3 = gaussian_img2.reshape(-1)
    df['Gaussian s7'] = gaussian_img3

    # MEDIAN with size=3
    median_img = nd.median_filter(img, size=3)
    median_img1 = median_img.reshape(-1)
    df['Median s3'] = median_img1

    # VARIANCE with size=3
    variance_img = nd.generic_filter(img, np.var, size=3)
    variance_img1 = variance_img.reshape(-1)
    df['Variance s3'] = variance_img1  # Add column to original dataframe
    # Note: DataFrame.append was removed in pandas 2.0; use pd.concat instead
    image_dataset = pd.concat([image_dataset, df])
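
# Optional sanity check, not part of the original script: confirm the feature
# matrix has one row per pixel and one column per filter before moving on.
print("Image feature matrix shape:", image_dataset.shape)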
mask_dataset = pd.DataFrame()  # Create dataframe to capture mask info.
mask_path = "images/train_masks/"
for mask in os.listdir(mask_path):  # Iterate through each file to perform some action
    print(mask)

    df2 = pd.DataFrame()  # Temporary dataframe to capture info for each mask in the loop
    input_mask = cv2.imread(mask_path + mask)
    if input_mask is None:  # Skip files that cv2.imread cannot decode
        continue

    # Check if the input mask is RGB or grayscale and convert to grayscale if RGB
    if input_mask.ndim == 3 and input_mask.shape[-1] == 3:
        label = cv2.cvtColor(input_mask, cv2.COLOR_BGR2GRAY)
    elif input_mask.ndim == 2:
        label = input_mask
    else:
        raise Exception("The module works only with grayscale and RGB images!")

    # Add pixel values to the data frame
    label_values = label.reshape(-1)
    df2['Label_Value'] = label_values
    df2['Mask_Name'] = mask

    # Update mask dataframe with all the info from each mask
    mask_dataset = pd.concat([mask_dataset, df2])
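
# Optional sanity check, not part of the original script: the feature and label
# frames must have one row per pixel, in the same order, for the column-wise
# concat below to line up correctly.
assert len(image_dataset) == len(mask_dataset), "Pixel counts of images and masks do not match!"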
################################################################
# STEP 3: GET DATA READY FOR RANDOM FOREST (or other classifier)
# COMBINE BOTH DATAFRAMES INTO A SINGLE DATASET
################################################################

# Reset the repeating per-image indices so the column-wise concat aligns row by row
dataset = pd.concat([image_dataset.reset_index(drop=True),
                     mask_dataset.reset_index(drop=True)], axis=1)

# If you expect image and mask names to match, this is where we can perform a sanity check:
# dataset['Image_Name'].equals(dataset['Mask_Name'])

# If we do not want to include pixels with value 0,
# e.g. sometimes unlabeled pixels may be given a value of 0:
dataset = dataset[dataset.Label_Value != 0]
# Assign training features to X and labels to Y
# Drop columns that are not relevant for training (non-features)
X = dataset.drop(labels=["Image_Name", "Mask_Name", "Label_Value"], axis=1)

# Assign label values to Y (our prediction target)
Y = dataset["Label_Value"].values

# Split data into train and test sets to verify accuracy after fitting the model
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=20)
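
# Optional sketch, not part of the original script: pixel-level labels are often
# imbalanced, so it is worth inspecting the class distribution before training.
unique_labels, counts = np.unique(y_train, return_counts=True)
print("Training label distribution:", dict(zip(unique_labels, counts)))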
####################################################################
# STEP 4: Define the classifier and fit a model with our training data
####################################################################

# Import the classifier
from sklearn.ensemble import RandomForestClassifier

# Instantiate the model with n_estimators decision trees
model = RandomForestClassifier(n_estimators=50, random_state=42)

# Train the model on the training data
model.fit(X_train, y_train)
#######################################################
# STEP 5: Accuracy check
#######################################################
from sklearn import metrics

prediction_test = model.predict(X_test)

# Check accuracy on the test dataset
print("Accuracy = ", metrics.accuracy_score(y_test, prediction_test))
##########################################################
# STEP 6: SAVE MODEL FOR FUTURE USE
##########################################################

# You can store the model for future use. In fact, this is how you do machine learning:
# train on training images, validate on test images, and deploy the model on unknown images.

# Save the trained model to disk as a pickle file for future use
model_name = "sandstone_model"
with open(model_name, 'wb') as f:
    pickle.dump(model, f)
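
# Optional sketch, not part of the original script: reload the pickled model
# later and predict on new data. A new image must go through the exact same
# feature-extraction pipeline as above so the columns match what the model saw;
# `new_image_features` below is a hypothetical dataframe built that way.
with open(model_name, 'rb') as f:
    loaded_model = pickle.load(f)
# predicted_labels = loaded_model.predict(new_image_features)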