-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGenData.py
173 lines (146 loc) · 8.74 KB
/
GenData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# GenData.py
import argparse
import sys
import numpy as np
import cv2
import os
# module level variables ##########################################################################
# A contour is only offered for labelling when cv2.contourArea() of it is greater than this value.
MIN_CONTOUR_AREA = 100
# Every cropped character is resized to this width and height (passed to cv2.resize) and then
# flattened into one row of RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT values.
RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30
###################################################################################################
def main():
    """Interactively label the characters found in a training image.

    The image path is taken from the ``-d`` / ``--image_train`` command line option.
    The image is converted to grayscale, blurred and adaptively thresholded; every
    external contour whose area is greater than ``MIN_CONTOUR_AREA`` is then shown
    to the user, and the program waits for a key press:

    * a digit or a lowercase letter records that key code as the label, together
      with the crop resized to ``RESIZED_IMAGE_WIDTH`` x ``RESIZED_IMAGE_HEIGHT``
      and flattened to a single row;
    * Esc terminates the program immediately through ``sys.exit()`` (nothing is
      written to disk);
    * any other key skips the contour.

    When all contours have been visited, the labels are written to
    ``classifications.txt`` and the flattened crops to ``flattened_images.txt``,
    after which ``changeCaption()`` is called.

    Returns:
        None. Returns early (after printing a message) when the option is
        missing or the image cannot be read.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("-d", "--image_train",
                    help = "path for the images that you're going to train")
    args = vars(ap.parse_args())

    # argparse stores the option under "image_train"; the value is None when the flag is omitted
    imagePath = args.get("image_train")
    if not imagePath:
        print("Please add -d or --image_train argument")
        return                                          # nothing to process without an image path
    # end if

    imgTrainingNumbers = cv2.imread(imagePath)          # read in training numbers image
    if imgTrainingNumbers is None:                      # cv2.imread signals failure by returning None
        print("error: image not read from file \n\n")   # print error message to std out
        if os.name == "nt":                             # "pause" is a Windows shell command only
            os.system("pause")                          # pause so user can see error message
        # end if
        return
    # end if

    imgGray = cv2.cvtColor(imgTrainingNumbers, cv2.COLOR_BGR2GRAY)      # get grayscale image
    imgBlurred = cv2.GaussianBlur(imgGray, (5, 5), 0)                   # blur

    # filter image from grayscale to black and white
    imgThresh = cv2.adaptiveThreshold(imgBlurred,                       # input image
                                      255,                              # make pixels that pass the threshold full white
                                      cv2.ADAPTIVE_THRESH_GAUSSIAN_C,   # use gaussian rather than mean
                                      cv2.THRESH_BINARY_INV,            # invert so foreground will be white, background will be black
                                      11,                               # size of a pixel neighborhood used to calculate threshold value
                                      2)                                # constant subtracted from the mean or weighted mean

    cv2.imshow("imgThresh", imgThresh)                  # show threshold image for reference

    # work on a copy because some OpenCV versions modify the image passed to findContours
    imgThreshCopy = imgThresh.copy()

    # findContours returns (image, contours, hierarchy) in OpenCV 3.x but (contours, hierarchy)
    # in 2.x and 4.x; the contours are the second-to-last element in every version
    npaContours = cv2.findContours(imgThreshCopy,
                                   cv2.RETR_EXTERNAL,           # retrieve the outermost contours only
                                   cv2.CHAIN_APPROX_SIMPLE)[-2] # compress segments and leave only their end points

    intFlattenedLength = RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT

    flattenedRows = []              # one (1, intFlattenedLength) row per labelled char, stacked once at the end
    intClassifications = []         # key codes typed by the user, written to file at the end

    # chars we accept as labels: digits 0 through 9 and lowercase letters a through z
    intValidChars = set(map(ord, "0123456789abcdefghijklmnopqrstuvwxyz"))

    for npaContour in npaContours:                                  # for each contour
        if cv2.contourArea(npaContour) <= MIN_CONTOUR_AREA:         # too small to consider
            continue
        # end if

        [intX, intY, intW, intH] = cv2.boundingRect(npaContour)    # get and break out bounding rect

        # draw rectangle around each contour as we ask user for input
        cv2.rectangle(imgTrainingNumbers,               # draw rectangle on original training image
                      (intX, intY),                     # upper left corner
                      (intX + intW, intY + intH),       # lower right corner
                      (0, 0, 255),                      # red
                      2)                                # thickness

        imgROI = imgThresh[intY:intY + intH, intX:intX + intW]      # crop char out of threshold image
        imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))     # fixed size for recognition and storage

        cv2.imshow("imgROI", imgROI)                                # show cropped out char for reference
        cv2.imshow("imgROIResized", imgROIResized)                  # show resized image for reference
        cv2.imshow("training_numbers.png", imgTrainingNumbers)      # training image with the red rectangles drawn so far

        # keep only the low byte: some platforms/back ends set extra high bits on the key code
        intChar = cv2.waitKey(0) & 0xFF

        if intChar == 27:                               # if esc key was pressed
            sys.exit()                                  # exit program
        elif intChar in intValidChars:                  # else if the char is one we are looking for . . .
            intClassifications.append(intChar)          # converted to float before writing to file
            flattenedRows.append(imgROIResized.reshape((1, intFlattenedLength)))
        # end if
    # end for

    if flattenedRows:
        # float64 matches what appending to the original empty float array produced
        npaFlattenedImages = np.vstack(flattenedRows).astype(np.float64)
    else:
        npaFlattenedImages = np.empty((0, intFlattenedLength))      # zero rows, enough cols to hold all image data
    # end if

    fltClassifications = np.array(intClassifications, np.float32)                   # convert list of ints to numpy array of floats
    npaClassifications = fltClassifications.reshape((fltClassifications.size, 1))   # one label per row

    print ("\n\ntraining complete !!\n")

    np.savetxt("classifications.txt", npaClassifications)           # write labels to file
    np.savetxt("flattened_images.txt", npaFlattenedImages)          # write flattened images to file

    changeCaption()                                     # post-process the saved labels

    cv2.destroyAllWindows()                             # remove windows from memory

    return
###################################################################################################
def changeCaption():
    """Rewrite ``classifications.txt`` with lowercase letter codes made uppercase.

    Loads the numeric labels stored in ``classifications.txt`` (in the current
    working directory), replaces every value that rounds to the character code
    of ``'a'``..``'z'`` with the code of the corresponding uppercase letter, and
    saves the result back to the same file as a single float32 column. All other
    values are written back unchanged.

    Returns:
        None. The file is modified in place.
    """
    # ndmin=1 keeps a file holding a single label as a 1-element array; without it
    # loadtxt returns a 0-d array that can be neither iterated nor indexed
    data = np.loadtxt("classifications.txt", ndmin=1)

    # labels are stored as floats, so round to the nearest integer code before comparing
    codes = np.rint(data)
    isLowercase = (codes >= ord('a')) & (codes <= ord('z'))

    # upper- and lowercase ASCII letters are a constant distance apart
    data[isLowercase] = codes[isLowercase] - (ord('a') - ord('A'))

    result = np.array(data, np.float32)                     # convert classifications to numpy array of float32
    npaClassifications = result.reshape((result.size, 1))   # one label per row

    np.savetxt("classifications.txt", npaClassifications)
if __name__ == "__main__":
    # start the interactive labelling session only when this file is run as a script
    main()
# end if