-
Notifications
You must be signed in to change notification settings - Fork 53
/
train_model.py
98 lines (77 loc) · 2.97 KB
/
train_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# train_model.py
import os
import cv2
import numpy as np
# Current version of training; this suffix is appended to every output file name
version = '_2_1'
# Size (in pixels) that each training image is resized to before it is flattened
RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30
# One integer label (ord() of the character) per training image, filled by train_file()
int_classifications = []
# One flattened image per row; starts with zero rows and WIDTH * HEIGHT columns
npa_flattened_images = np.empty((0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
# NOTE(review): not referenced by any function visible in this file - confirm before removing
npa_classifications = []
# Folder that main() writes the trained data files into
trained_folder = 'knn'
# Name of the JSON output file that main() writes inside trained_folder
trained_json_path = 'training' + version + '.json'
# Load one training image and record it under the given character label
def train_file(file_path, char):
    """Add a single training sample to the module-level training set.

    The image at ``file_path`` is converted to grayscale, resized to
    RESIZED_IMAGE_WIDTH x RESIZED_IMAGE_HEIGHT and flattened into one row
    that is appended to ``npa_flattened_images``. ``ord(char)`` is appended
    to ``int_classifications`` as the matching label.

    Args:
        file_path: Path of the image file to load.
        char: Label for the image. The special label 'dot' is stored as
            'A'; any other label must be a single character because it is
            passed to ``ord()``.

    Raises:
        IOError: If OpenCV cannot read an image from ``file_path``.
        TypeError: If ``char`` is not a single character (raised by ``ord``).
    """
    # Rebinding the array below requires the global declaration; the label
    # list is only mutated in place, so it needs none.
    global npa_flattened_images

    if char == 'dot':
        char = 'A'
    # Resolve the label first so a bad label cannot leave the two
    # collections with different lengths.
    label = ord(char)

    img = cv2.imread(file_path)
    # cv2.imread signals failure by returning None instead of raising;
    # report it here rather than letting cvtColor fail with a cryptic error.
    if img is None:
        raise IOError('Could not read training image: {}'.format(file_path))

    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # cv2.resize returns a new array, so no defensive copy is needed.
    img_resized = cv2.resize(img_gray, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))
    flattened = img_resized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))

    int_classifications.append(label)
    npa_flattened_images = np.append(npa_flattened_images, flattened, 0)
# Render a mapping as a JSON-like object string
def serialize_dict(dict):
    """Return ``dict`` rendered as a brace-wrapped, comma-separated string.

    Each entry becomes ``"key": value`` where ``value`` is inserted with
    ``str.format`` exactly as given (it is not quoted or escaped), and
    entries are separated by a bare comma in iteration order.

    Args:
        dict: Mapping to render. The name shadows the builtin but is kept
            because it is part of the function's signature.

    Returns:
        The rendered string; an empty mapping yields ``'{}'``.
    """
    entries = ['"{}": {}'.format(key, dict[key]) for key in dict]
    return '{' + ','.join(entries) + '}'
# Render a 2-D matrix as an 'opencv-matrix' JSON object string
def serialize_mat(mat):
    """Return ``mat`` rendered as a JSON object string followed by a newline.

    The object carries the fields ``type_id`` ('opencv-matrix'), ``dt``
    (always 'f'), ``data`` (the output of ``serialize_array(mat)``),
    ``rows`` (``mat.shape[0]``) and ``cols`` (``mat.shape[1]``).

    Args:
        mat: Array-like with a ``shape`` of at least two dimensions.

    Returns:
        The rendered string, terminated by ``'\\n'``.
    """
    template = '{{"type_id":"{}", "dt":"{}", "data":{}, "rows":{}, "cols": {}}}\n'
    fields = (
        'opencv-matrix',
        'f',  # TODO: Be smarter about the data type
        serialize_array(mat),
        mat.shape[0],
        mat.shape[1],
    )
    return template.format(*fields)
# Write out an array into a string for use in serialization
def serialize_array(arr):
    """Flatten a two-level iterable into a bracketed, comma-separated string.

    Every element of every row is converted with ``str()`` and the results
    are joined in row-major order, e.g. ``[[1, 2], [3, 4]]`` becomes
    ``'[1,2,3,4]'``.

    Args:
        arr: Iterable of rows, each row itself an iterable of elements
            (for example a 2-D numpy array or a list of lists).

    Returns:
        The serialized string. Input with no elements yields ``'[]'``.
    """
    # Joining avoids the trailing-comma trim, which used to strip the
    # opening bracket when there were no elements at all.
    return '[' + ','.join(str(element) for row in arr for element in row) + ']'
def main():
    """Train on every labelled folder under 'training' and save the results.

    Each sub-directory of ``training`` is treated as one label: its name is
    passed to ``train_file`` together with every non-hidden file inside it.
    The collected labels and flattened images are then written to
    ``trained_folder`` as two text files (via ``np.savetxt``) and as one
    JSON file built with ``serialize_dict`` / ``serialize_mat``.
    """
    training_dir = "training"

    # Create the output folder up front so a missing directory does not
    # fail the run only after every image has already been processed.
    if not os.path.isdir(trained_folder):
        os.makedirs(trained_folder)

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # generated files reproducible from one run to the next.
    for fname in sorted(os.listdir(training_dir)):
        path = os.path.join(training_dir, fname)
        if not os.path.isdir(path):
            continue
        print('Training ' + fname)
        for tfile in sorted(os.listdir(path)):
            # Skip hidden files such as .DS_Store
            if tfile.startswith('.'):
                continue
            train_file(os.path.join(path, tfile), fname)

    # Save the classifications for use in Python
    labels = np.array(int_classifications, np.float32)
    label_column = labels.reshape((labels.size, 1))
    np.savetxt(os.path.join(trained_folder, "classifications" + version + ".txt"),
               label_column)
    np.savetxt(os.path.join(trained_folder, "flattened_images" + version + ".txt"),
               npa_flattened_images)

    # Save the classifications into a JSON file for use in C++/iOS
    data = {
        'classifications': serialize_mat(label_column),
        'flattened_images': serialize_mat(npa_flattened_images)
    }
    with open(os.path.join(trained_folder, trained_json_path), 'w') as outfile:
        outfile.write(serialize_dict(data))


if __name__ == "__main__":
    main()