-
Notifications
You must be signed in to change notification settings - Fork 0
/
number_recogniser.py
210 lines (198 loc) · 7.76 KB
/
number_recogniser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy
from tensorflow.data import Dataset
from tensorflow import cast, float32
from time import time
# Get dataset
def get_data():
    """Load the MNIST digits and wrap both splits as ``Dataset`` objects.

    MNIST is a handwritten-character image dataset with 60k training and
    10k testing examples. Each example is a 28x28 grid of 'pixels' stored
    as integers in the range 0-255, and each label is the number that the
    image represents.

    Returns:
        A ``(ds_train, ds_test)`` tuple of datasets whose elements are
        ``(image, label)`` pairs.
    """
    # The dataset is loaded and stored on disk under this file name, which
    # improves the runtime of later calls.
    train_split, test_split = mnist.load_data(path='mnist.npz')
    # Each split is already an (images, labels) tuple, which is exactly what
    # from_tensor_slices slices element-wise. The Dataset object provides
    # useful methods such as map, shuffle, batch and cache.
    ds_train, ds_test = (
        Dataset.from_tensor_slices(split)
        for split in (train_split, test_split)
    )
    return ds_train, ds_test
# Preprocess data
def preprocess(dataset, batchsize, shuffle=False):
    """Normalise, cache, optionally shuffle, and batch an image dataset.

    Args:
        dataset: Dataset of ``(image, label)`` elements whose pixel values
            are integers in the range 0-255.
        batchsize: Number of examples per batch. After each batch the error
            is calculated and the model trained, so larger batches mean
            fewer backpropagation steps.
        shuffle: Whether to shuffle the examples before batching, so that
            batches are random even if the dataset is sorted.

    Returns:
        The batched dataset with pixel values scaled to the range 0-1.

    Raises:
        TypeError: If ``shuffle`` is true and the dataset length is unknown
            or infinite, because ``len(dataset)`` is used as buffer size.
    """
    def normalise(image, label):
        # Divide by the maximum pixel value; required for the model.
        return cast(image, float32) / 255.0, label

    dataset = dataset.map(normalise)
    # Cache (keep in memory) the normalised examples for improved runtime.
    # The cache must sit *before* the shuffle: caching a shuffled dataset
    # stores the order of the first pass and replays the very same batches
    # in every later epoch.
    dataset = dataset.cache()
    if shuffle:
        # A buffer as large as the dataset yields a full shuffle, redone on
        # every pass over the data.
        dataset = dataset.shuffle(len(dataset))
    dataset = dataset.batch(batchsize)
    return dataset
# Define models and return them one by one
def def_models(layersize, filters):
    """Define the candidate models and yield them one by one.

    The models are built lazily: each one is only constructed when the
    generator is advanced to it.

    Args:
        layersize: Number of nodes in every hidden ``Dense`` layer. A larger
            number increased runtime without significant performance gain;
            a lower number reduced performance.
        filters: Number of kernels applied by the convolutional layer. Each
            can detect a different kind of basic feature, so this will
            significantly affect the performance.

    Yields:
        ``(name, model)`` tuples, in the order ``"OneLayerNN"``,
        ``"OneConvLayerNN"``, ``"ThreeLayerNN"``. The models are uncompiled
        ``Sequential`` networks whose output layer has one node per number
        and no activation, i.e. it emits raw scores.
    """
    def hidden_layer():
        # Hidden layer, densely connected with the previous layer.
        # relu is popular and fast and avoids the vanishing gradient
        # problem, but deactivates some neurons.
        # The bias can shift the activation function, making the network
        # more flexible; useful when many zeroes are present in the
        # previous layer.
        return Dense(units=layersize, activation='relu', use_bias=True)

    # Simple one layered feed forward model
    yield (
        "OneLayerNN",
        Sequential([
            # Flatten the 2D image matrix into a 1D matrix; creates the
            # input layer with a node for each pixel
            Flatten(input_shape=(28, 28)),
            hidden_layer(),
            # Dropout can be used to reduce overfitting, however it was
            # disadvantageous in this task
            # Output layer with a node for each number
            Dense(10),
        ]),
    )

    # Convolutional model
    # NOTE(review): the declared input has a channel axis (28, 28, 1) while
    # the other models take (28, 28) images; confirm the installed Keras
    # version adds the trailing axis automatically when fed (28, 28) data.
    yield (
        "OneConvLayerNN",
        Sequential([
            # Convolutional layer, applies convolution to the 2D matrix
            Conv2D(
                filters=filters,
                # Size of the kernel to slide over the image. Smaller is
                # often more accurate because it can detect more detail,
                # although a very small kernel may be less useful for
                # detecting features; larger can be faster
                kernel_size=3,
                use_bias=True,
                # Grayscale: the 3rd dimension has one channel instead of 3
                input_shape=(28, 28, 1),
                activation="relu",
                # Padding ensures the shape of the data stays the same
                padding="same",
            ),
            # Pooling can be used to reduce features and thus prevent
            # overfitting, but this was not advantageous for this task
            # Flattening is needed to connect the convolution output to the
            # output layer. No input_shape here: only the first layer of a
            # model declares one, and this layer receives the convolution
            # output rather than the raw 28x28 image.
            Flatten(),
            Dense(10),
        ]),
    )

    # Three layered model: the same hidden layer repeated three times
    yield (
        "ThreeLayerNN",
        Sequential([
            Flatten(input_shape=(28, 28)),
            *(hidden_layer() for _ in range(3)),
            Dense(10),
        ]),
    )
# Train and evaluate model
def train(ds_train, model, epochs, ds_test=None, learning_rate=0.01):
    """Compile and fit a model, optionally evaluating it afterwards.

    Args:
        ds_train: Batched dataset of training examples.
        model: The model to train; it is compiled and fitted in place.
        epochs: Number of times to go over the training dataset. More
            repeats didn't significantly affect the results.
        ds_test: Optional batched dataset of testing examples. When given,
            the trained model is evaluated on it.
        learning_rate: Step size used to scale the weight changes. A higher
            value increases convergence speed but makes training more
            susceptible to local optima.

    Returns:
        ``(model, score)`` when ``ds_test`` is given, where ``score`` is the
        dict returned by ``model.evaluate(..., return_dict=True)``;
        otherwise just ``model``.
    """
    model.compile(
        # Gradient descent optimisation algorithm
        optimizer=Adam(learning_rate=learning_rate),
        # A more compact form of categorical cross entropy, which measures
        # the similarity between two probability distributions: the
        # prediction and the true output. from_logits=True tells the loss
        # that the model outputs raw scores rather than probabilities.
        loss=SparseCategoricalCrossentropy(from_logits=True),
        # Accuracy is useful for comparing and interpreting the evaluation
        metrics=[SparseCategoricalAccuracy()],
    )

    # Run the training examples through the model, which yields a
    # prediction; the loss function finds the error of this prediction
    # compared to the true result, and backpropagation propagates the error
    # down the layers and updates the weights to reduce the loss.
    # No worker/multiprocessing options are passed: they only apply to
    # Python generator or Sequence input, not to a Dataset.
    model.fit(
        ds_train,
        epochs=epochs,
        # Don't print the progress of training
        verbose=0,
    )

    if ds_test is None:
        return model

    # Run the testing examples, whose correct output is known, through the
    # model. The accuracy is the number of correct predictions divided by
    # the total.
    score = model.evaluate(
        ds_test,
        # Don't print anything
        verbose=0,
        return_dict=True,
    )
    return model, score
# Main control flow
# Most important hyperparameters can be optimised by calling main
def main(batchsize=128, epochs=2, layersize=250, filters=10):
    """Train every candidate model on MNIST and return the most accurate.

    The most important hyperparameters can be optimised by calling this
    function with different arguments.

    Args:
        batchsize: Number of examples per batch for both datasets.
        epochs: Number of passes over the training data per model.
        layersize: Number of nodes in each hidden dense layer.
        filters: Number of kernels in the convolutional layer.

    Returns:
        The trained model with the highest test accuracy, or ``None`` if no
        model scored above 0.
    """
    # Fetch the raw data, then preprocess it; only the training split is
    # shuffled.
    raw_train, raw_test = get_data()
    ds_train = preprocess(raw_train, batchsize, shuffle=True)
    ds_test = preprocess(raw_test, batchsize)

    # Generator of (name, model) pairs; each model is built on demand.
    candidates = def_models(layersize, filters)

    best_model, highest_acc = None, 0
    time_prev = time()
    for name, candidate in candidates:
        trained, score = train(ds_train, candidate, epochs, ds_test=ds_test)
        accuracy = score['sparse_categorical_accuracy']

        # Remember the best model seen so far
        if accuracy > highest_acc:
            best_model, highest_acc = trained, accuracy

        # Time spent on this model, then restart the clock for the next one
        runtime = time() - time_prev
        time_prev = time()

        # Output for the user
        print(f"Model {name} was evaluated with an average accuracy of {round(accuracy, 3)}. Runtime was {runtime}s")

    return best_model
# Run the model comparison with the default hyperparameters, but only when
# this file is executed as a script rather than imported as a module.
if __name__ == "__main__":
    main()