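"""Build, train, and save a small convolutional neural network for MNIST
digit classification. Data is loaded through the project's data_access module
and augmented on the fly with Keras' ImageDataGenerator."""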

import data_access  # project-local helpers: data loading and the model file path

from keras.models import Sequential  # model type to be used: a linear stack of layers
from keras.layers import (Dense, Dropout, Activation, Flatten,  # core layer types
                          Conv2D, MaxPooling2D, BatchNormalization)  # convolutional layer types
from keras.preprocessing.image import ImageDataGenerator  # on-the-fly data augmentation


def create_model():
    model = Sequential()  # linear stacking of layers

    # Convolution Layer 1
    model.add(Conv2D(32, (3, 3), input_shape=(28, 28, 1)))  # 32 different 3x3 kernels -- so 32 feature maps
    model.add(BatchNormalization(axis=-1))  # normalize each feature map before activation
    model.add(Activation('relu'))

    # Convolution Layer 2
    model.add(Conv2D(32, (3, 3)))  # 32 different 3x3 kernels -- so 32 feature maps
    model.add(BatchNormalization(axis=-1))  # normalize each feature map before activation
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))  # pool the max values over a 2x2 kernel

    # Convolution Layer 3
    model.add(Conv2D(64, (3, 3)))  # 64 different 3x3 kernels -- so 64 feature maps
    model.add(BatchNormalization(axis=-1))  # normalize each feature map before activation
    model.add(Activation('relu'))

    # Convolution Layer 4
    model.add(Conv2D(64, (3, 3)))  # 64 different 3x3 kernels -- so 64 feature maps
    model.add(BatchNormalization(axis=-1))  # normalize each feature map before activation
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))  # pool the max values over a 2x2 kernel

    model.add(Flatten())  # flatten the final 4x4x64 feature maps into a 1024-length vector
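
    # Shape check, worked out from the layers above (all convolutions use
    # 'valid' padding): 28x28 input -> conv 26x26 -> conv 24x24 -> pool 12x12
    # -> conv 10x10 -> conv 8x8 -> pool 4x4 with 64 maps, so Flatten yields
    # 4 * 4 * 64 = 1024 features.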

    # Fully Connected Layer 5
    model.add(Dense(512))  # 512 FCN nodes
    model.add(BatchNormalization())  # normalization
    model.add(Activation('relu'))

    # Fully Connected Layer 6
    model.add(Dropout(0.2))  # 20% dropout of randomly selected nodes
    model.add(Dense(10))  # final 10 FCN nodes, one per digit class
    model.add(Activation('softmax'))  # softmax turns the 10 outputs into class probabilities

    model.summary()

    # categorical_crossentropy expects one-hot labels, matching the softmax output
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
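
# Sanity check on the architecture above: model.summary() should report
# roughly 598k parameters in total, dominated by the 1024 -> 512 dense layer
# (1024 * 512 weights + 512 biases = 524,800 parameters).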


def train_and_test_model(model, X_train, Y_train, X_test, Y_test):
    # Data augmentation helps prevent overfitting by randomly perturbing the
    # training images; Keras has a built-in generator for automatic augmentation.
    gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                             height_shift_range=0.08, zoom_range=0.08)
    test_gen = ImageDataGenerator()  # no augmentation for the test images

    # Feeding the data in batches through generators also brings significant
    # memory savings (important for larger, deeper networks): each batch is
    # loaded into the network just before it is processed, instead of the
    # whole dataset being held in memory at once.
    train_generator = gen.flow(X_train, Y_train, batch_size=128)
    test_generator = test_gen.flow(X_test, Y_test, batch_size=128)
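
    # For example, drawing one batch from the training generator (assuming
    # X_train has shape (60000, 28, 28, 1) and Y_train is one-hot encoded):
    #   x_batch, y_batch = next(train_generator)
    #   x_batch.shape  # (128, 28, 28, 1), randomly augmented images
    #   y_batch.shape  # (128, 10)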

    # Train the model, fed by the batch generators. Steps per epoch should be
    # the total size of the set divided by the batch size. (fit_generator is
    # deprecated in recent Keras; model.fit accepts generators directly.)
    model.fit(train_generator, steps_per_epoch=len(X_train) // 128, epochs=5, verbose=1,
              validation_data=test_generator, validation_steps=len(X_test) // 128)

    score = model.evaluate(X_test, Y_test)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])


if __name__ == '__main__':
    (X_train, Y_train), (X_test, Y_test) = data_access.load_mnist_data()
    model = create_model()
    train_and_test_model(model, X_train, Y_train, X_test, Y_test)
    model.save(data_access.MODEL_FILE_NAME)
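
# A minimal sketch of reloading the saved model later for inference (assumes
# the same data_access module; keras.models.load_model is the standard loader):
#   from keras.models import load_model
#   model = load_model(data_access.MODEL_FILE_NAME)
#   probs = model.predict(X_test[:5])  # (5, 10) array of softmax probabilities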