deep learning with keras - binary classification
Dataset: x and y coordinates with labels 0 or 1 representing the two colors.
import seaborn as sns
sns.pairplot(circles, hue="target")
topology:
1. input layer with two inputs, one for x and one for y
2. one hidden layer with four neurons
3. one output neuron
sigmoid function: sigmoid(z) = 1 / (1 + e**-z)
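A quick numpy check of the sigmoid formula (a minimal sketch; the sample inputs are made up):
import numpy as np

def sigmoid(z):
    # squashes any real-valued input into (0, 1)
    return 1 / (1 + np.exp(-z))

print(sigmoid(0))                           # 0.5, the decision boundary
print(sigmoid(np.array([-4.0, 0.0, 4.0])))  # ~[0.018, 0.5, 0.982]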
>>>
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.utils import plot_model
import matplotlib.pyplot as plt

model = Sequential()
model.add(Dense(4, input_shape=(2,), activation='sigmoid'))
# model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # sigmoid output for binary classification
model.compile(optimizer=Adam(0.01), loss='binary_crossentropy')
# model.compile(optimizer=Adam(0.01), loss='mae')
model.summary()
plot_model(model, to_file='model.png')
img = plt.imread('model.png')
plt.imshow(img)
plt.show()
model.compile(optimizer='sgd', loss='binary_crossentropy')
model.fit(coordinates, labels, epochs=20)  # fit(), not train()
preds = model.predict(coordinates)
>>>
banknotes dataset columns: variance, skewness, kurtosis, entropy, class
# Import seaborn
import seaborn as sns
import matplotlib.pyplot as plt

print(banknotes.keys())
# Use pairplot and set the hue to be our class
sns.pairplot(banknotes, hue='class')
# Show the plot
plt.show()
# Describe the data
print('Dataset stats: \n', banknotes.describe())
# Count the number of observations of each class
print('Observations per class: \n', banknotes['class'].value_counts())
>> multi-class classification
darts dataset: xCoord, yCoord -> competitor
topology: 2 inputs, then dense layers of 128, 64, and 32 neurons, then 4 outputs
softmax output example (the four probabilities sum to 1):
.6 Michael
.1 Susan
.2 Kate
.1 Steve
model.add(Dense(4, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy')
The log loss (categorical crossentropy) decreases as the model becomes more accurate at predicting the true class.
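A small numeric sketch of that claim, using the softmax output above (hypothetical values; assumes the true competitor is Michael, one-hot encoded first):
import numpy as np

y_true = np.array([1, 0, 0, 0])            # one-hot: the true competitor is Michael
y_pred = np.array([0.6, 0.1, 0.2, 0.1])    # softmax output from above

# categorical crossentropy = -sum(y_true * log(y_pred))
print(-np.sum(y_true * np.log(y_pred)))    # -log(0.6) ~= 0.51

# a more confident correct prediction yields a lower loss
y_better = np.array([0.9, 0.03, 0.04, 0.03])
print(-np.sum(y_true * np.log(y_better)))  # -log(0.9) ~= 0.11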
>>>>> to_categorical
>> 2 inputs, [128, 64, 32] hidden layers, 4 outputs
import pandas as pd
from keras.utils import to_categorical

df = pd.read_csv('data.csv')
df.response = pd.Categorical(df.response)
df.response = df.response.cat.codes
# turn the response variable into a one-hot encoded response matrix
y = to_categorical(df.response)
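For intuition, a tiny sketch of what to_categorical returns (made-up codes):
from keras.utils import to_categorical

print(to_categorical([0, 1, 2, 1]))
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]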
>>>
# Import to_categorical from keras utils module
from keras.utils import to_categorical
# Instantiate a sequential model
model = Sequential()
# Add 3 dense layers of 128, 64 and 32 neurons each
model.add(Dense(128, input_shape=(2,), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
# Add a dense layer with as many neurons as competitors
model.add(Dense(4, activation="softmax"))
# Compile your model using categorical_crossentropy loss
model.compile(loss="categorical_crossentropy",
              optimizer='adam',
              metrics=['accuracy'])
model.summary()
# Transform into a categorical variable
darts.competitor = pd.Categorical(darts.competitor)
# Assign a number to each category (label encoding)
darts.competitor = darts.competitor.cat.codes
# Print the label encoded competitors
print('Label encoded competitors: \n',darts.competitor.head())
# Transform into a categorical variable
darts.competitor = pd.Categorical(darts.competitor)
# Use to_categorical on your labels
coordinates = darts.drop(['competitor'], axis=1)
competitors = to_categorical(darts.competitor)
# Now print the to_categorical() result
print('One-hot encoded competitors: \n',competitors)
# Train your model on the training data for 200 epochs
model.fit(coord_train,competitors_train,epochs=200)
# Evaluate your model accuracy on the test data
accuracy = model.evaluate(coord_test, competitors_test)[1]
# Print accuracy
print('Accuracy:', accuracy)
# Predict on coords_small_test
preds = model.predict(coords_small_test)
# Print preds vs true values
print("{:45} | {}".format('Raw Model Predictions','True labels'))
for i, pred in enumerate(preds):
    print("{} | {}".format(pred, competitors_small_test[i]))
# Extract the indexes of the highest probable predictions
preds = [np.argmax(pred) for pred in preds]
# Print preds vs true values
print("{:10} | {}".format('Rounded Model Predictions', 'True labels'))
for i, pred in enumerate(preds):
    print("{:25} | {}".format(pred, competitors_small_test[i]))
>>> multi-label
model = Sequential()
model.add(Dense(2, input_shape=(1,)))
model.add(Dense(3, activation='sigmoid'))
# each output will be between 0 and 1
model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit(X_train, y_train, epochs=100, validation_split=0.2)
# one-versus-rest classification
# example: sensor measurements predict which parcels of land to water
Multi-label classification problems differ from multi-class problems in that each observation can be labeled with zero or more classes, so the classes are not mutually exclusive.
To account for this, we use an output layer with as many neurons as classes, but unlike in multi-class problems each output neuron gets a sigmoid activation function, so every output neuron independently produces a number between 0 and 1.
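To make "zero or more classes" concrete, a sketch of what multi-label targets look like (made-up rows, 3 classes):
import numpy as np

# each row is one observation; a 1 in column j means class j applies
y = np.array([[1, 0, 1],   # classes 0 and 2 both apply
              [0, 0, 0],   # no class applies
              [0, 1, 0]])  # only class 1 applies
# unlike one-hot multi-class labels, rows need not sum to 1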
>>
# Instantiate a Sequential model
model = Sequential()
# Add a hidden layer of 64 neurons and an input of 20 features
model.add(Dense(64, input_shape=(20,), activation='relu'))
# Add an output layer of 3 neurons with sigmoid activation
model.add(Dense(3, activation='sigmoid'))
# Compile your model with adam and binary crossentropy loss
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()
# Train for 100 epochs using a validation split of 0.2
model.fit(sensors_train, parcels_train, epochs = 100, validation_split = 0.2)
# Predict on sensors_test and round up the predictions
preds = model.predict(sensors_test)
preds_rounded = np.round(preds)
# Print rounded preds
print('Rounded Predictions: \n', preds_rounded)
# Evaluate your model's accuracy on the test data
accuracy = model.evaluate(sensors_test, parcels_test)[1]
# Print accuracy
print('Accuracy:', accuracy)
>>callbacks
1) EarlyStopping
2) ModelCheckpoint
3) History
print(history.history['loss'])
print(history.history['acc'])
print(history.history['val_loss'])
print(history.history['val_acc'])
History, ModelCheckpoint, and EarlyStopping are all passed to fit() through its callbacks argument.
# print(history.history['loss'])
plt.figure()
plt.plot(history.history['loss'])
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()
>>>>> Early stopping
# useful because we don't know in advance how many epochs are required to complete training
from keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=5)
# patience is the number of epochs to wait for an improvement before training is stopped
model.fit(X_train, y_train, epochs=100,
          validation_data=(X_test, y_test),
          callbacks=[early_stopping])
>>> model checkpoint
from keras.callbacks import ModelCheckpoint

# allows us to save the model to a file as training runs; save_best_only keeps only the best version
model_save = ModelCheckpoint('best_model.hdf5', save_best_only=True)
model.fit(X_train, y_train, epochs=100,
          validation_data=(X_test, y_test),
          callbacks=[model_save])
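As a follow-up, the checkpointed model can be loaded back for inference (a sketch; 'best_model.hdf5' is the file saved above):
from keras.models import load_model

# restore the best weights found during training
best_model = load_model('best_model.hdf5')
preds = best_model.predict(X_test)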
>>>
# Train your model and save its history
history = model.fit(X_train, y_train, epochs=50,
                    validation_data=(X_test, y_test))
# Plot train vs test loss during training
plot_loss(history.history['loss'], history.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(history.history['acc'], history.history['val_acc'])
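plot_loss and plot_accuracy are helpers provided by the exercise environment; a minimal sketch of what plot_loss might look like (assumed implementation, not the course's exact code):
import matplotlib.pyplot as plt

def plot_loss(loss, val_loss):
    # diverging train/validation curves suggest overfitting
    plt.figure()
    plt.plot(loss, label='train')
    plt.plot(val_loss, label='validation')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.show()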
# Import the early stopping callback
from keras.callbacks import EarlyStopping
# Define a callback to monitor val_acc
monitor_val_acc = EarlyStopping(monitor='val_acc',
                                patience=5)
# Train your model using the early stopping callback
model.fit(X_train, y_train,
          epochs=1000, validation_data=(X_test, y_test),
          callbacks=[monitor_val_acc])
>>>
# Import the EarlyStopping and ModelCheckpoint callbacks
from keras.callbacks import EarlyStopping,ModelCheckpoint
# Early stop on validation accuracy
monitor_val_acc = EarlyStopping(monitor ='val_acc', patience=3)
# Save the best model as best_banknote_model.hdf5
modelCheckpoint = ModelCheckpoint('best_banknote_model.hdf5', save_best_only = True)
# Fit your model for a stupid amount of epochs
history = model.fit(X_train, y_train,
                    epochs=10000000,
                    callbacks=[monitor_val_acc, modelCheckpoint],
                    validation_data=(X_test, y_test))
>>> Learning Curves
1) loss learning curves decrease as epochs go by
2) accuracy learning curves increase as epochs go by
**** model overfitting can be identified when the training curves and the validation curves diverge
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping

initial_weights = model.get_weights()
train_accs = []
test_accs = []
for train_size in train_sizes:
    # sample a fraction of the training data
    X_train_frac, _, y_train_frac, _ = train_test_split(
        X_train, y_train, train_size=train_size)
    # reset the model to its initial (untrained) weights
    model.set_weights(initial_weights)
    model.fit(X_train_frac, y_train_frac, epochs=100, verbose=0,
              callbacks=[EarlyStopping(monitor='loss', patience=1)])
    train_accs.append(model.evaluate(X_train_frac, y_train_frac, verbose=0)[1])
    # evaluate on the complete test set
    test_accs.append(model.evaluate(X_test, y_test, verbose=0)[1])
>>>
# Instantiate a Sequential model
model = Sequential()
# Input and hidden layer with input_shape, 16 neurons, and relu
model.add(Dense(16, input_shape = (64,), activation = 'relu'))
# Output layer with 10 neurons (one per digit) and softmax
model.add(Dense(10, activation='softmax'))
# Compile your model
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
# Test if your model works and can process input data
print(model.predict(X_train))
# Train your model for 60 epochs, using X_test and y_test as validation data
history = model.fit(X_train, y_train, epochs=60, validation_data=(X_test, y_test), verbose=0)
# Extract from the history object loss and val_loss to plot the learning curve
plot_loss(history.history['loss'],history.history['val_loss'])
for size in training_sizes:
    # Get a fraction of training data (we only care about the training data)
    X_train_frac, y_train_frac = X_train[:size], y_train[:size]
    # Reset the model to the initial weights and train it on the new data fraction
    model.set_weights(initial_weights)
    model.fit(X_train_frac, y_train_frac, epochs=50, callbacks=[early_stop])
    # Evaluate and store the train fraction and the complete test set results
    train_accs.append(model.evaluate(X_train_frac, y_train_frac)[1])
    test_accs.append(model.evaluate(X_test, y_test)[1])
# Plot train vs test accuracies
plot_results(train_accs, test_accs)
>>>> Activation Functions
a = sum of inputs * weights + bias
a is passed into an activation function, producing the neuron's output y
1. sigmoid varies between 0 and 1
2. tanh varies between -1 and 1
3. relu varies between 0 and infinity
4. leaky relu varies between a small negative value and infinity (see the numpy sketch below)
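A minimal numpy sketch of the weighted sum and the four activations above (illustrative values; the 0.3 leaky slope is an assumption matching Keras' LeakyReLU default):
import numpy as np

sigmoid    = lambda a: 1 / (1 + np.exp(-a))         # (0, 1)
tanh       = np.tanh                                # (-1, 1)
relu       = lambda a: np.maximum(0.0, a)           # [0, inf)
leaky_relu = lambda a: np.where(a > 0, a, 0.3 * a)  # small negative slope below 0

# a = sum of inputs * weights + bias
x, w, b = np.array([0.5, -1.0]), np.array([0.8, 0.2]), 0.1
a = np.sum(x * w) + b   # 0.3
for f in (sigmoid, tanh, relu, leaky_relu):
    print(f(a))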
np.random.seed(1)

def get_model(act_function):
    model = Sequential()
    model.add(Dense(4, input_shape=(2,), activation=act_function))
    model.add(Dense(1, activation='sigmoid'))
    return model

activations = ['relu', 'sigmoid', 'tanh', 'leaky_relu']
activation_results = {}
for funct in activations:
    model = get_model(act_function=funct)
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        epochs=100, verbose=False)
    activation_results[funct] = history

val_loss_per_funct = {k: v.history['val_loss'] for k, v in activation_results.items()}
val_loss_curves = pd.DataFrame(val_loss_per_funct)
val_loss_curves.plot(title='Loss per Activation function')
>>>>
# Activation functions to try
activations = ['relu', 'leaky_relu', 'sigmoid', 'tanh']
# Loop over the activation functions
activation_results = {}
for act in activations:
    # Get a new model with the current activation
    model = get_model(act)
    # Fit the model
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        epochs=20, verbose=0)
    activation_results[act] = history

# Create a dataframe from val_loss_per_function
val_loss = {k: v.history['val_loss'] for k, v in activation_results.items()}
# Call plot on the dataframe
val_loss_per_function = pd.DataFrame(val_loss)
val_loss_per_function.plot()
plt.show()
# Create a dataframe from val_acc_per_function
val_acc = {k: v.history['val_acc'] for k, v in activation_results.items()}
# Call plot on the dataframe
val_acc_per_function = pd.DataFrame(val_acc)
val_acc_per_function.plot()
plt.show()
>>>> batch size and batch normalization
1. mini-batch advantages
a. networks train faster (more weight updates in the same amount of time)
b. less RAM required, so you can train on huge datasets
c. noise can help networks reach a lower error by escaping local minima
2. mini-batch disadvantages
a. more iterations need to be run
b. the batch size needs to be tuned, we need to find a good batch size
keras uses a default batch size of 32
The smaller the batch size, the more weight updates per epoch, but at the cost of a more unstable gradient descent, especially if the batch size is too small and the batch is no longer representative of the whole training set.
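A sketch of how batch size is passed to fit (variable names reused from the snippets above):
model.fit(X_train, y_train, epochs=5)                           # default batch_size=32
model.fit(X_train, y_train, epochs=5, batch_size=8)             # smaller batches: more, noisier updates per epoch
model.fit(X_train, y_train, epochs=5, batch_size=len(X_train))  # full batch: one smooth but costly update per epoch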
standardization: (data - mean) / standard deviation
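A quick illustration of that formula (made-up data):
import numpy as np

data = np.array([1.0, 2.0, 3.0, 4.0])
standardized = (data - data.mean()) / data.std()
print(standardized.mean(), standardized.std())  # ~0.0, ~1.0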
batch normalization makes sure that, regardless of how the outputs of earlier layers change during training, the inputs to the next layer stay normalized
batch normalization advantages
1. improves gradient flow
2. allows higher learning rates
3. reduces dependence on weight initializations
4. acts as an unintended form of regularization
5. limits internal covariate shift
from keras.layers import BatchNormalization

model = Sequential()
model.add(Dense(3, input_shape=(2,), activation='relu'))
model.add(BatchNormalization())
model.add(Dense(1, activation='sigmoid'))
>>
model = get_model()
# Fit your model for 5 epochs with a batch size equal to the whole training set
model.fit(X_train, y_train, epochs=5, batch_size=len(X_train))
print("\n The accuracy when using the whole training set as a batch was: ",
      model.evaluate(X_test, y_test)[1])
>>>
# Import batch normalization from keras layers
from keras.layers import BatchNormalization
# Build your deep network
batchnorm_model = Sequential()
batchnorm_model.add(Dense(50, input_shape=(64,), activation='relu', kernel_initializer='normal'))
batchnorm_model.add(BatchNormalization())
batchnorm_model.add(Dense(50, activation='relu', kernel_initializer='normal'))
batchnorm_model.add(BatchNormalization())
batchnorm_model.add(Dense(50, activation='relu', kernel_initializer='normal'))
batchnorm_model.add(BatchNormalization())
batchnorm_model.add(Dense(10, activation='softmax', kernel_initializer='normal'))
# Compile your model with sgd
batchnorm_model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])
# Train your standard model, storing its history
history1 = standard_model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, verbose=0)
# Train the batch normalized model you recently built, store its history
history2 = batchnorm_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, verbose=0)
# Call compare_histories_acc passing in both model histories
compare_histories_acc(history1, history2)