-
Notifications
You must be signed in to change notification settings - Fork 3
/
nnet-year-prediction-keras.py
116 lines (91 loc) · 3.65 KB
/
nnet-year-prediction-keras.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Year prediction using Keras neural nets
# This is the baseline file we used to run our experiments on condor.
# Every experiment on condor uses most of this code, with a few small modifications catered to that particular experiment.
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
import numpy as np
from sklearn import preprocessing
from keras import regularizers
from keras.utils import np_utils, generic_utils
# Build the feed-forward year classifier as a single Sequential stack.
# Each example carries 90 features, hence input_dim=90.  The three middle
# hidden layers get a light L2 weight penalty to discourage overfitting;
# the output is a softmax over the 2011 classes produced by
# to_categorical further down in this script.
model = Sequential([
    Dense(units=100, activation='relu', input_dim=90),
    Dense(units=100, activation='relu',
          kernel_regularizer=regularizers.l2(0.00001)),
    Dense(units=100, activation='relu',
          kernel_regularizer=regularizers.l2(0.00001)),
    Dense(units=100, activation='relu',
          kernel_regularizer=regularizers.l2(0.00001)),
    Dense(units=100, activation='relu'),
    Dense(units=2011, activation='softmax'),
])
# Plain SGD with categorical cross-entropy; accuracy is tracked so the
# training history can be inspected/plotted later.
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
labels = []
examples = []

print("GETTING DATASET")
print("")

# Path to the CSV copy of the YearPredictionMSD data; adjust per machine.
filename = "/mnt/c/Users/Aumit/Desktop/YearPredictionMSD.txt/yp.csv"

# Each row is: year label, then 90 comma-separated feature values.
# Values are kept as raw strings here; sklearn's scale() coerces them to
# float in the preprocessing step below.
with open(filename, 'r') as f:
    for row in f:
        fields = row.split(",")
        labels.append(fields[0])
        examples.append(fields[1:])
print("SPLITTING TRAINING AND TEST SETS")
print("")

# Convert the parsed rows to numpy and zero-center each feature column.
total_scaled = preprocessing.scale(np.array(examples))
# Numpy array of the (string) year labels, aligned row-for-row.
total_labels = np.array(labels)

# First 10000 rows train, the following 1000 test; growing or shrinking
# these slices directly affects run-time.
training_examples = total_scaled[:10000]
training_labels = total_labels[:10000]
test_examples = total_scaled[10000:11000]
test_labels = total_labels[10000:11000]

# One-hot encode the year labels to match the 2011-way softmax output.
y_train = keras.utils.to_categorical(training_labels, num_classes=2011)
y_test = keras.utils.to_categorical(test_labels, num_classes=2011)
# Train the model, tracking held-out performance each epoch.
# BUG FIX: the History object returned by model.fit was previously
# discarded, so `history_1` below was undefined and the plotting code
# raised a NameError.  validation_data is also required here — without
# it the history contains no 'val_acc'/'val_loss' series to plot.
history_1 = model.fit(training_examples, y_train,
                      epochs=200, batch_size=32,
                      validation_data=(test_examples, y_test))

# Final loss and metric values on the held-out examples.
loss_and_metrics = model.evaluate(test_examples, y_test, batch_size=32)
print(loss_and_metrics)

print("Creating Plots!")
print(history_1.history.keys())

# NOTE(review): the 'acc'/'val_acc' keys match Keras <= 2.2; newer Keras
# records 'accuracy'/'val_accuracy' instead — confirm installed version.

# Accuracy curves (train vs. validation) per epoch.
plt.figure(1)
plt.plot(history_1.history['acc'])
plt.plot(history_1.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig("model_acc.png")

# Loss curves (train vs. validation) per epoch.
plt.figure(2)
plt.plot(history_1.history['loss'])
plt.plot(history_1.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig("model_loss.png")