#|**********************************************************************;
#* Project : Comparative Analysis on Machine Learning Algorithms for
# Predictive Maintenance in the Aviation Industry
#*
#* Program name : KerasNeuralNetwork.py
#*
#* Author : Robert Sutherland
#*
#* Date created : 22/04/20
#*
#* Purpose : To predict the RUL of an aircraft turbofan engine after
#            an HPC fault occurs
#*
#*
#* Extra Information : The code is an improved variation of Rutger
#*                     Righart's code for the RUL of an aircraft turbofan engine,
#*                     which can be accessed here: https://github.com/RRighart/Gatu/blob/master/Gatu-script.ipynb
#*
#*
#* Date Author
#* 22/04/20 Robert Sutherland
#*
#|**********************************************************************;
#import all libraries needed
from tabulate import tabulate
import pandas as pd
import numpy as np
import math
np.random.seed(0)
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import MinMaxScaler
#Function used to express time to failure as a fraction
def fractionTTF(dat, q):
    #(time to failure - min. time to failure) / (max. time to failure - min. time to failure)
    return (dat.TTF[q] - dat.TTF.min()) / float(dat.TTF.max() - dat.TTF.min())
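#Worked example (hypothetical values): for an engine whose TTF runs from 191 down to 0,
#the row with TTF = 95 gives (95 - 0) / (191 - 0) ≈ 0.497, so fTTF always lies in [0, 1]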
#Function for estimating the predicted total number of cycles per unit
def totcycles(data):
    #cycles / (1 - predicted fraction TTF)
    return data['cycles'] / (1 - data['score'])
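#Why this works (assuming the predicted score approximates TTF / max cycles for the unit):
#score ≈ (maxcycles - cycles) / maxcycles, so 1 - score ≈ cycles / maxcycles and
#cycles / (1 - score) ≈ maxcycles, i.e. the estimated total number of cycles to failure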
#Function to calculate RUL
def RULfunction(data):
    #(max. predicted cycles) - (max. cycles)
    return data['maxpredcycles'] - data['maxcycles']
#The following pandas display options suppress scientific notation in printed
#tables and limit output to 10 columns and 50 rows
pd.set_option('display.float_format', lambda x: '%.3f' % x)
pd.set_option('display.max_columns', 10)
pd.options.display.max_rows=50
#read the training dataset, the test dataset, and the RUL dataset (which holds
#the true RUL values)
train = pd.read_csv('Train.csv', parse_dates=False, decimal=".", header=None)
test = pd.read_csv('Test.csv', parse_dates=False, decimal=".", header=None)
RUL = pd.read_csv('RUL.csv', parse_dates=False, decimal=".", header=None)
#keep only columns with standard deviation > 0.001 (i.e. drop near-constant columns)
train = train.loc[:, train.std() > 0.001]
test = test.loc[:, test.std() > 0.001]
#add column names
train.columns = ['unit', 'cycles', 'op_setting1', 's2', 's3', 's4', 's6', 's7', 's8', 's9', 's11', 's12', 's13', 's14', 's15', 's17', 's20', 's21']
test.columns = ['unit', 'cycles', 'op_setting1', 's2', 's3', 's4', 's6', 's7', 's8', 's9', 's11', 's12', 's13', 's14', 's15', 's17', 's20', 's21']
#using groupby and merge, the max. number of cycles for each engine is added
#as an extra column renamed 'maxcycles'
train = pd.merge(train, train.groupby('unit', as_index=False)['cycles'].max(), how='left', on='unit')
train.rename(columns={"cycles_x": "cycles", "cycles_y": "maxcycles"}, inplace=True)
#This line calculates the time to failure for every row
#(max. no. of cycles for that particular engine - no. of cycles for the engine)
train['TTF'] = train['maxcycles'] - train['cycles']
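#Worked example (hypothetical values): if an engine fails after 192 cycles, the row at cycle 50 gets TTF = 192 - 50 = 142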
print (train['TTF'])
#Scaling
scaler = MinMaxScaler()
print (train.describe().transpose())
#Make copies of the data so that both unscaled and scaled versions are kept
ntrain = train.copy()
ntest = test.copy()
#Scale the selected feature columns (index 2 to 17)
ntrain.iloc[:,2:18] = scaler.fit_transform(ntrain.iloc[:,2:18])
ntest.iloc[:,2:18] = scaler.transform(ntest.iloc[:,2:18])
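#Note: the scaler is fitted on the training data only (fit_transform) and then
#applied unchanged to the test data (transform), so the test set does not influence the scaling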
print (ntrain.describe().transpose())
print (pd.DataFrame(ntest.columns).transpose())
print (ntest.describe().transpose())
#create empty arrays
fTTFz = []
fTTF = []
#iterate over every engine unit, from the smallest to the largest unit number
for i in range(train['unit'].min(), train['unit'].max() + 1):
    dat = train[train.unit == i]
    dat = dat.reset_index(drop=True)
    for q in range(len(dat)):
        fTTFz = fractionTTF(dat, q)
        fTTF.append(fTTFz)
ntrain['fTTF'] = fTTF
print (ntrain['fTTF'])
print(ntrain['fTTF'].describe())
print (pd.DataFrame(ntrain.columns).transpose())
#assign the training inputs (cycles, operational setting and sensor readings)
X_train = ntrain.values[:,1:18]
#assign the training target: the fraction of time to failure ('fTTF', column 20)
Y_train = ntrain.values[:, 20]
#assign the test inputs (same columns as the training inputs)
X_test = ntest.values[:,1:18]
#build the model
#a Keras Sequential model is used
keras = Sequential()
#17 inputs, one hidden layer with 6 neurons, and 1 output neuron
#Adam optimiser and relu activation
keras.add(Dense(6, input_dim=17, kernel_initializer='normal', activation='relu'))
keras.add(Dense(1, kernel_initializer='normal'))
#compile model
keras.compile(loss='mean_squared_error', optimizer='adam')
#fit model to training dataset
keras.fit(X_train, Y_train, epochs=20)
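#Note: fit() is otherwise left at the Keras defaults, so the batch size is 32 and the
#MSE loss is printed per epoch; keras.summary() could be called here to inspect the
#layer shapes and parameter counts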
#predict the fraction TTF ('score') for the test set
score = keras.predict(X_test)
#print (score[0:10])
#print(score.min(), score.max())
#convert the predicted fraction TTF back into cycles, using the same approach as for the training set
test = pd.merge(test, test.groupby('unit', as_index=False)['cycles'].max(), how='left', on='unit')
test.rename(columns={"cycles_x": "cycles", "cycles_y": "maxcycles"}, inplace=True)
test['score'] = score
#print (test.head())
#call the totcycles function to estimate the total number of cycles to failure for each row,
#adding a column named 'maxpredcycles'
test['maxpredcycles'] = totcycles(test)
#call RUL function to obtain RUL
#adding a column named 'RUL'
#this allows the RUL to be calculated at the point of the max. cycle reached in
#the test set
test['RUL'] = RULfunction(test)
#the following computes the predicted RUL per unit by taking the value in the 'RUL'
#column at the last observed cycle of each engine
t = test.columns == 'RUL'
ind = [i for i, x in enumerate(t) if x]
predictedRUL = []
for i in range(test.unit.min(), test.unit.max() + 1):
    npredictedRUL = test[test.unit == i].iloc[test[test.unit == i].cycles.max() - 1, ind]
    predictedRUL.append(npredictedRUL)
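#Note: indexing with cycles.max() - 1 assumes each engine's cycles start at 1 and
#increase by 1 with no gaps, so the last row of a unit sits at position cycles.max() - 1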
xtrueRUL = list(RUL.loc[:,0])
otrueRUL = []
for i in range(0, len(xtrueRUL)):
    otrueRUL = np.concatenate((otrueRUL, list(reversed(np.arange(xtrueRUL[i])))))
xpredictedRUL = list(round(x) for x in predictedRUL)
opredictedRUL = []
new_xpredictedRUL = []
#extract each prediction as a plain Python number (a local name is used so the
#'test' DataFrame is not overwritten)
for i in range(0, len(xpredictedRUL)):
    val = xpredictedRUL[i].item()
    new_xpredictedRUL.append(val)
for i in range(0, len(xpredictedRUL)):
    testing = np.arange(xpredictedRUL[i].item())
    testing = reversed(testing)
    opredictedRUL = np.concatenate((opredictedRUL, list(testing)))
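#Note: otrueRUL and opredictedRUL expand the per-unit RUL values into per-cycle
#countdowns (RUL-1, ..., 1, 0), presumably for a per-cycle comparison; they are not
#used in the summary table or metrics below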
#build a DataFrame with the true and predicted RUL side by side
df1 = pd.concat([pd.Series(RUL[0]), pd.Series(new_xpredictedRUL)], axis=1)
df1.columns = ['true', 'predicted']
df1.index = np.arange(1, len(df1)+1)
df1 = df1.head(10)
#plot bar graph of predicted RUL vs True RUL
df1.plot(kind='bar',figsize=(30,24))
plt.title('Keras Neural Network')
plt.xlabel('Engine ID')
plt.ylabel('Remaining Useful Life (No. of Cycles)')
plt.grid(which='major', linestyle='-', linewidth='0.5', color='black')
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')
plt.show()
#calculates the residual of predicted and true RUL
df1['diff'] = df1['predicted']-df1['true']
#print (xpredictedRUL[0:5])
#print (df1.head(10))
#print True, Predicted, and Residual Values in table format
headers = ['True', 'Predicted', 'Residuals']
print('\n')
print(tabulate(df1.head(10), headers = headers))
#calculates overestimations and underestimations
est = pd.DataFrame({'Count': [(df1['diff']<0).sum(), (df1['diff']==0).sum(), (df1['diff']>0).sum()]}, columns=['Count'], index=['Smaller', 'Zero', 'Larger'])
#print (est)
#calculate the MAE and RMSE evaluation metrics
mae = mean_absolute_error(RUL, new_xpredictedRUL)
mse = mean_squared_error(RUL, new_xpredictedRUL)
rmse = math.sqrt(mse)
rmse = round(rmse, 2)
mae = round(mae, 2)
print('\n')
print(f'Root Mean Squared Error: {rmse}')
print('\n')
print(f'Mean Absolute Error: {mae}')
print('\n')