-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathCNN_model_OCV_train_predict.py
84 lines (71 loc) · 3.25 KB
/
CNN_model_OCV_train_predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from keras.models import Sequential, load_model, Model
from keras.layers import Input, Dense, Activation, Conv2D, Dropout, Flatten
from keras import optimizers, utils, initializers, regularizers
import keras.backend as K
import numpy as np
import pandas as pd
import random
import os
# Seed passed to the seeded initializers and the dropout layer further down;
# it is also embedded in the prediction file names.
SEED_NUM = 30000
pred_file_name = 'CNN_OCV_prediction_Initialization_SEED' + str(SEED_NUM)

# Enumerate GPUs in PCI bus order and expose only device index 3 to this
# process.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="3"

data_path = 'v10_170713_5x5_include_na_dataset.npz'
label_path = "v10_170713_5x5_include_na_label.npz"

# np.load on an .npz archive returns a lazy NpzFile that keeps the underlying
# file open. Indexing it reads the array into memory, so the archive can be
# closed right afterwards; the context manager guarantees that it is.
with np.load(data_path) as data_archive:
    x_tr = data_archive['arr_0']
with np.load(label_path) as label_archive:
    y_tr = label_archive['arr_0']
def split_train_validation(data_set, label_set, fold, k):
    """Hold out the ``fold``-th of ``k`` contiguous chunks for validation.

    The chunk length is ``int(len(data_set) / k)``. Samples left over after
    the ``k``-th chunk are never held out, so they stay in the training part
    of every fold.

    Returns ``(x_train, y_train, x_test, y_test)``: the test pair is the
    held-out chunk, the train pair is everything else in its original order.
    """
    chunk_len = int(len(data_set) / k)
    first = chunk_len * fold
    last = chunk_len * (fold + 1)
    held_out_rows = range(first, last)

    # Validation part: the contiguous slice itself.
    x_test, y_test = data_set[first:last], label_set[first:last]

    # Training part: a copy of the inputs with the held-out rows removed.
    x_train = np.delete(data_set, held_out_rows, 0)
    y_train = np.delete(label_set, held_out_rows, 0)
    return x_train, y_train, x_test, y_test
def norm_by_std_nan(train, val):
    """Standardize ``train`` and ``val`` with NaN-aware statistics of ``train``.

    The per-feature mean and standard deviation are taken along axis 0 of
    ``train`` with NaN entries ignored. Both arrays are centered and scaled
    with those statistics, and every remaining NaN is replaced by 0
    (``np.nan_to_num`` also maps +/-inf to large finite values).

    Degenerate features are handled explicitly:
    * a feature whose observed training values have zero standard deviation
      is only centered (scale 1);
    * a feature with no observed training value at all is left unscaled
      (mean 0, scale 1), so only its NaNs become 0.

    Returns the pair ``(train, val)`` as plain ndarrays.
    """
    observed = np.ma.array(train, mask=np.isnan(train))

    # Fully-masked columns yield masked statistics; fill them with the
    # identity transform so the arithmetic below runs on plain ndarrays.
    mean = np.ma.filled(np.mean(observed, 0), 0.0)
    std = np.ma.filled(np.std(observed, 0), 1.0)

    # Avoid dividing by zero for constant features.
    std = np.where(std == 0, 1.0, std)

    # NaN never compares equal to anything (including itself), so missing
    # entries cannot be found with `== np.nan`; nan_to_num does the
    # replacement.
    train = np.nan_to_num((train - mean) / std)
    val = np.nan_to_num((val - mean) / std)
    return train, val
# --- Cross-validation / training schedule ---
fold = 10                            # number of cross-validation folds
epochs = 200                         # training epochs per fold
tr_batch_size = ev_batch_size = 100  # batch sizes for fit / predict

# --- Input geometry: each sample is reshaped to (width, height, channels) ---
Input_width = Input_height = 5
num_channels = 28

# --- Network size ---
n_conv, n_hidden = 64, 128           # Conv2D filters, Dense hidden units

# NOTE(review): noise_std is not referenced in the visible part of this
# script -- confirm whether it is still needed.
noise_std = 0.1
# Cross-validation driver: for every fold, train a fresh CNN on the remaining
# data, and after each epoch report the validation R^2 and save the
# validation predictions to a per-epoch CSV file.
for fold_num in range(fold):
    save_directory = 'prediction/OCV/fold'+str(fold_num)
    if not os.path.exists(save_directory):
        os.makedirs(save_directory)

    # Split, standardize with training statistics only, then reshape the
    # samples into (width, height, channels) images for the Conv2D layer.
    x_train, y_train, x_val, y_val = split_train_validation(x_tr, y_tr, fold_num, fold)
    x_train, x_val = norm_by_std_nan(x_train, x_val)
    x_train = x_train.reshape(len(x_train), Input_width, Input_height, num_channels)
    x_val = x_val.reshape(len(x_val), Input_width, Input_height, num_channels)

    # Predictions are flattened to shape (n,) below. Flatten the targets too
    # so that (n, 1)-shaped labels cannot broadcast against them into an
    # (n, n) matrix and silently corrupt R^2.
    y_val_flat = np.ravel(y_val)
    # Total sum of squares does not depend on the epoch; compute it once.
    ss_total = np.sum(np.square(y_val_flat - np.mean(y_val_flat)))

    # Release the Keras global state left over from the previous fold's
    # model; otherwise it accumulates with every model built in this loop.
    K.clear_session()

    model = Sequential([
        Conv2D(
            n_conv, (3,3),
            kernel_initializer=initializers.TruncatedNormal(mean=0.0, stddev=0.1, seed=SEED_NUM),
            bias_initializer=initializers.Constant(0.1),
            input_shape=(Input_width, Input_height, num_channels),
            padding='same',
        ),
        Activation('relu'),
        Flatten(),
        Dropout(0.5, seed=SEED_NUM),
        Dense(
            n_hidden, activation='elu',
            kernel_initializer=initializers.TruncatedNormal(mean=0.0, stddev=0.1, seed=SEED_NUM),
            bias_initializer=initializers.Constant(0.1),
        ),
        Dense(1, activation='linear')
    ])
    nadam = optimizers.Nadam(lr=0.0002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
    model.compile(optimizer=nadam, loss='mse', metrics=['mae'])

    for epoch in range(epochs):
        # Train for exactly one epoch per iteration so the validation
        # predictions can be evaluated and saved after every epoch.
        model.fit(x_train, y_train, epochs=1, batch_size=tr_batch_size, verbose=1)

        pred = model.predict(x_val, batch_size=ev_batch_size).reshape(len(x_val),)
        val_r2 = 1 - (np.sum(np.square(y_val_flat - pred)) / ss_total)
        print("epoch:{}, validation set r-squared:{}".format(epoch, val_r2))

        # One column of predictions per file, one file per epoch.
        sv_pred = np.array(pred).reshape(len(pred), 1)
        np.savetxt(save_directory + '/' + pred_file_name + '_epoch' + str(epoch) + ".csv", sv_pred, delimiter=',')