-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
177 lines (143 loc) · 7.17 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import torch
from cls_TIS_model_sequential import Net
from cls_TIS_dataset import TISDataset
from fcn_train_model import train, evaluate
from fcn_checkpoint import save_checkpoint, save_last, save_best_model
from fcn_graph import graph_maker
from fcn_csv import csv_writer, max_acc_csv
from torchvision import transforms
from cls_TIS_model_utku import TISv1
from cls_TIS_model_exp3 import Net3
if __name__ == "__main__":
    # Script entry point: builds the train/test datasets and loaders, trains
    # the model for a fixed number of epochs, appends one CSV row per epoch,
    # then writes a "best epoch" summary row, a graph and the final weights.

    epochs = 130
    BATCH_SIZE = 1024

    # CUDA for PyTorch
    USE_CUDA = torch.cuda.is_available()
    DEVICE = torch.device("cuda" if USE_CUDA else "cpu")

    # Transformation
    transformations = transforms.Compose([transforms.ToTensor()])

    # Dataset
    # fileInLongPath = os.path.join(os.getcwd(), os.listdir(os.getcwd())[0])
    # this will get the first file in the last directory of your path
    # os.path.dirname(fileInLongPath)  # this will get directory of file
    # os.path.dirname(os.path.dirname(fileInLongPath))  # this will get the directory of the directory of the file

    # Single place to change when the data lives somewhere else; every dataset
    # path below is built from this prefix.
    DATASET_ROOT = 'C:/Users/dongg/Desktop/bsc_project/Thesis_Project_Files/dataset'

    # NOTE(review): only the test dataset is given `transformations`; the
    # train dataset is built without it. Confirm this asymmetry is intended.
    train_TIS_dataset = TISDataset(
        DATASET_ROOT + '/raw_data/human/train/CCDS60-140.pos',
        DATASET_ROOT + '/edited_data/human_balanced/CCDS60-140_balanced.neg')
    test_TIS_dataset = TISDataset(
        DATASET_ROOT + '/raw_data/human/test/chrom21_60-140.pos',
        DATASET_ROOT + '/edited_data/human_balanced/chrom21_60-140_balanced.neg',
        transformations)

    # Data Loader
    train_loader = torch.utils.data.DataLoader(dataset=train_TIS_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_TIS_dataset,
                                              batch_size=BATCH_SIZE,
                                              shuffle=False)

    # Model and Training
    model = Net().to(DEVICE)
    # model = Net3().to(DEVICE)
    # model = TISv1().to(DEVICE)

    # Variables Declaration
    # Always change learning rate/momentum/nesterov/model_number for different models.
    # Some notes: added weight decay to the parameters.
    l_rate = 0.05
    moment = 0.9
    use_nesterov = True
    # String form written to the CSV files; derived from the flag that is
    # actually passed to the optimizer so the log cannot disagree with the run.
    nest = 'TRUE' if use_nesterov else 'FALSE'
    model_number = 28
    wd = 0  # Weight decay
    # Threshold a test accuracy must exceed before a "best model" is saved;
    # set high so that early, low accuracies are not saved.
    best_acc = 90
    optim_csv = 'SGD'
    run_note = 'following model8'  # free-text note stored as the last CSV column

    # Optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=l_rate, momentum=moment,
                                nesterov=use_nesterov, weight_decay=wd)
    # optimizer = torch.optim.Adam(model.parameters(), lr=l_rate)

    # loss function
    criterion = torch.nn.CrossEntropyLoss()

    # Per-epoch history, indexed in step with epoch_list
    train_acc_list = []
    test_acc_list = []
    train_loss_list = []
    test_loss_list = []
    # Labels Separation Accuracy Check Lists
    train_label_0_acc_list = []
    train_label_1_acc_list = []
    test_label_0_acc_list = []
    test_label_1_acc_list = []
    epoch_list = []

    for epoch in range(1, epochs + 1):
        train(model, train_loader, optimizer, criterion, epoch)

        # Re-evaluate on the training set after the epoch's updates
        train_loss, train_acc, train_label_0_acc, train_label_1_acc = evaluate(model, train_loader)
        print('[{}] Train Loss: {:.4f}, Train Accuracy: {:.2f}%'.format(
            epoch, train_loss, train_acc))
        train_acc_list.append(train_acc)
        train_loss_list.append(train_loss)
        train_label_0_acc_list.append(train_label_0_acc)
        train_label_1_acc_list.append(train_label_1_acc)
        epoch_list.append(epoch)

        print('test number:', epoch)
        # Evaluate the trained model using the test set/loader
        test_loss, test_acc, test_label_0_acc, test_label_1_acc = evaluate(model, test_loader)
        print('[{}] Test Loss: {:.4f}, Test Accuracy: {:.2f}%'.format(
            epoch, test_loss, test_acc))
        test_acc_list.append(test_acc)
        test_loss_list.append(test_loss)
        test_label_0_acc_list.append(test_label_0_acc)
        test_label_1_acc_list.append(test_label_1_acc)

        # Save the model whenever the test accuracy beats the best seen so far
        # (starting from the best_acc threshold above)
        if test_acc > best_acc:
            best_acc = test_acc
            best_model_checkpoint = {'epoch': epoch, 'state_dict': model.state_dict(),
                                     'best_acc': best_acc,
                                     'optimizer': optimizer.state_dict()}
            save_best_model(best_model_checkpoint)

        # Save in the existing csv by appending a new row for this epoch
        csv_writer([model_number, epoch, l_rate, moment, nest, wd, BATCH_SIZE,
                    train_acc, train_label_0_acc, train_label_1_acc,
                    test_acc, test_label_0_acc, test_label_1_acc, optim_csv,
                    train_loss, test_loss, run_note])

        # A checkpoint is written after the first epoch only
        if epoch == 1:
            checkpoint = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
            save_checkpoint(checkpoint)

    # Find the max value of the test accuracy and the other metrics recorded at
    # the same index. This treats test accuracy as the deciding metric.
    # Guarded so that a run with epochs == 0 does not crash on max([]).
    if test_acc_list:
        max_test_acc = max(test_acc_list)
        max_index = test_acc_list.index(max_test_acc)  # first epoch reaching the max

        # Another csv file where it only contains max test acc value and their corresponding parameters
        # CSV file name is max_model_tracker_edited.csv
        max_acc_csv([model_number, epoch_list[max_index], l_rate, moment, nest, wd, BATCH_SIZE,
                     train_acc_list[max_index], train_label_0_acc_list[max_index],
                     train_label_1_acc_list[max_index],
                     max_test_acc, test_label_0_acc_list[max_index],
                     test_label_1_acc_list[max_index], optim_csv,
                     train_loss_list[max_index], test_loss_list[max_index], run_note])
        print(max_test_acc, 'Max_test_accuracy')
        print(max_index, 'max index of test')
        print(test_acc_list[max_index], 'using index method')

        # Make and Save Graph
        graph_maker(train_acc_list, test_acc_list, epoch_list, model_number)
    else:
        print('No epochs were run; skipping max-accuracy summary and graph.')

    # Save the final weights. Per the original author's note, saving from GPU
    # or CPU does not matter; the device is chosen at load time.
    last_model = model.state_dict()
    save_last(last_model)

    # Loading examples (PATH is the file written by the save helpers):
    #
    # Load on GPU
    #   device = torch.device("cuda")
    #   model.load_state_dict(torch.load(PATH))
    #   model.to(device)
    #
    # Load on CPU
    #   device = torch.device('cpu')
    #   model = Net()
    #   model.load_state_dict(torch.load(PATH, map_location=device))