-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path train_seq2loc.py
148 lines (102 loc) · 5.79 KB
/
train_seq2loc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import argparse
import os
import json
import torch
import numpy as np
from tensorboardX import SummaryWriter
import seq2loc
import seq2loc.models
import seq2loc.train_seq2loc
from seq2loc.datasets import Seq2LocDataset
import seq2loc.models.tiramisu.tiramisu_seq2loc as tiramisu_seq2loc
import pdb
import os
import torch.optim as optim
def str2bool(v):
    """Parse a command-line string into a bool (for use as an argparse ``type=``).

    Accepts common truthy/falsy spellings, case-insensitively.

    Args:
        v: the raw string supplied on the command line.

    Returns:
        True for 'yes'/'true'/'t'/'y'/'1', False for 'no'/'false'/'f'/'n'/'0'.

    Raises:
        argparse.ArgumentTypeError: if the value is not a recognized spelling
            (argparse turns this into a clean usage error for the user).
    """
    value = v.lower()
    # Set membership reads as an explicit whitelist and is O(1) per lookup.
    if value in {'yes', 'true', 't', 'y', '1'}:
        return True
    if value in {'no', 'false', 'f', 'n', '0'}:
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
# Command-line options for the seq2loc training run.
parser = argparse.ArgumentParser()
# BUG FIX: nargs='+' produces a *list* when the flag is given, so the default
# must also be a list. The old default=0 (an int) crashed the later
# ','.join(str(ID) for ID in opts.GPU_ids) with "int is not iterable"
# whenever --GPU_ids was omitted.
parser.add_argument('--GPU_ids', nargs='+', type=int, default=[0], help='gpu id')
parser.add_argument('--my_seed', type=int, default=0, help='random seed')
parser.add_argument('--save_progress_iter', type=int, default=1, help='number of epochs between saving progress')
parser.add_argument('--save_state_iter', type=int, default=10, help='number of epochs between saving state')
parser.add_argument('--max_seq_len', type=int, default=20000, help='randomly trim sequences to this length')
parser.add_argument('--patch_size', type=int, default=-1, help='patch size for the dataset')
parser.add_argument('--model', type=str, default='seq2loc', help='name of model to use')
parser.add_argument('--model_seq', type=str, default='seq2loc', help='name of sequence model to use')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
parser.add_argument('--nepochs', type=int, default=5000, help='total number of epochs')
parser.add_argument('--batch_size', type=int, default=64, help='batch size')
# 'loss' is resolved by name against torch.nn later (e.g. L1Loss, MSELoss).
parser.add_argument('--loss', type=str, default='L1Loss', help='loss type')
parser.add_argument('--amsgrad', type=str2bool, default=False, help='use AMSGrad variant of ADAM')
parser.add_argument('--im_growth_rate', type=int, default=16, help='growth rate of the tiramisu net')
parser.add_argument('--seq_layers_deep', type=int, default=20, help='number of layers in the model')
parser.add_argument('--seq_ch_intermed', type=int, default=256, help='number of intermediate channels between layers')
parser.add_argument('--seq_resid', type=str, default='sum', help='residual type of network, "sum" or "cat"')
parser.add_argument('--seq_dropout', type=float, default=0.5, help='dropout rate for sequence portion of network')
parser.add_argument('--seq_nout', type=int, default=1024, help='output size of the sequence model')
parser.add_argument('--seq_pooling', type=str, default='max', help='pooling type to use in the network, "max" or "avg"')
opts = parser.parse_args()
print(opts)
# Restrict CUDA to the requested physical devices; from here on they are
# re-indexed 0..k-1, so logical ids are just a range over the selection.
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, opts.GPU_ids))
GPU_ids = list(range(len(opts.GPU_ids)))
GPU_id = GPU_ids[0]

# Seed every RNG in play so a run can be reproduced from its options alone.
seed = opts.my_seed
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)

# The TensorBoard writer's log directory doubles as this run's save directory.
writer = SummaryWriter()
save_dir = writer.file_writer.get_logdir()

# Persist the full option set next to the logs for later inspection.
with open('{}/prefs.json'.format(save_dir), 'w') as fp:
    json.dump(vars(opts), fp)
# Train/validate splits share every option except the CSV they read.
_split_kwargs = dict(
    max_seq_len=opts.max_seq_len,
    patch_size=opts.patch_size,
    GPU_id=GPU_id,
)
ds = Seq2LocDataset('./data/hpa_data_resized_train.csv', **_split_kwargs)
ds_validate = Seq2LocDataset('./data/hpa_data_resized_validate.csv', **_split_kwargs)

# Alphabet size comes from the dataset's sequence encoding; the sequence
# model's output width is configurable.
N_LETTERS = len(ds.sequence_map)
N_CLASSES = opts.seq_nout

# Resolve the loss class by name from torch.nn (e.g. 'L1Loss') and instantiate.
criterion = getattr(torch.nn, opts.loss)()
# Pick the sequence encoder: a transformer, or (default) a residual conv net.
# Select the class and its extra kwargs first, then construct once.
if opts.model_seq == 'transformer':
    _seq_cls = seq2loc.models.TransformerClassifier
    _seq_kwargs = dict(
        growth_rate=64,
        max_seq_len=20000,
        n_layers=opts.seq_layers_deep,
        n_heads_per_layer=6,
    )
else:
    _seq_cls = seq2loc.models.SeqConvResidClassifier
    _seq_kwargs = dict(
        kernel_size=3,
        layers_deep=opts.seq_layers_deep,
        ch_intermed=opts.seq_ch_intermed,
        pooling_type=opts.seq_pooling,
        resid_type=opts.seq_resid,
        dropout=opts.seq_dropout,
        downsize_on_nth=3,
    )
model_seq = _seq_cls(N_LETTERS, N_CLASSES, **_seq_kwargs).cuda(GPU_id)
# Image-side network. Both tiramisu variants take an identical configuration,
# differing only in the class; any other value of --model falls back to the
# plain Seq2Loc architecture.
_tiramisu_kwargs = dict(
    in_channels=2,
    down_blocks=(3, 3, 3, 3, 3),
    up_blocks=(3, 3, 3, 3, 3),
    bottleneck_layers=5,
    growth_rate=opts.im_growth_rate,
    string_net=model_seq,
)
if opts.model == 'tiramisu':
    model = tiramisu_seq2loc.FCDenseNet(**_tiramisu_kwargs)
elif opts.model == 'tiramisu_simple':
    model = tiramisu_seq2loc.FCDenseNet_simple(**_tiramisu_kwargs)
else:
    model = seq2loc.models.Seq2Loc(2, 1, model_seq)
model = model.cuda(GPU_id)

# Initialize weights, build the optimizer, and run the training loop;
# train() hands back the trained model.
model.apply(seq2loc.utils.model.weights_init)
opt = optim.Adam(model.parameters(), lr=opts.lr, betas=(0.5, 0.999), amsgrad=opts.amsgrad)
model = seq2loc.train_seq2loc.train(
    model,
    opt,
    criterion,
    ds,
    ds_validate,
    writer=writer,
    nepochs=opts.nepochs,
    batch_size=opts.batch_size,
)