# train_fg_model.py
# Forked from lolipopshock/Seq_Scene_Gen

import argparse
import os, time
from shutil import copyfile
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable
from data_loader_fg_model import CocoData
from utils import show_result, mse_loss, show_result_rgb
from networks import Discriminator, Generator_FG
from Feature_Matching import VGGLoss
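
# Example invocation (a sketch only; the paths below are placeholders, not the
# repository's defaults - adjust them to your local MS-COCO layout):
#   python train_fg_model.py \
#       --train_imgs_path /path/to/coco/images/train2017 \
#       --train_annotation_path /path/to/coco/annotations/instances_train2017.json \
#       --category_names giraffe,elephant,zebra,sheep,cow,bear \
#       --batch_size 16 --img_size 256 --train_epoch 400
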
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--log_dir', type=str, default='log', help='Name of the log folder')
    # Note: argparse with type=bool treats any non-empty string as True
    parser.add_argument('--save_models', type=bool, default=True, help='Set True if you want to save trained models')
    parser.add_argument('--pre_trained_model_path', type=str, default=None, help='Pre-trained model path')
    parser.add_argument('--pre_trained_model_epoch', type=str, default='0', help='Pre-trained model epoch, e.g. 200')
    parser.add_argument('--train_imgs_path', type=str, default='C:/Users/motur/coco/images/train2017', help='Path to training images')
    parser.add_argument('--train_annotation_path', type=str, default='C:/Users/motur/coco/annotations/instances_train2017.json', help='Path to the annotation file (.json)')
    parser.add_argument('--category_names', type=str, default='giraffe,elephant,zebra,sheep,cow,bear', help='Comma-separated list of MS-COCO category names')
    parser.add_argument('--num_test_img', type=int, default=16, help='Number of sample images saved during training')
    parser.add_argument('--img_size', type=int, default=256, help='Generated image size')
    parser.add_argument('--local_patch_size', type=int, default=256, help='Size of instance patches after interpolation')
    parser.add_argument('--batch_size', type=int, default=16, help='Mini-batch size')
    parser.add_argument('--train_epoch', type=int, default=400, help='Maximum number of training epochs')
    parser.add_argument('--lr', type=float, default=0.0002, help='Initial learning rate')
    parser.add_argument('--optim_step_size', type=int, default=80, help='Learning rate decay step size')
    parser.add_argument('--optim_gamma', type=float, default=0.5, help='Learning rate decay ratio')
    parser.add_argument('--critic_iter', type=int, default=5, help='Number of discriminator updates per generator update')
    parser.add_argument('--noise_size', type=int, default=128, help='Noise vector size')
    parser.add_argument('--lambda_FM', type=float, default=1, help='Trade-off parameter for the feature matching loss')
    parser.add_argument('--lambda_recon', type=float, default=0.00001, help='Trade-off parameter for the reconstruction loss')
    parser.add_argument('--num_res_blocks', type=int, default=5, help='Number of residual blocks in the generator network')
    parser.add_argument('--trade_off_G', type=float, default=0.1, help='Trade-off parameter controlling gradient flow to the generator from D_local and D_glob')
    opt = parser.parse_args()
    print(opt)
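
    # Overview: this script trains the foreground generator G_fg adversarially against
    # two discriminators - D_glob, which scores the full image concatenated with the
    # segmentation masks, and D_instance, which scores cropped instance patches.
    # The remainder of main() sets up logging folders, the COCO data pipeline, the
    # networks, the losses and optimizers, and then runs the training loop.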
    # Create log folders
    root = 'result_fg/' + opt.category_names + '/'
    model = 'coco_model_'
    result_folder_name = 'images_' + opt.log_dir
    model_folder_name = 'models_' + opt.log_dir
    if not os.path.isdir(root):
        os.makedirs(root)
    if not os.path.isdir(root + result_folder_name):
        os.makedirs(root + result_folder_name)
    if not os.path.isdir(root + model_folder_name):
        os.makedirs(root + model_folder_name)

    # Save a copy of this script next to the results
    copyfile(os.path.basename(__file__), root + result_folder_name + '/' + os.path.basename(__file__))

    # Transformations applied to dataset images, e.g. scaling and normalization
    # (transforms.Scale is deprecated in newer torchvision; transforms.Resize is the equivalent)
    transform = transforms.Compose([transforms.Scale((opt.img_size, opt.img_size)),
                                    transforms.ToTensor(),
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # Load the dataset
    category_names = opt.category_names.split(',')
    dataset = CocoData(root=opt.train_imgs_path, annFile=opt.train_annotation_path,
                       category_names=category_names, transform=transform, final_img_size=opt.img_size)
    # Discard images that contain very small instances
    dataset.discard_small(min_area=0.03, max_area=1)
    # Define the data loader
    train_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True)
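
    # Each batch from CocoData is a dict; the keys used below are:
    #   'image'          - RGB image scaled to img_size
    #   'single_fg_mask' - per-category foreground masks (one channel per category)
    #   'seg_mask'       - segmentation mask used to condition the discriminators
    #   'mask_instance'  - mask of the selected instance patch
    #   'bbox'           - bounding-box coordinates of that instance
    # (These descriptions are inferred from how the fields are used in this script,
    #  not from the CocoData implementation itself.)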
    # Fixed masks and noise vectors used to monitor progress during training
    data_iter = iter(train_loader)
    sample_batched = next(data_iter)
    x_fixed = sample_batched['image'][0:opt.num_test_img]
    x_fixed = Variable(x_fixed.cuda())
    y_fixed = sample_batched['single_fg_mask'][0:opt.num_test_img]
    y_fixed = Variable(y_fixed.cuda())
    z_fixed = torch.randn((opt.num_test_img, opt.noise_size))
    z_fixed = Variable(z_fixed.cuda())
    # Define the networks
    G_fg = Generator_FG(z_dim=opt.noise_size, label_channel=len(category_names), num_res_blocks=opt.num_res_blocks)
    D_glob = Discriminator(channels=3 + len(category_names))
    D_instance = Discriminator(channels=3 + len(category_names), input_size=opt.local_patch_size)
    G_fg.cuda()
    D_glob.cuda()
    D_instance.cuda()

    # Load parameters from pre-trained models, if provided
    if opt.pre_trained_model_path is not None and opt.pre_trained_model_epoch is not None:
        try:
            G_fg.load_state_dict(torch.load(opt.pre_trained_model_path + '/coco_model_G_fg_epoch_' + opt.pre_trained_model_epoch + '.pth'))
            D_glob.load_state_dict(torch.load(opt.pre_trained_model_path + '/coco_model_D_glob_epoch_' + opt.pre_trained_model_epoch + '.pth'))
            D_instance.load_state_dict(torch.load(opt.pre_trained_model_path + '/coco_model_D_local_epoch_' + opt.pre_trained_model_epoch + '.pth'))
            print('Parameters are loaded!')
        except Exception:
            print('Error: Pre-trained parameters could not be loaded!')
    # Interpolation used to resize cropped instance patches to the local patch size
    up_instance = nn.Upsample(size=(opt.local_patch_size, opt.local_patch_size), mode='bilinear')
    # Pooling used when the image size and the local patch size do not match
    pooling_instance = nn.Sequential()
    if opt.local_patch_size != opt.img_size:
        pooling_instance.add_module('0', nn.AvgPool2d(int(opt.img_size / opt.local_patch_size)))

    # Adversarial training loss - binary cross entropy
    BCE_loss = nn.BCELoss()
    # Feature matching loss
    criterionVGG = VGGLoss()
    criterionVGG = criterionVGG.cuda()

    # Optimizers
    G_local_optimizer = optim.Adam(G_fg.parameters(), lr=opt.lr, betas=(0.0, 0.9))
    D_local_optimizer = optim.Adam(list(filter(lambda p: p.requires_grad, D_glob.parameters()))
                                   + list(filter(lambda p: p.requires_grad, D_instance.parameters())),
                                   lr=opt.lr, betas=(0.0, 0.9))

    # Define learning rate schedulers
    scheduler_G = lr_scheduler.StepLR(G_local_optimizer, step_size=opt.optim_step_size, gamma=opt.optim_gamma)
    scheduler_D = lr_scheduler.StepLR(D_local_optimizer, step_size=opt.optim_step_size, gamma=opt.optim_gamma)
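
    # Training scheme: both discriminators (global and instance-level) are updated
    # opt.critic_iter times for every generator update. The generator loss blends the
    # global and local adversarial terms via opt.trade_off_G, and adds a VGG
    # feature-matching term (weighted by lambda_FM) plus a reconstruction term on the
    # background region outside the foreground masks (weighted by lambda_recon).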
    # ---------------------------- TRAIN -----------------------------------------
    print('training start!')
    start_time = time.time()
    for epoch in range(int(opt.pre_trained_model_epoch), opt.train_epoch):
        epoch_start_time = time.time()
        scheduler_G.step()
        scheduler_D.step()
        D_local_losses = []
        G_local_losses = []

        y_real_ = torch.ones(opt.batch_size)
        y_fake_ = torch.zeros(opt.batch_size)
        y_real_, y_fake_ = Variable(y_real_.cuda()), Variable(y_fake_.cuda())

        data_iter = iter(train_loader)
        num_iter = 0
        while num_iter < len(train_loader):
            j = 0
            while j < opt.critic_iter and num_iter < len(train_loader):
                j += 1
                sample_batched = next(data_iter)
                num_iter += 1
                x_ = sample_batched['image']
                y_ = sample_batched['single_fg_mask']
                fg_mask = sample_batched['seg_mask']
                y_instances = sample_batched['mask_instance']
                bbox = sample_batched['bbox']
                mini_batch = x_.size()[0]
                if mini_batch != opt.batch_size:
                    break

                # Update discriminators - D
                # Real examples
                D_glob.zero_grad()
                D_instance.zero_grad()
                x_, y_ = Variable(x_.cuda()), Variable(y_.cuda())
                fg_mask = Variable(fg_mask.cuda())
                y_reduced = torch.sum(y_, 1).clamp(0, 1).view(y_.size(0), 1, opt.img_size, opt.img_size)
                x_d = torch.cat([x_, fg_mask], 1)

                x_instances = torch.zeros((opt.batch_size, 3, opt.local_patch_size, opt.local_patch_size))
                x_instances = Variable(x_instances.cuda())
                y_instances = Variable(y_instances.cuda())
                y_instances = pooling_instance(y_instances)
                G_instances = torch.zeros((opt.batch_size, 3, opt.local_patch_size, opt.local_patch_size))
                G_instances = Variable(G_instances.cuda())

                # Crop real instances from their bounding boxes and resize them to the local patch size
                for t in range(x_d.size()[0]):
                    x_instance = x_[t, 0:3, bbox[0][t]:bbox[1][t], bbox[2][t]:bbox[3][t]]
                    x_instance = x_instance.contiguous().view(1, x_instance.size()[0], x_instance.size()[1], x_instance.size()[2])
                    x_instances[t] = up_instance(x_instance)

                D_result_instance = D_instance(torch.cat([x_instances, y_instances], 1)).squeeze()
                D_result = D_glob(x_d).squeeze()
                D_real_loss = BCE_loss(D_result, y_real_) + BCE_loss(D_result_instance, y_real_)
                D_real_loss.backward()

                # Fake examples
                z_ = torch.randn((mini_batch, opt.noise_size))
                z_ = Variable(z_.cuda())
                # Generate fake images conditioned on the masks and the masked-out background
                G_fg_result = G_fg(z_, y_, torch.mul(x_, (1 - y_reduced)))
                G_result_d = torch.cat([G_fg_result, fg_mask], 1)

                # Crop fake instances from the same bounding boxes
                for t in range(x_d.size()[0]):
                    G_instance = G_result_d[t, 0:3, bbox[0][t]:bbox[1][t], bbox[2][t]:bbox[3][t]]
                    G_instance = G_instance.contiguous().view(1, G_instance.size()[0], G_instance.size()[1], G_instance.size()[2])
                    G_instances[t] = up_instance(G_instance)

                D_result_instance = D_instance(torch.cat([G_instances, y_instances], 1).detach()).squeeze()
                D_result = D_glob(G_result_d.detach()).squeeze()
                D_fake_loss = BCE_loss(D_result, y_fake_) + BCE_loss(D_result_instance, y_fake_)
                D_fake_loss.backward()
                D_local_optimizer.step()

                D_train_loss = D_real_loss + D_fake_loss
                D_local_losses.append(D_train_loss.data)

            # A truncated mini-batch ends the epoch early
            if mini_batch != opt.batch_size:
                break
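
            # For the generator step, the discriminators are re-applied to the generated
            # batch without detach() so that gradients flow back into G_fg; trade_off_G
            # weights the local (instance) versus global adversarial terms.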
            # Update generator G
            G_fg.zero_grad()
            D_result = D_glob(G_result_d).squeeze()
            D_result_instance = D_instance(torch.cat([G_instances, y_instances], 1)).squeeze()
            G_train_loss = (1 - opt.trade_off_G) * BCE_loss(D_result, y_real_) + opt.trade_off_G * BCE_loss(D_result_instance, y_real_)

            # Feature matching loss between the generated image and the corresponding ground truth
            FM_loss = criterionVGG(G_fg_result, x_)
            # Reconstruction loss on the region outside the foreground masks
            Recon_loss = mse_loss(torch.mul(x_, (1 - y_reduced)), torch.mul(G_fg_result, (1 - y_reduced)))

            total_loss = G_train_loss + opt.lambda_FM * FM_loss + opt.lambda_recon * Recon_loss
            total_loss.backward()
            G_local_optimizer.step()
            G_local_losses.append(G_train_loss.data)

            print('loss_d: %.3f, loss_g: %.3f' % (D_train_loss.data, G_train_loss.data))
            if (num_iter % 100) == 0:
                print('%d - %d complete!' % ((epoch + 1), num_iter))
                print(result_folder_name)
        epoch_end_time = time.time()
        per_epoch_ptime = epoch_end_time - epoch_start_time
        print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f' % ((epoch + 1), opt.train_epoch, per_epoch_ptime,
                                                                     torch.mean(torch.FloatTensor(D_local_losses)),
                                                                     torch.mean(torch.FloatTensor(G_local_losses))))

        # Save sample images for the fixed inputs
        G_fg.eval()
        if epoch == 0:
            show_result_rgb((epoch + 1), x_fixed, save=True, path=root + result_folder_name + '/' + model + str(epoch + 1) + '_gt.png')
            for t in range(y_fixed.size()[1]):
                show_result_rgb((epoch + 1), y_fixed[:, t:t + 1, :, :], save=True, path=root + result_folder_name + '/' + model + str(epoch + 1) + '_' + str(t) + '_masked.png')
        show_result_rgb((epoch + 1), G_fg(z_fixed, y_fixed, torch.mul(x_fixed, (1 - torch.sum(y_fixed, 1).view(y_fixed.size(0), 1, opt.img_size, opt.img_size)))),
                        save=True, path=root + result_folder_name + '/' + model + str(epoch + 1) + '_fg.png')
        G_fg.train()
        # Save model parameters
        if opt.save_models and (epoch > 11 and epoch % 50 == 0):
            torch.save(G_fg.state_dict(), root + model_folder_name + '/' + model + 'G_fg_epoch_' + str(epoch) + '.pth')
            torch.save(D_glob.state_dict(), root + model_folder_name + '/' + model + 'D_glob_epoch_' + str(epoch) + '.pth')
            torch.save(D_instance.state_dict(), root + model_folder_name + '/' + model + 'D_local_epoch_' + str(epoch) + '.pth')
    end_time = time.time()
    total_ptime = end_time - start_time
    print('Training finished! Saving training results...')
    print('Training time: ' + str(total_ptime))


if __name__ == '__main__':
    main()