diff --git a/README_ospp.md b/README_ospp.md deleted file mode 100644 index 93a32638..00000000 --- a/README_ospp.md +++ /dev/null @@ -1,3 +0,0 @@ -# OSPP -I changed the sedna source code to implement my algorithm. -Please turn to https://github.com/kubeedge/sedna/pull/378 and https://github.com/nailtu30/sedna/blob/ospp-final/README_ospp.md for more information. \ No newline at end of file diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/__init__.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/__init__.py new file mode 100755 index 00000000..42e08e58 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/__init__.py @@ -0,0 +1 @@ +from . import train \ No newline at end of file diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/diffaug.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/diffaug.py new file mode 100755 index 00000000..54c0894f --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/diffaug.py @@ -0,0 +1,76 @@ +# Differentiable Augmentation for Data-Efficient GAN Training +# Shengyu Zhao, Zhijian Liu, Ji Lin, Jun-Yan Zhu, and Song Han +# https://arxiv.org/pdf/2006.10738 + +import torch +import torch.nn.functional as F + + +def DiffAugment(x, policy='', channels_first=True): + if policy: + if not channels_first: + x = x.permute(0, 3, 1, 2) + for p in policy.split(','): + for f in AUGMENT_FNS[p]: + x = f(x) + if not channels_first: + x = x.permute(0, 2, 3, 1) + x = x.contiguous() + return x + + +def rand_brightness(x): + x = x + (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) - 0.5) + return x + + +def rand_saturation(x): + x_mean = x.mean(dim=1, keepdim=True) + x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) * 2) + x_mean + return x + + +def rand_contrast(x): + x_mean = x.mean(dim=[1, 2, 3], keepdim=True) + x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) + 0.5) + x_mean + return x + + +def rand_translation(x, ratio=0.125): + shift_x, shift_y = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5) + translation_x = torch.randint(-shift_x, shift_x + 1, size=[x.size(0), 1, 1], device=x.device) + translation_y = torch.randint(-shift_y, shift_y + 1, size=[x.size(0), 1, 1], device=x.device) + grid_batch, grid_x, grid_y = torch.meshgrid( + torch.arange(x.size(0), dtype=torch.long, device=x.device), + torch.arange(x.size(2), dtype=torch.long, device=x.device), + torch.arange(x.size(3), dtype=torch.long, device=x.device), + ) + grid_x = torch.clamp(grid_x + translation_x + 1, 0, x.size(2) + 1) + grid_y = torch.clamp(grid_y + translation_y + 1, 0, x.size(3) + 1) + x_pad = F.pad(x, [1, 1, 1, 1, 0, 0, 0, 0]) + x = x_pad.permute(0, 2, 3, 1).contiguous()[grid_batch, grid_x, grid_y].permute(0, 3, 1, 2) + return x + + +def rand_cutout(x, ratio=0.5): + cutout_size = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5) + offset_x = torch.randint(0, x.size(2) + (1 - cutout_size[0] % 2), size=[x.size(0), 1, 1], device=x.device) + offset_y = torch.randint(0, x.size(3) + (1 - cutout_size[1] % 2), size=[x.size(0), 1, 1], device=x.device) + grid_batch, grid_x, grid_y = torch.meshgrid( + torch.arange(x.size(0), dtype=torch.long, device=x.device), + torch.arange(cutout_size[0], dtype=torch.long, device=x.device), + torch.arange(cutout_size[1], dtype=torch.long, device=x.device), + ) + grid_x = 
torch.clamp(grid_x + offset_x - cutout_size[0] // 2, min=0, max=x.size(2) - 1) + grid_y = torch.clamp(grid_y + offset_y - cutout_size[1] // 2, min=0, max=x.size(3) - 1) + mask = torch.ones(x.size(0), x.size(2), x.size(3), dtype=x.dtype, device=x.device) + mask[grid_batch, grid_x, grid_y] = 0 + x = x * mask.unsqueeze(1) + return x + + +AUGMENT_FNS = { + 'color': [rand_brightness, rand_saturation, rand_contrast], + 'translation': [rand_translation], + 'cutout': [rand_cutout], +} \ No newline at end of file diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/generate_fake_imgs.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/generate_fake_imgs.py new file mode 100755 index 00000000..ece277e8 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/generate_fake_imgs.py @@ -0,0 +1,43 @@ +import torch + +from models import Generator, weights_init + +import matplotlib.pyplot as plt + +import os + +from collections import OrderedDict + +import numpy as np + +from skimage import io + + +device = 'cuda' + +ngf = 64 +nz = 256 +im_size = 1024 +netG = Generator(ngf=ngf, nz=nz, im_size=im_size).to(device) +weights_init(netG) +weights = torch.load(os.getcwd() + '/train_results/test1/models/50000.pth') +netG_weights = OrderedDict() +for name, weight in weights['g'].items(): + name = name.split('.')[1:] + name = '.'.join(name) + netG_weights[name] = weight +netG.load_state_dict(netG_weights) +current_batch_size = 1 + + +index = 1 +while index <= 3000: + noise = torch.Tensor(current_batch_size, nz).normal_(0, 1).to(device) + fake_images = netG(noise)[0] + for fake_image in fake_images: + fake_image = fake_image.detach().cpu().numpy().transpose(1, 2, 0) + fake_image = fake_image * np.array([0.5, 0.5, 0.5]) + fake_image = fake_image + np.array([0.5, 0.5, 0.5]) + fake_image = (fake_image * 255).astype(np.uint8) + io.imsave('../data/fake_imgs1/' + str(index) + '.png', fake_image) + index += 1 diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/__init__.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/__init__.py new file mode 100755 index 00000000..f4a84a76 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/__init__.py @@ -0,0 +1,182 @@ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import skimage +import torch +from torch.autograd import Variable + +from lpips import dist_model + + +from skimage.metrics import structural_similarity as compare_ssim + + +class PerceptualLoss(torch.nn.Module): + # VGG using our perceptually-learned weights (LPIPS metric) + def __init__(self, model='net-lin', net='alex', colorspace='rgb', spatial=False, use_gpu=True, gpu_ids=[0]): + # def __init__(self, model='net', net='vgg', use_gpu=True): # "default" way of using VGG as a perceptual loss + super(PerceptualLoss, self).__init__() + print('Setting up Perceptual loss...') + self.use_gpu = use_gpu + self.spatial = spatial + self.gpu_ids = gpu_ids + self.model = dist_model.DistModel() + self.model.initialize(model=model, net=net, use_gpu=use_gpu, + colorspace=colorspace, spatial=self.spatial, gpu_ids=gpu_ids) + print('...[%s] initialized' % self.model.name()) + print('...Done') + + def forward(self, pred, target, normalize=False): + """ + Pred and target are Variables. 
+ If normalize is True, assumes the images are between [0,1] and then scales them between [-1,+1] + If normalize is False, assumes the images are already between [-1,+1] + + Inputs pred and target are Nx3xHxW + Output pytorch Variable N long + """ + + if normalize: + target = 2 * target - 1 + pred = 2 * pred - 1 + + return self.model.forward(target, pred) + + +def normalize_tensor(in_feat, eps=1e-10): + norm_factor = torch.sqrt(torch.sum(in_feat**2, dim=1, keepdim=True)) + return in_feat/(norm_factor+eps) + + +def l2(p0, p1, range=255.): + return .5*np.mean((p0 / range - p1 / range)**2) + + +def psnr(p0, p1, peak=255.): + return 10*np.log10(peak**2/np.mean((1.*p0-1.*p1)**2)) + + +def dssim(p0, p1, range=255.): + return (1 - compare_ssim(p0, p1, data_range=range, multichannel=True)) / 2. + + +def rgb2lab(in_img, mean_cent=False): + from skimage import color + img_lab = color.rgb2lab(in_img) + if(mean_cent): + img_lab[:, :, 0] = img_lab[:, :, 0]-50 + return img_lab + + +def tensor2np(tensor_obj): + # change dimension of a tensor object into a numpy array + return tensor_obj[0].cpu().float().numpy().transpose((1, 2, 0)) + + +def np2tensor(np_obj): + # change dimenion of np array into tensor array + return torch.Tensor(np_obj[:, :, :, np.newaxis].transpose((3, 2, 0, 1))) + + +def tensor2tensorlab(image_tensor, to_norm=True, mc_only=False): + # image tensor to lab tensor + from skimage import color + + img = tensor2im(image_tensor) + img_lab = color.rgb2lab(img) + if(mc_only): + img_lab[:, :, 0] = img_lab[:, :, 0]-50 + if(to_norm and not mc_only): + img_lab[:, :, 0] = img_lab[:, :, 0]-50 + img_lab = img_lab/100. + + return np2tensor(img_lab) + + +def tensorlab2tensor(lab_tensor, return_inbnd=False): + from skimage import color + import warnings + warnings.filterwarnings("ignore") + + lab = tensor2np(lab_tensor)*100. + lab[:, :, 0] = lab[:, :, 0]+50 + + rgb_back = 255.*np.clip(color.lab2rgb(lab.astype('float')), 0, 1) + if(return_inbnd): + # convert back to lab, see if we match + lab_back = color.rgb2lab(rgb_back.astype('uint8')) + mask = 1.*np.isclose(lab_back, lab, atol=2.) + mask = np2tensor(np.prod(mask, axis=2)[:, :, np.newaxis]) + return (im2tensor(rgb_back), mask) + else: + return im2tensor(rgb_back) + + +def rgb2lab(input): + from skimage import color + return color.rgb2lab(input / 255.) + + +def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.): + image_numpy = image_tensor[0].cpu().float().numpy() + image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor + return image_numpy.astype(imtype) + + +def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.): + return torch.Tensor((image / factor - cent) + [:, :, :, np.newaxis].transpose((3, 2, 0, 1))) + + +def tensor2vec(vector_tensor): + return vector_tensor.data.cpu().numpy()[:, :, 0, 0] + + +def voc_ap(rec, prec, use_07_metric=False): + """ ap = voc_ap(rec, prec, [use_07_metric]) + Compute VOC AP given precision and recall. + If use_07_metric is true, uses the + VOC 07 11 point method (default:False). + """ + if use_07_metric: + # 11 point metric + ap = 0. + for t in np.arange(0., 1.1, 0.1): + if np.sum(rec >= t) == 0: + p = 0 + else: + p = np.max(prec[rec >= t]) + ap = ap + p / 11. 
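+            # (the 11-point method above averages the maximum precision at recall thresholds 0, 0.1, ..., 1.0)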
+ else: + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.], rec, [1.])) + mpre = np.concatenate(([0.], prec, [0.])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.): + # def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=1.): + image_numpy = image_tensor[0].cpu().float().numpy() + image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor + return image_numpy.astype(imtype) + + +def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.): + # def im2tensor(image, imtype=np.uint8, cent=1., factor=1.): + return torch.Tensor((image / factor - cent) + [:, :, :, np.newaxis].transpose((3, 2, 0, 1))) diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/base_model.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/base_model.py new file mode 100755 index 00000000..9fdb9306 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/base_model.py @@ -0,0 +1,58 @@ +import os +import torch +from torch.autograd import Variable +from pdb import set_trace as st +from IPython import embed + +class BaseModel(): + def __init__(self): + pass; + + def name(self): + return 'BaseModel' + + def initialize(self, use_gpu=True, gpu_ids=[0]): + self.use_gpu = use_gpu + self.gpu_ids = gpu_ids + + def forward(self): + pass + + def get_image_paths(self): + pass + + def optimize_parameters(self): + pass + + def get_current_visuals(self): + return self.input + + def get_current_errors(self): + return {} + + def save(self, label): + pass + + # helper saving function that can be used by subclasses + def save_network(self, network, path, network_label, epoch_label): + save_filename = '%s_net_%s.pth' % (epoch_label, network_label) + save_path = os.path.join(path, save_filename) + torch.save(network.state_dict(), save_path) + + # helper loading function that can be used by subclasses + def load_network(self, network, network_label, epoch_label): + save_filename = '%s_net_%s.pth' % (epoch_label, network_label) + save_path = os.path.join(self.save_dir, save_filename) + print('Loading network from %s'%save_path) + network.load_state_dict(torch.load(save_path)) + + def update_learning_rate(): + pass + + def get_image_paths(self): + return self.image_paths + + def save_done(self, flag=False): + np.save(os.path.join(self.save_dir, 'done_flag'),flag) + np.savetxt(os.path.join(self.save_dir, 'done_flag'),[flag,],fmt='%i') + diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/dist_model.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/dist_model.py new file mode 100755 index 00000000..bb0d3bda --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/dist_model.py @@ -0,0 +1,289 @@ + +from __future__ import absolute_import + +import sys +import numpy as np +import torch +from torch import nn +import os +from collections import OrderedDict +from torch.autograd import Variable +import itertools +from .base_model import BaseModel +from scipy.ndimage import 
zoom +import fractions +import functools +import skimage.transform +from tqdm import tqdm + +from IPython import embed + +from . import networks_basic as networks +import lpips as util + + +class DistModel(BaseModel): + def name(self): + return self.model_name + + def initialize(self, model='net-lin', net='alex', colorspace='Lab', pnet_rand=False, pnet_tune=False, model_path=None, + use_gpu=True, printNet=False, spatial=False, + is_train=False, lr=.0001, beta1=0.5, version='0.1', gpu_ids=[0]): + ''' + INPUTS + model - ['net-lin'] for linearly calibrated network + ['net'] for off-the-shelf network + ['L2'] for L2 distance in Lab colorspace + ['SSIM'] for ssim in RGB colorspace + net - ['squeeze','alex','vgg'] + model_path - if None, will look in weights/[NET_NAME].pth + colorspace - ['Lab','RGB'] colorspace to use for L2 and SSIM + use_gpu - bool - whether or not to use a GPU + printNet - bool - whether or not to print network architecture out + spatial - bool - whether to output an array containing varying distances across spatial dimensions + spatial_shape - if given, output spatial shape. if None then spatial shape is determined automatically via spatial_factor (see below). + spatial_factor - if given, specifies upsampling factor relative to the largest spatial extent of a convolutional layer. if None then resized to size of input images. + spatial_order - spline order of filter for upsampling in spatial mode, by default 1 (bilinear). + is_train - bool - [True] for training mode + lr - float - initial learning rate + beta1 - float - initial momentum term for adam + version - 0.1 for latest, 0.0 was original (with a bug) + gpu_ids - int array - [0] by default, gpus to use + ''' + BaseModel.initialize(self, use_gpu=use_gpu, gpu_ids=gpu_ids) + + self.model = model + self.net = net + self.is_train = is_train + self.spatial = spatial + self.gpu_ids = gpu_ids + self.model_name = '%s [%s]' % (model, net) + + if(self.model == 'net-lin'): # pretrained net + linear layer + self.net = networks.PNetLin(pnet_rand=pnet_rand, pnet_tune=pnet_tune, pnet_type=net, + use_dropout=True, spatial=spatial, version=version, lpips=True) + kw = {} + if not use_gpu: + kw['map_location'] = 'cpu' + if(model_path is None): + import inspect + model_path = os.path.abspath(os.path.join(inspect.getfile( + self.initialize), '..', 'weights/v%s/%s.pth' % (version, net))) + + if(not is_train): + self.net.load_state_dict(torch.load( + model_path, **kw), strict=False) + + elif(self.model == 'net'): # pretrained network + self.net = networks.PNetLin( + pnet_rand=pnet_rand, pnet_type=net, lpips=False) + elif(self.model in ['L2', 'l2']): + # not really a network, only for testing + self.net = networks.L2(use_gpu=use_gpu, colorspace=colorspace) + self.model_name = 'L2' + elif(self.model in ['DSSIM', 'dssim', 'SSIM', 'ssim']): + self.net = networks.DSSIM(use_gpu=use_gpu, colorspace=colorspace) + self.model_name = 'SSIM' + else: + raise ValueError("Model [%s] not recognized." 
% self.model) + + self.parameters = list(self.net.parameters()) + + if self.is_train: # training mode + # extra network on top to go from distances (d0,d1) => predicted human judgment (h*) + self.rankLoss = networks.BCERankingLoss() + self.parameters += list(self.rankLoss.net.parameters()) + self.lr = lr + self.old_lr = lr + self.optimizer_net = torch.optim.Adam( + self.parameters, lr=lr, betas=(beta1, 0.999)) + else: # test mode + self.net.eval() + + if(use_gpu): + self.net.to(gpu_ids[0]) + self.net = torch.nn.DataParallel(self.net, device_ids=gpu_ids) + if(self.is_train): + self.rankLoss = self.rankLoss.to( + device=gpu_ids[0]) # just put this on GPU0 + + if(printNet): + networks.print_network(self.net) + + def forward(self, in0, in1, retPerLayer=False): + ''' Function computes the distance between image patches in0 and in1 + INPUTS + in0, in1 - torch.Tensor object of shape Nx3xXxY - image patch scaled to [-1,1] + OUTPUT + computed distances between in0 and in1 + ''' + + return self.net.forward(in0, in1, retPerLayer=retPerLayer) + + # ***** TRAINING FUNCTIONS ***** + def optimize_parameters(self): + self.forward_train() + self.optimizer_net.zero_grad() + self.backward_train() + self.optimizer_net.step() + self.clamp_weights() + + def clamp_weights(self): + for module in self.net.modules(): + if(hasattr(module, 'weight') and module.kernel_size == (1, 1)): + module.weight.data = torch.clamp(module.weight.data, min=0) + + def set_input(self, data): + self.input_ref = data['ref'] + self.input_p0 = data['p0'] + self.input_p1 = data['p1'] + self.input_judge = data['judge'] + + if(self.use_gpu): + self.input_ref = self.input_ref.to(device=self.gpu_ids[0]) + self.input_p0 = self.input_p0.to(device=self.gpu_ids[0]) + self.input_p1 = self.input_p1.to(device=self.gpu_ids[0]) + self.input_judge = self.input_judge.to(device=self.gpu_ids[0]) + + self.var_ref = Variable(self.input_ref, requires_grad=True) + self.var_p0 = Variable(self.input_p0, requires_grad=True) + self.var_p1 = Variable(self.input_p1, requires_grad=True) + + def forward_train(self): # run forward pass + self.d0 = self.forward(self.var_ref, self.var_p0) + self.d1 = self.forward(self.var_ref, self.var_p1) + self.acc_r = self.compute_accuracy(self.d0, self.d1, self.input_judge) + + self.var_judge = Variable(1.*self.input_judge).view(self.d0.size()) + + self.loss_total = self.rankLoss.forward( + self.d0, self.d1, self.var_judge*2.-1.) 
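+        # var_judge is the human 2AFC preference in [0,1]; it is rescaled to [-1,+1] before entering the ranking loss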
+ + return self.loss_total + + def backward_train(self): + torch.mean(self.loss_total).backward() + + def compute_accuracy(self, d0, d1, judge): + ''' d0, d1 are Variables, judge is a Tensor ''' + d1_lt_d0 = (d1 < d0).cpu().data.numpy().flatten() + judge_per = judge.cpu().numpy().flatten() + return d1_lt_d0*judge_per + (1-d1_lt_d0)*(1-judge_per) + + def get_current_errors(self): + retDict = OrderedDict([('loss_total', self.loss_total.data.cpu().numpy()), + ('acc_r', self.acc_r)]) + + for key in retDict.keys(): + retDict[key] = np.mean(retDict[key]) + + return retDict + + def get_current_visuals(self): + zoom_factor = 256/self.var_ref.data.size()[2] + + ref_img = util.tensor2im(self.var_ref.data) + p0_img = util.tensor2im(self.var_p0.data) + p1_img = util.tensor2im(self.var_p1.data) + + ref_img_vis = zoom(ref_img, [zoom_factor, zoom_factor, 1], order=0) + p0_img_vis = zoom(p0_img, [zoom_factor, zoom_factor, 1], order=0) + p1_img_vis = zoom(p1_img, [zoom_factor, zoom_factor, 1], order=0) + + return OrderedDict([('ref', ref_img_vis), + ('p0', p0_img_vis), + ('p1', p1_img_vis)]) + + def save(self, path, label): + if(self.use_gpu): + self.save_network(self.net.module, path, '', label) + else: + self.save_network(self.net, path, '', label) + self.save_network(self.rankLoss.net, path, 'rank', label) + + def update_learning_rate(self, nepoch_decay): + lrd = self.lr / nepoch_decay + lr = self.old_lr - lrd + + for param_group in self.optimizer_net.param_groups: + param_group['lr'] = lr + + self.old_lr = lr + + +def score_2afc_dataset(data_loader, func, name=''): + ''' Function computes Two Alternative Forced Choice (2AFC) score using + distance function 'func' in dataset 'data_loader' + INPUTS + data_loader - CustomDatasetDataLoader object - contains a TwoAFCDataset inside + func - callable distance function - calling d=func(in0,in1) should take 2 + pytorch tensors with shape Nx3xXxY, and return numpy array of length N + OUTPUTS + [0] - 2AFC score in [0,1], fraction of time func agrees with human evaluators + [1] - dictionary with following elements + d0s,d1s - N arrays containing distances between reference patch to perturbed patches + gts - N array in [0,1], preferred patch selected by human evaluators + (closer to "0" for left patch p0, "1" for right patch p1, + "0.6" means 60pct people preferred right patch, 40pct preferred left) + scores - N array in [0,1], corresponding to what percentage function agreed with humans + CONSTS + N - number of test triplets in data_loader + ''' + + d0s = [] + d1s = [] + gts = [] + + for data in tqdm(data_loader.load_data(), desc=name): + d0s += func(data['ref'], data['p0'] + ).data.cpu().numpy().flatten().tolist() + d1s += func(data['ref'], data['p1'] + ).data.cpu().numpy().flatten().tolist() + gts += data['judge'].cpu().numpy().flatten().tolist() + + d0s = np.array(d0s) + d1s = np.array(d1s) + gts = np.array(gts) + scores = (d0s < d1s)*(1.-gts) + (d1s < d0s)*gts + (d1s == d0s)*.5 + + return(np.mean(scores), dict(d0s=d0s, d1s=d1s, gts=gts, scores=scores)) + + +def score_jnd_dataset(data_loader, func, name=''): + ''' Function computes JND score using distance function 'func' in dataset 'data_loader' + INPUTS + data_loader - CustomDatasetDataLoader object - contains a JNDDataset inside + func - callable distance function - calling d=func(in0,in1) should take 2 + pytorch tensors with shape Nx3xXxY, and return pytorch array of length N + OUTPUTS + [0] - JND score in [0,1], mAP score (area under precision-recall curve) + [1] - dictionary with following elements + ds - 
N array containing distances between two patches shown to human evaluator + sames - N array containing fraction of people who thought the two patches were identical + CONSTS + N - number of test triplets in data_loader + ''' + + ds = [] + gts = [] + + for data in tqdm(data_loader.load_data(), desc=name): + ds += func(data['p0'], data['p1']).data.cpu().numpy().tolist() + gts += data['same'].cpu().numpy().flatten().tolist() + + sames = np.array(gts) + ds = np.array(ds) + + sorted_inds = np.argsort(ds) + ds_sorted = ds[sorted_inds] + sames_sorted = sames[sorted_inds] + + TPs = np.cumsum(sames_sorted) + FPs = np.cumsum(1-sames_sorted) + FNs = np.sum(sames_sorted)-TPs + + precs = TPs/(TPs+FPs) + recs = TPs/(TPs+FNs) + score = util.voc_ap(recs, precs) + + return(score, dict(ds=ds, sames=sames)) diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/networks_basic.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/networks_basic.py new file mode 100755 index 00000000..1d23f059 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/networks_basic.py @@ -0,0 +1,187 @@ + +from __future__ import absolute_import + +import sys +import torch +import torch.nn as nn +import torch.nn.init as init +from torch.autograd import Variable +import numpy as np +from pdb import set_trace as st +from skimage import color +from IPython import embed +from . import pretrained_networks as pn + +import lpips as util + +def spatial_average(in_tens, keepdim=True): + return in_tens.mean([2,3],keepdim=keepdim) + +def upsample(in_tens, out_H=64): # assumes scale factor is same for H and W + in_H = in_tens.shape[2] + scale_factor = 1.*out_H/in_H + + return nn.Upsample(scale_factor=scale_factor, mode='bilinear', align_corners=False)(in_tens) + +# Learned perceptual metric +class PNetLin(nn.Module): + def __init__(self, pnet_type='vgg', pnet_rand=False, pnet_tune=False, use_dropout=True, spatial=False, version='0.1', lpips=True): + super(PNetLin, self).__init__() + + self.pnet_type = pnet_type + self.pnet_tune = pnet_tune + self.pnet_rand = pnet_rand + self.spatial = spatial + self.lpips = lpips + self.version = version + self.scaling_layer = ScalingLayer() + + if(self.pnet_type in ['vgg','vgg16']): + net_type = pn.vgg16 + self.chns = [64,128,256,512,512] + elif(self.pnet_type=='alex'): + net_type = pn.alexnet + self.chns = [64,192,384,256,256] + elif(self.pnet_type=='squeeze'): + net_type = pn.squeezenet + self.chns = [64,128,256,384,384,512,512] + self.L = len(self.chns) + + self.net = net_type(pretrained=not self.pnet_rand, requires_grad=self.pnet_tune) + + if(lpips): + self.lin0 = NetLinLayer(self.chns[0], use_dropout=use_dropout) + self.lin1 = NetLinLayer(self.chns[1], use_dropout=use_dropout) + self.lin2 = NetLinLayer(self.chns[2], use_dropout=use_dropout) + self.lin3 = NetLinLayer(self.chns[3], use_dropout=use_dropout) + self.lin4 = NetLinLayer(self.chns[4], use_dropout=use_dropout) + self.lins = [self.lin0,self.lin1,self.lin2,self.lin3,self.lin4] + if(self.pnet_type=='squeeze'): # 7 layers for squeezenet + self.lin5 = NetLinLayer(self.chns[5], use_dropout=use_dropout) + self.lin6 = NetLinLayer(self.chns[6], use_dropout=use_dropout) + self.lins+=[self.lin5,self.lin6] + + def forward(self, in0, in1, retPerLayer=False): + # v0.0 - original release had a bug, where input was not scaled + in0_input, in1_input = (self.scaling_layer(in0), self.scaling_layer(in1)) if self.version=='0.1' else (in0, in1) + outs0, outs1 
= self.net.forward(in0_input), self.net.forward(in1_input) + feats0, feats1, diffs = {}, {}, {} + + for kk in range(self.L): + feats0[kk], feats1[kk] = util.normalize_tensor(outs0[kk]), util.normalize_tensor(outs1[kk]) + diffs[kk] = (feats0[kk]-feats1[kk])**2 + + if(self.lpips): + if(self.spatial): + res = [upsample(self.lins[kk].model(diffs[kk]), out_H=in0.shape[2]) for kk in range(self.L)] + else: + res = [spatial_average(self.lins[kk].model(diffs[kk]), keepdim=True) for kk in range(self.L)] + else: + if(self.spatial): + res = [upsample(diffs[kk].sum(dim=1,keepdim=True), out_H=in0.shape[2]) for kk in range(self.L)] + else: + res = [spatial_average(diffs[kk].sum(dim=1,keepdim=True), keepdim=True) for kk in range(self.L)] + + val = res[0] + for l in range(1,self.L): + val += res[l] + + if(retPerLayer): + return (val, res) + else: + return val + +class ScalingLayer(nn.Module): + def __init__(self): + super(ScalingLayer, self).__init__() + self.register_buffer('shift', torch.Tensor([-.030,-.088,-.188])[None,:,None,None]) + self.register_buffer('scale', torch.Tensor([.458,.448,.450])[None,:,None,None]) + + def forward(self, inp): + return (inp - self.shift) / self.scale + + +class NetLinLayer(nn.Module): + ''' A single linear layer which does a 1x1 conv ''' + def __init__(self, chn_in, chn_out=1, use_dropout=False): + super(NetLinLayer, self).__init__() + + layers = [nn.Dropout(),] if(use_dropout) else [] + layers += [nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False),] + self.model = nn.Sequential(*layers) + + +class Dist2LogitLayer(nn.Module): + ''' takes 2 distances, puts through fc layers, spits out value between [0,1] (if use_sigmoid is True) ''' + def __init__(self, chn_mid=32, use_sigmoid=True): + super(Dist2LogitLayer, self).__init__() + + layers = [nn.Conv2d(5, chn_mid, 1, stride=1, padding=0, bias=True),] + layers += [nn.LeakyReLU(0.2,True),] + layers += [nn.Conv2d(chn_mid, chn_mid, 1, stride=1, padding=0, bias=True),] + layers += [nn.LeakyReLU(0.2,True),] + layers += [nn.Conv2d(chn_mid, 1, 1, stride=1, padding=0, bias=True),] + if(use_sigmoid): + layers += [nn.Sigmoid(),] + self.model = nn.Sequential(*layers) + + def forward(self,d0,d1,eps=0.1): + return self.model.forward(torch.cat((d0,d1,d0-d1,d0/(d1+eps),d1/(d0+eps)),dim=1)) + +class BCERankingLoss(nn.Module): + def __init__(self, chn_mid=32): + super(BCERankingLoss, self).__init__() + self.net = Dist2LogitLayer(chn_mid=chn_mid) + # self.parameters = list(self.net.parameters()) + self.loss = torch.nn.BCELoss() + + def forward(self, d0, d1, judge): + per = (judge+1.)/2. 
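+        # judge arrives in [-1,+1]; 'per' maps it back to a [0,1] target probability for the BCE loss below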
+ self.logit = self.net.forward(d0,d1) + return self.loss(self.logit, per) + +# L2, DSSIM metrics +class FakeNet(nn.Module): + def __init__(self, use_gpu=True, colorspace='Lab'): + super(FakeNet, self).__init__() + self.use_gpu = use_gpu + self.colorspace=colorspace + +class L2(FakeNet): + + def forward(self, in0, in1, retPerLayer=None): + assert(in0.size()[0]==1) # currently only supports batchSize 1 + + if(self.colorspace=='RGB'): + (N,C,X,Y) = in0.size() + value = torch.mean(torch.mean(torch.mean((in0-in1)**2,dim=1).view(N,1,X,Y),dim=2).view(N,1,1,Y),dim=3).view(N) + return value + elif(self.colorspace=='Lab'): + value = util.l2(util.tensor2np(util.tensor2tensorlab(in0.data,to_norm=False)), + util.tensor2np(util.tensor2tensorlab(in1.data,to_norm=False)), range=100.).astype('float') + ret_var = Variable( torch.Tensor((value,) ) ) + if(self.use_gpu): + ret_var = ret_var.cuda() + return ret_var + +class DSSIM(FakeNet): + + def forward(self, in0, in1, retPerLayer=None): + assert(in0.size()[0]==1) # currently only supports batchSize 1 + + if(self.colorspace=='RGB'): + value = util.dssim(1.*util.tensor2im(in0.data), 1.*util.tensor2im(in1.data), range=255.).astype('float') + elif(self.colorspace=='Lab'): + value = util.dssim(util.tensor2np(util.tensor2tensorlab(in0.data,to_norm=False)), + util.tensor2np(util.tensor2tensorlab(in1.data,to_norm=False)), range=100.).astype('float') + ret_var = Variable( torch.Tensor((value,) ) ) + if(self.use_gpu): + ret_var = ret_var.cuda() + return ret_var + +def print_network(net): + num_params = 0 + for param in net.parameters(): + num_params += param.numel() + print('Network',net) + print('Total number of parameters: %d' % num_params) diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/pretrained_networks.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/pretrained_networks.py new file mode 100755 index 00000000..077a2441 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/lpips/pretrained_networks.py @@ -0,0 +1,181 @@ +from collections import namedtuple +import torch +from torchvision import models as tv +from IPython import embed + +class squeezenet(torch.nn.Module): + def __init__(self, requires_grad=False, pretrained=True): + super(squeezenet, self).__init__() + pretrained_features = tv.squeezenet1_1(pretrained=pretrained).features + self.slice1 = torch.nn.Sequential() + self.slice2 = torch.nn.Sequential() + self.slice3 = torch.nn.Sequential() + self.slice4 = torch.nn.Sequential() + self.slice5 = torch.nn.Sequential() + self.slice6 = torch.nn.Sequential() + self.slice7 = torch.nn.Sequential() + self.N_slices = 7 + for x in range(2): + self.slice1.add_module(str(x), pretrained_features[x]) + for x in range(2,5): + self.slice2.add_module(str(x), pretrained_features[x]) + for x in range(5, 8): + self.slice3.add_module(str(x), pretrained_features[x]) + for x in range(8, 10): + self.slice4.add_module(str(x), pretrained_features[x]) + for x in range(10, 11): + self.slice5.add_module(str(x), pretrained_features[x]) + for x in range(11, 12): + self.slice6.add_module(str(x), pretrained_features[x]) + for x in range(12, 13): + self.slice7.add_module(str(x), pretrained_features[x]) + if not requires_grad: + for param in self.parameters(): + param.requires_grad = False + + def forward(self, X): + h = self.slice1(X) + h_relu1 = h + h = self.slice2(h) + h_relu2 = h + h = self.slice3(h) + h_relu3 = h + h = self.slice4(h) + h_relu4 = h + h = self.slice5(h) + 
h_relu5 = h + h = self.slice6(h) + h_relu6 = h + h = self.slice7(h) + h_relu7 = h + vgg_outputs = namedtuple("SqueezeOutputs", ['relu1','relu2','relu3','relu4','relu5','relu6','relu7']) + out = vgg_outputs(h_relu1,h_relu2,h_relu3,h_relu4,h_relu5,h_relu6,h_relu7) + + return out + + +class alexnet(torch.nn.Module): + def __init__(self, requires_grad=False, pretrained=True): + super(alexnet, self).__init__() + alexnet_pretrained_features = tv.alexnet(pretrained=pretrained).features + self.slice1 = torch.nn.Sequential() + self.slice2 = torch.nn.Sequential() + self.slice3 = torch.nn.Sequential() + self.slice4 = torch.nn.Sequential() + self.slice5 = torch.nn.Sequential() + self.N_slices = 5 + for x in range(2): + self.slice1.add_module(str(x), alexnet_pretrained_features[x]) + for x in range(2, 5): + self.slice2.add_module(str(x), alexnet_pretrained_features[x]) + for x in range(5, 8): + self.slice3.add_module(str(x), alexnet_pretrained_features[x]) + for x in range(8, 10): + self.slice4.add_module(str(x), alexnet_pretrained_features[x]) + for x in range(10, 12): + self.slice5.add_module(str(x), alexnet_pretrained_features[x]) + if not requires_grad: + for param in self.parameters(): + param.requires_grad = False + + def forward(self, X): + h = self.slice1(X) + h_relu1 = h + h = self.slice2(h) + h_relu2 = h + h = self.slice3(h) + h_relu3 = h + h = self.slice4(h) + h_relu4 = h + h = self.slice5(h) + h_relu5 = h + alexnet_outputs = namedtuple("AlexnetOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5']) + out = alexnet_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5) + + return out + +class vgg16(torch.nn.Module): + def __init__(self, requires_grad=False, pretrained=True): + super(vgg16, self).__init__() + vgg_pretrained_features = tv.vgg16(pretrained=pretrained).features + self.slice1 = torch.nn.Sequential() + self.slice2 = torch.nn.Sequential() + self.slice3 = torch.nn.Sequential() + self.slice4 = torch.nn.Sequential() + self.slice5 = torch.nn.Sequential() + self.N_slices = 5 + for x in range(4): + self.slice1.add_module(str(x), vgg_pretrained_features[x]) + for x in range(4, 9): + self.slice2.add_module(str(x), vgg_pretrained_features[x]) + for x in range(9, 16): + self.slice3.add_module(str(x), vgg_pretrained_features[x]) + for x in range(16, 23): + self.slice4.add_module(str(x), vgg_pretrained_features[x]) + for x in range(23, 30): + self.slice5.add_module(str(x), vgg_pretrained_features[x]) + if not requires_grad: + for param in self.parameters(): + param.requires_grad = False + + def forward(self, X): + h = self.slice1(X) + h_relu1_2 = h + h = self.slice2(h) + h_relu2_2 = h + h = self.slice3(h) + h_relu3_3 = h + h = self.slice4(h) + h_relu4_3 = h + h = self.slice5(h) + h_relu5_3 = h + vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3']) + out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3) + + return out + + + +class resnet(torch.nn.Module): + def __init__(self, requires_grad=False, pretrained=True, num=18): + super(resnet, self).__init__() + if(num==18): + self.net = tv.resnet18(pretrained=pretrained) + elif(num==34): + self.net = tv.resnet34(pretrained=pretrained) + elif(num==50): + self.net = tv.resnet50(pretrained=pretrained) + elif(num==101): + self.net = tv.resnet101(pretrained=pretrained) + elif(num==152): + self.net = tv.resnet152(pretrained=pretrained) + self.N_slices = 5 + + self.conv1 = self.net.conv1 + self.bn1 = self.net.bn1 + self.relu = self.net.relu + self.maxpool = self.net.maxpool + self.layer1 = 
self.net.layer1 + self.layer2 = self.net.layer2 + self.layer3 = self.net.layer3 + self.layer4 = self.net.layer4 + + def forward(self, X): + h = self.conv1(X) + h = self.bn1(h) + h = self.relu(h) + h_relu1 = h + h = self.maxpool(h) + h = self.layer1(h) + h_conv2 = h + h = self.layer2(h) + h_conv3 = h + h = self.layer3(h) + h_conv4 = h + h = self.layer4(h) + h_conv5 = h + + outputs = namedtuple("Outputs", ['relu1','conv2','conv3','conv4','conv5']) + out = outputs(h_relu1, h_conv2, h_conv3, h_conv4, h_conv5) + + return out diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/models.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/models.py new file mode 100755 index 00000000..91c760dd --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/models.py @@ -0,0 +1,395 @@ +from random import randint +import torch +import torch.nn as nn +from torch.nn.utils import spectral_norm +import torch.nn.functional as F + +import random + +seq = nn.Sequential + + +def weights_init(m): + classname = m.__class__.__name__ + if classname.find('Conv') != -1: + try: + m.weight.data.normal_(0.0, 0.02) + except: + pass + elif classname.find('BatchNorm') != -1: + m.weight.data.normal_(1.0, 0.02) + m.bias.data.fill_(0) + + +def conv2d(*args, **kwargs): + return spectral_norm(nn.Conv2d(*args, **kwargs)) + + +def convTranspose2d(*args, **kwargs): + return spectral_norm(nn.ConvTranspose2d(*args, **kwargs)) + + +def batchNorm2d(*args, **kwargs): + return nn.BatchNorm2d(*args, **kwargs) + + +def linear(*args, **kwargs): + return spectral_norm(nn.Linear(*args, **kwargs)) + + +class PixelNorm(nn.Module): + def forward(self, input): + return input * torch.rsqrt(torch.mean(input ** 2, dim=1, keepdim=True) + 1e-8) + + +class Reshape(nn.Module): + def __init__(self, shape): + super().__init__() + self.target_shape = shape + + def forward(self, feat): + batch = feat.shape[0] + return feat.view(batch, *self.target_shape) + + +class GLU(nn.Module): + def forward(self, x): + nc = x.size(1) + assert nc % 2 == 0, 'channels dont divide 2!' 
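+        # GLU: split the channels into two halves and gate the first half with a sigmoid of the second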
+ nc = int(nc / 2) + return x[:, :nc] * torch.sigmoid(x[:, nc:]) + + +class NoiseInjection(nn.Module): + def __init__(self): + super().__init__() + + self.weight = nn.Parameter(torch.zeros(1), requires_grad=True) + + def forward(self, feat, noise=None): + if noise is None: + batch, _, height, width = feat.shape + noise = torch.randn(batch, 1, height, width).to(feat.device) + + return feat + self.weight * noise + + +class Swish(nn.Module): + def forward(self, feat): + return feat * torch.sigmoid(feat) + + +class SEBlock(nn.Module): + def __init__(self, ch_in, ch_out): + super().__init__() + + self.main = nn.Sequential(nn.AdaptiveAvgPool2d(4), + conv2d(ch_in, ch_out, 4, 1, 0, + bias=False), Swish(), + conv2d(ch_out, ch_out, 1, 1, 0, bias=False), nn.Sigmoid()) + + def forward(self, feat_small, feat_big): + return feat_big * self.main(feat_small) + + +class InitLayer(nn.Module): + def __init__(self, nz, channel): + super().__init__() + + self.init = nn.Sequential( + convTranspose2d(nz, channel * 2, 4, 1, 0, bias=False), + batchNorm2d(channel * 2), GLU()) + + def forward(self, noise): + noise = noise.view(noise.shape[0], -1, 1, 1) + return self.init(noise) + + +def UpBlock(in_planes, out_planes): + block = nn.Sequential( + nn.Upsample(scale_factor=2, mode='nearest'), + conv2d(in_planes, out_planes * 2, 3, 1, 1, bias=False), + batchNorm2d(out_planes * 2), GLU()) + return block + + +def UpBlockComp(in_planes, out_planes): + block = nn.Sequential( + nn.Upsample(scale_factor=2, mode='nearest'), + conv2d(in_planes, out_planes * 2, 3, 1, 1, bias=False), + NoiseInjection(), + batchNorm2d(out_planes * 2), GLU(), + conv2d(out_planes, out_planes * 2, 3, 1, 1, bias=False), + NoiseInjection(), + batchNorm2d(out_planes * 2), GLU() + ) + return block + + +class Generator(nn.Module): + def __init__(self, ngf=64, nz=100, nc=3, im_size=1024): + super(Generator, self).__init__() + + nfc_multi = {4: 16, 8: 8, 16: 4, 32: 2, 64: 2, + 128: 1, 256: 0.5, 512: 0.25, 1024: 0.125} + nfc = {} + for k, v in nfc_multi.items(): + nfc[k] = int(v * ngf) + + self.im_size = im_size + + self.init = InitLayer(nz, channel=nfc[4]) + + self.feat_8 = UpBlockComp(nfc[4], nfc[8]) + self.feat_16 = UpBlock(nfc[8], nfc[16]) + self.feat_32 = UpBlockComp(nfc[16], nfc[32]) + self.feat_64 = UpBlock(nfc[32], nfc[64]) + self.feat_128 = UpBlockComp(nfc[64], nfc[128]) + self.feat_256 = UpBlock(nfc[128], nfc[256]) + + self.se_64 = SEBlock(nfc[4], nfc[64]) + self.se_128 = SEBlock(nfc[8], nfc[128]) + self.se_256 = SEBlock(nfc[16], nfc[256]) + + self.to_128 = conv2d(nfc[128], nc, 1, 1, 0, bias=False) + self.to_big = conv2d(nfc[im_size], nc, 3, 1, 1, bias=False) + + if im_size > 256: + self.feat_512 = UpBlockComp(nfc[256], nfc[512]) + self.se_512 = SEBlock(nfc[32], nfc[512]) + if im_size > 512: + self.feat_1024 = UpBlock(nfc[512], nfc[1024]) + + def forward(self, input): + + feat_4 = self.init(input) + feat_8 = self.feat_8(feat_4) + feat_16 = self.feat_16(feat_8) + feat_32 = self.feat_32(feat_16) + + feat_64 = self.se_64(feat_4, self.feat_64(feat_32)) + + feat_128 = self.se_128(feat_8, self.feat_128(feat_64)) + + feat_256 = self.se_256(feat_16, self.feat_256(feat_128)) + + if self.im_size == 256: + return [self.to_big(feat_256), self.to_128(feat_128)] + + feat_512 = self.se_512(feat_32, self.feat_512(feat_256)) + if self.im_size == 512: + return [self.to_big(feat_512), self.to_128(feat_128)] + + feat_1024 = self.feat_1024(feat_512) + + im_128 = torch.tanh(self.to_128(feat_128)) + im_1024 = torch.tanh(self.to_big(feat_1024)) + + return [im_1024, im_128] 
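+ +# Usage sketch (illustrative): the generator maps a latent vector of size nz to a list of images, +# the full-resolution output first and a 128x128 auxiliary output second, e.g. +#   netG = Generator(ngf=64, nz=256, im_size=1024) +#   im_big, im_128 = netG(torch.randn(4, 256))  # shapes (4, 3, 1024, 1024) and (4, 3, 128, 128)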
+ + +class DownBlock(nn.Module): + def __init__(self, in_planes, out_planes): + super(DownBlock, self).__init__() + + self.main = nn.Sequential( + conv2d(in_planes, out_planes, 4, 2, 1, bias=False), + batchNorm2d(out_planes), nn.LeakyReLU(0.2, inplace=True), + ) + + def forward(self, feat): + return self.main(feat) + + +class DownBlockComp(nn.Module): + def __init__(self, in_planes, out_planes): + super(DownBlockComp, self).__init__() + + self.main = nn.Sequential( + conv2d(in_planes, out_planes, 4, 2, 1, bias=False), + batchNorm2d(out_planes), nn.LeakyReLU(0.2, inplace=True), + conv2d(out_planes, out_planes, 3, 1, 1, bias=False), + batchNorm2d(out_planes), nn.LeakyReLU(0.2) + ) + + self.direct = nn.Sequential( + nn.AvgPool2d(2, 2), + conv2d(in_planes, out_planes, 1, 1, 0, bias=False), + batchNorm2d(out_planes), nn.LeakyReLU(0.2)) + + def forward(self, feat): + return (self.main(feat) + self.direct(feat)) / 2 + + +class Discriminator(nn.Module): + def __init__(self, ndf=64, nc=3, im_size=512): + super(Discriminator, self).__init__() + self.ndf = ndf + self.im_size = im_size + + nfc_multi = {4: 16, 8: 16, 16: 8, 32: 4, 64: 2, + 128: 1, 256: 0.5, 512: 0.25, 1024: 0.125} + nfc = {} + for k, v in nfc_multi.items(): + nfc[k] = int(v * ndf) + + if im_size == 1024: + self.down_from_big = nn.Sequential( + conv2d(nc, nfc[1024], 4, 2, 1, bias=False), + nn.LeakyReLU(0.2, inplace=True), + conv2d(nfc[1024], nfc[512], 4, 2, 1, bias=False), + batchNorm2d(nfc[512]), + nn.LeakyReLU(0.2, inplace=True)) + elif im_size == 512: + self.down_from_big = nn.Sequential( + conv2d(nc, nfc[512], 4, 2, 1, bias=False), + nn.LeakyReLU(0.2, inplace=True)) + elif im_size == 256: + self.down_from_big = nn.Sequential( + conv2d(nc, nfc[512], 3, 1, 1, bias=False), + nn.LeakyReLU(0.2, inplace=True)) + + self.down_4 = DownBlockComp(nfc[512], nfc[256]) + self.down_8 = DownBlockComp(nfc[256], nfc[128]) + self.down_16 = DownBlockComp(nfc[128], nfc[64]) + self.down_32 = DownBlockComp(nfc[64], nfc[32]) + self.down_64 = DownBlockComp(nfc[32], nfc[16]) + + self.rf_big = nn.Sequential( + conv2d(nfc[16], nfc[8], 1, 1, 0, bias=False), + batchNorm2d(nfc[8]), nn.LeakyReLU(0.2, inplace=True), + conv2d(nfc[8], 1, 4, 1, 0, bias=False)) + + self.se_2_16 = SEBlock(nfc[512], nfc[64]) + self.se_4_32 = SEBlock(nfc[256], nfc[32]) + self.se_8_64 = SEBlock(nfc[128], nfc[16]) + + self.down_from_small = nn.Sequential( + conv2d(nc, nfc[256], 4, 2, 1, bias=False), + nn.LeakyReLU(0.2, inplace=True), + DownBlock(nfc[256], nfc[128]), + DownBlock(nfc[128], nfc[64]), + DownBlock(nfc[64], nfc[32]), ) + + self.rf_small = conv2d(nfc[32], 1, 4, 1, 0, bias=False) + + self.decoder_big = SimpleDecoder(nfc[16], nc) + self.decoder_part = SimpleDecoder(nfc[32], nc) + self.decoder_small = SimpleDecoder(nfc[32], nc) + + def forward(self, imgs, label, part=None): + if type(imgs) is not list: + imgs = [F.interpolate(imgs, size=self.im_size), + F.interpolate(imgs, size=128)] + + feat_2 = self.down_from_big(imgs[0]) + feat_4 = self.down_4(feat_2) + feat_8 = self.down_8(feat_4) + + feat_16 = self.down_16(feat_8) + feat_16 = self.se_2_16(feat_2, feat_16) + + feat_32 = self.down_32(feat_16) + feat_32 = self.se_4_32(feat_4, feat_32) + + feat_last = self.down_64(feat_32) + feat_last = self.se_8_64(feat_8, feat_last) + rf_0 = self.rf_big(feat_last).view(-1) + + feat_small = self.down_from_small(imgs[1]) + rf_1 = self.rf_small(feat_small).view(-1) + + if label == 'real': + rec_img_big = self.decoder_big(feat_last) + rec_img_small = self.decoder_small(feat_small) + + assert part is 
not None + rec_img_part = None + if part == 0: + rec_img_part = self.decoder_part(feat_32[:, :, :8, :8]) + if part == 1: + rec_img_part = self.decoder_part(feat_32[:, :, :8, 8:]) + if part == 2: + rec_img_part = self.decoder_part(feat_32[:, :, 8:, :8]) + if part == 3: + rec_img_part = self.decoder_part(feat_32[:, :, 8:, 8:]) + + return torch.cat([rf_0, rf_1]), [rec_img_big, rec_img_small, rec_img_part] + + return torch.cat([rf_0, rf_1]) + + +class SimpleDecoder(nn.Module): + """docstring for CAN_SimpleDecoder""" + + def __init__(self, nfc_in=64, nc=3): + super(SimpleDecoder, self).__init__() + + nfc_multi = {4: 16, 8: 8, 16: 4, 32: 2, 64: 2, + 128: 1, 256: 0.5, 512: 0.25, 1024: 0.125} + nfc = {} + for k, v in nfc_multi.items(): + nfc[k] = int(v * 32) + + def upBlock(in_planes, out_planes): + block = nn.Sequential( + nn.Upsample(scale_factor=2, mode='nearest'), + conv2d(in_planes, out_planes * 2, 3, 1, 1, bias=False), + batchNorm2d(out_planes * 2), GLU()) + return block + + self.main = nn.Sequential(nn.AdaptiveAvgPool2d(8), + upBlock(nfc_in, nfc[16]), + upBlock(nfc[16], nfc[32]), + upBlock(nfc[32], nfc[64]), + upBlock(nfc[64], nfc[128]), + conv2d(nfc[128], nc, 3, 1, 1, bias=False), + nn.Tanh()) + + def forward(self, input): + # input shape: c x 4 x 4 + return self.main(input) + + +def random_crop(image, size): + h, w = image.shape[2:] + ch = randint(0, h - size - 1) + cw = randint(0, w - size - 1) + return image[:, :, ch:ch + size, cw:cw + size] + + +class TextureDiscriminator(nn.Module): + def __init__(self, ndf=64, nc=3, im_size=512): + super(TextureDiscriminator, self).__init__() + self.ndf = ndf + self.im_size = im_size + + nfc_multi = {4: 16, 8: 8, 16: 8, 32: 4, 64: 2, + 128: 1, 256: 0.5, 512: 0.25, 1024: 0.125} + nfc = {} + for k, v in nfc_multi.items(): + nfc[k] = int(v * ndf) + + self.down_from_small = nn.Sequential( + conv2d(nc, nfc[256], 4, 2, 1, bias=False), + nn.LeakyReLU(0.2, inplace=True), + DownBlock(nfc[256], nfc[128]), + DownBlock(nfc[128], nfc[64]), + DownBlock(nfc[64], nfc[32]), ) + self.rf_small = nn.Sequential( + conv2d(nfc[16], 1, 4, 1, 0, bias=False)) + + self.decoder_small = SimpleDecoder(nfc[32], nc) + + def forward(self, img, label): + img = random_crop(img, size=128) + + feat_small = self.down_from_small(img) + rf = self.rf_small(feat_small).view(-1) + + if label == 'real': + rec_img_small = self.decoder_small(feat_small) + + return rf, rec_img_small, img + + return rf diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/operation.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/operation.py new file mode 100755 index 00000000..3feebb10 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/operation.py @@ -0,0 +1,129 @@ +import lmdb +from io import BytesIO +import os +import numpy as np +import torch +import torch.utils.data as data +from torch.utils.data import Dataset +from PIL import Image +from copy import deepcopy +import shutil +import json + + +def InfiniteSampler(n): + """Data sampler""" + i = n - 1 + order = np.random.permutation(n) + while True: + yield order[i] + i += 1 + if i >= n: + np.random.seed() + order = np.random.permutation(n) + i = 0 + + +class InfiniteSamplerWrapper(data.sampler.Sampler): + """Data sampler wrapper""" + + def __init__(self, data_source): + self.num_samples = len(data_source) + + def __iter__(self): + return iter(InfiniteSampler(self.num_samples)) + + def __len__(self): + return 2 ** 31 + + +def copy_G_params(model): + flatten = 
deepcopy(list(p.data for p in model.parameters())) + return flatten + + +def load_params(model, new_param): + for p, new_p in zip(model.parameters(), new_param): + p.data.copy_(new_p) + + +def get_dir(args): + task_name = 'train_results/' + args['name'] + saved_model_folder = os.path.join(task_name, 'models') + saved_image_folder = os.path.join(task_name, 'images') + + os.makedirs(saved_model_folder, exist_ok=True) + os.makedirs(saved_image_folder, exist_ok=True) + + return saved_model_folder, saved_image_folder + + +class ImageFolder(Dataset): + """docstring for ArtDataset""" + + def __init__(self, root, transform=None): + super(ImageFolder, self).__init__() + self.root = root + + self.frame = self._parse_frame() + self.transform = transform + + def _parse_frame(self): + frame = [] + img_names = os.listdir(self.root) + img_names.sort() + for i in range(len(img_names)): + image_path = os.path.join(self.root, img_names[i]) + if image_path[-4:] == '.jpg' or image_path[-4:] == '.png' or image_path[-5:] == '.jpeg': + frame.append(image_path) + return frame + + def __len__(self): + return len(self.frame) + + def __getitem__(self, idx): + file = self.frame[idx] + img = Image.open(file).convert('RGB') + + if self.transform: + img = self.transform(img) + + return img + + +class MultiResolutionDataset(Dataset): + def __init__(self, path, transform, resolution=256): + self.env = lmdb.open( + path, + max_readers=32, + readonly=True, + lock=False, + readahead=False, + meminit=False, + ) + + if not self.env: + raise IOError('Cannot open lmdb dataset', path) + + with self.env.begin(write=False) as txn: + self.length = int( + txn.get('length'.encode('utf-8')).decode('utf-8')) + + self.resolution = resolution + self.transform = transform + + def __len__(self): + return self.length + + def __getitem__(self, index): + with self.env.begin(write=False) as txn: + key = f'{self.resolution}-{str(index).zfill(5)}'.encode('utf-8') + img_bytes = txn.get(key) + #key_asp = f'aspect_ratio-{str(index).zfill(5)}'.encode('utf-8') + #aspect_ratio = float(txn.get(key_asp).decode()) + + buffer = BytesIO(img_bytes) + img = Image.open(buffer) + img = self.transform(img) + + return img diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/train.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/train.py new file mode 100755 index 00000000..1d84dd95 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/GAN/train.py @@ -0,0 +1,188 @@ +from util import load_yaml +import sys +import lpips +import torch +from torch import nn +import torch.optim as optim +import torch.nn.functional as F +from torch.utils.data.dataloader import DataLoader +from torchvision import transforms +from torchvision import utils as vutils + +import argparse +import random +from tqdm import tqdm + +import csv + +from models import weights_init, Discriminator, Generator +from operation import copy_G_params, load_params, get_dir +from operation import ImageFolder, InfiniteSamplerWrapper +from diffaug import DiffAugment +policy = 'color,translation' +percept = lpips.PerceptualLoss(model='net-lin', net='vgg', use_gpu=True) + + +def crop_image_by_part(image, part): + hw = image.shape[2] // 2 + if part == 0: + return image[:, :, :hw, :hw] + if part == 1: + return image[:, :, :hw, hw:] + if part == 2: + return image[:, :, hw:, :hw] + if part == 3: + return image[:, :, hw:, hw:] + + +def train_d(net, data, label="real"): + """Train function of discriminator""" + if label == 
"real": + part = random.randint(0, 3) + pred, [rec_all, rec_small, rec_part] = net(data, label, part=part) + err = F.relu(torch.rand_like(pred) * 0.2 + 0.8 - pred).mean() + \ + percept(rec_all, F.interpolate(data, rec_all.shape[2])).sum() + \ + percept(rec_small, F.interpolate(data, rec_small.shape[2])).sum() + \ + percept(rec_part, F.interpolate( + crop_image_by_part(data, part), rec_part.shape[2])).sum() + err.backward() + return pred.mean().item(), rec_all, rec_small, rec_part + else: + pred = net(data, label) + err = F.relu(torch.rand_like(pred) * 0.2 + 0.8 + pred).mean() + err.backward() + return pred.mean().item() + + +def train(args): + data_root = args['path'] + total_iterations = args['iter'] + # checkpoint = args.ckpt + batch_size = args['batch_size'] + im_size = args['im_size'] + ndf = 64 + ngf = 64 + nz = 256 + nlr = 0.0002 + nbeta1 = 0.5 + use_cuda = True + multi_gpu = False + dataloader_workers = 8 + current_iteration = 0 + save_interval = 100 + saved_model_folder, saved_image_folder = get_dir(args) + + device = torch.device("cpu") + if use_cuda: + device = torch.device("cuda:0") + + transform_list = [ + transforms.Resize((int(im_size), int(im_size))), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + ] + trans = transforms.Compose(transform_list) + + if 'lmdb' in data_root: + from operation import MultiResolutionDataset + dataset = MultiResolutionDataset(data_root, trans, 1024) + else: + dataset = ImageFolder(root=data_root, transform=trans) + + dataloader = iter(DataLoader(dataset, batch_size=batch_size, shuffle=False, + sampler=InfiniteSamplerWrapper(dataset), num_workers=dataloader_workers, + pin_memory=True)) + netG = Generator(ngf=ngf, nz=nz, im_size=im_size) + netG.apply(weights_init) + + netD = Discriminator(ndf=ndf, im_size=im_size) + netD.apply(weights_init) + + netG.to(device) + netD.to(device) + + avg_param_G = copy_G_params(netG) + + fixed_noise = torch.FloatTensor(8, nz).normal_(0, 1).to(device) + + optimizerG = optim.Adam(netG.parameters(), lr=nlr, betas=(nbeta1, 0.999)) + optimizerD = optim.Adam(netD.parameters(), lr=nlr, betas=(nbeta1, 0.999)) + + if multi_gpu: + netG = nn.DataParallel(netG.to(device)) + netD = nn.DataParallel(netD.to(device)) + + with open('train_cityscape.csv', 'w') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['epoch', 'd_loss', 'g_loss']) + + for iteration in tqdm(range(current_iteration, total_iterations + 1)): + real_image = next(dataloader) + real_image = real_image.to(device) + current_batch_size = real_image.size(0) + noise = torch.Tensor(current_batch_size, nz).normal_(0, 1).to(device) + + fake_images = netG(noise) + + real_image = DiffAugment(real_image, policy=policy) + fake_images = [DiffAugment(fake, policy=policy) + for fake in fake_images] + + # 2. train Discriminator + netD.zero_grad() + + err_dr, rec_img_all, rec_img_small, rec_img_part = train_d( + netD, real_image, label="real") + train_d(netD, [fi.detach() for fi in fake_images], label="fake") + optimizerD.step() + + # 3. 
train Generator + netG.zero_grad() + pred_g = netD(fake_images, "fake") + err_g = -pred_g.mean() + + err_g.backward() + optimizerG.step() + + for p, avg_p in zip(netG.parameters(), avg_param_G): + avg_p.mul_(0.999).add_(0.001 * p.data) + + if iteration % (save_interval * 10) == 0: + backup_para = copy_G_params(netG) + load_params(netG, avg_param_G) + with torch.no_grad(): + vutils.save_image(netG(fixed_noise)[0].add(1).mul(0.5), saved_image_folder + '/%d.jpg' % iteration, + nrow=4) + vutils.save_image(torch.cat([ + F.interpolate(real_image, 128), + rec_img_all, rec_img_small, + rec_img_part]).add(1).mul(0.5), saved_image_folder + '/rec_%d.jpg' % iteration) + load_params(netG, backup_para) + + if iteration % (save_interval * 50) == 0 or iteration == total_iterations: + backup_para = copy_G_params(netG) + load_params(netG, avg_param_G) + torch.save({'g': netG.state_dict(), 'd': netD.state_dict()}, + saved_model_folder + '/%d.pth' % iteration) + load_params(netG, backup_para) + torch.save({'g': netG.state_dict(), + 'd': netD.state_dict(), + 'g_ema': avg_param_G, + 'opt_g': optimizerG.state_dict(), + 'opt_d': optimizerD.state_dict()}, saved_model_folder + '/all_%d.pth' % iteration) + + with open('train_cityscape.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([iteration, err_dr, -err_g.item()]) + + +if __name__ == "__main__": + configs = load_yaml('../config.yaml') + args = dict() + args['path'] = configs['dataset_path'] + args['iter'] = configs['GAN'][0]['iter'] + args['batch_size'] = configs['GAN'][1]['batch_size'] + args['im_size'] = configs['GAN'][2]['im_size'] + args['name'] = configs['GAN'][3]['name'] + train(args) diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/README.md b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/README.md new file mode 100644 index 00000000..f977591c --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/README.md @@ -0,0 +1,73 @@ +# Integrating GAN and Self-taught Learning into Ianvs Lifelong Learning + +## Overview + +We propose an approach that combines [GAN](https://en.wikipedia.org/wiki/Generative_adversarial_network) and [Self-taught Learning](https://ai.stanford.edu/~hllee/icml07-selftaughtlearning.pdf) to solve the small-sample problem in Ianvs lifelong learning, as shown in the figure below, corresponding to the [pull request](https://github.com/kubeedge/sedna/pull/378). + +For a quick start, jump directly to [Developer Notes](#developer-notes). + +![](./imgs/ianvs-lifelonglearning.png) + +We describe the architecture and the process below; a code-level sketch of these steps follows the list. More details can be found in [Architecture](#architecture). + +![](./imgs/ianvs-lifelonglearning2.png) + +1. Train the GAN with the original small-sample data +2. The GAN generates more data according to the learned probability distribution +3. Train the Autoencoder (which consists of an encoder and a decoder) with the data generated by the GAN +4. Use the encoder to get the data representation of the original small-sample data +5. Use the data representation and the original labels to train the model that the unseen task needs +6. Output a well-trained model
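+
+The six steps above can be sketched in code as follows. This is only an illustrative sketch: `Generator` is the FastGAN generator defined in `GAN/models.py`, while `train_autoencoder`, `train_task_model`, `original_imgs`, `original_labels` and the save path are placeholder names standing in for the scripts under `self-taught-learning/` and `model-to-be-trained/`, not actual APIs of this project.
+
+```python
+import torch
+from GAN.models import Generator
+
+# Step 1: GAN/train.py fits the GAN on the original small-sample images.
+nz = 256
+netG = Generator(ngf=64, nz=nz, im_size=1024)  # load weights produced by GAN/train.py
+
+# Step 2: sample extra images from the learned distribution.
+with torch.no_grad():
+    noise = torch.randn(16, nz)
+    fake_imgs = netG(noise)[0]
+
+# Step 3: train a convolutional autoencoder on the generated images (placeholder API).
+encoder = train_autoencoder(fake_imgs)
+
+# Steps 4-5: encode the original small-sample data and train the unseen-task model on the representations.
+features = encoder(original_imgs)
+task_model = train_task_model(features, original_labels)
+
+# Step 6: the trained task model is the output handed back to Ianvs (path is illustrative).
+torch.save(task_model.state_dict(), 'model-to-be-trained/train_results/model.pth')
+```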
+ +## Architecture + +- Overview + + ![](./imgs/overview.png) + +- GAN (we follow [Towards Faster and Stabilized GAN Training for High-fidelity Few-shot Image Synthesis](https://openreview.net/forum?id=1Fqg133qRaI)) + + Discriminator + + ![](./imgs/discriminator.png) + + Generator + + ![](./imgs/generator.png) + +- Convolutional AutoEncoder of Self-taught Learning + + ![](./imgs/cae.png) + +## Developer Notes + +```bash +GANwithSelf-taughtLearning # root path of the project + - config.yaml # the config of input paths (dataset and model to be trained) as well as the hyperparameters of GAN and Self-taught Learning + - GAN # GAN module + - models.py # define discriminator and generator + - train.py # train GAN here + - ./train_results # training outputs, like GAN model, training loss and evaluation of GAN + - self-taught-learning # self-taught learning module + - models.py # Define AutoEncoder. Here we use Convolutional AutoEncoder (CAE). + - train.py # train the self-taught learning autoencoder here + - ./train_results # training outputs, like encoder model and training loss + - model-to-be-trained # model to be trained module + - train.py # train model + - model.py # define model + - ./train_results # training results of the model to be trained + - util.py # util module +``` +For the trained weights and models, please refer to [Google drive](https://drive.google.com/drive/folders/1IOQCQ3sntxrbt7RtJIsSlBo0PFrR7Ets?usp=drive_link). + +There are two ways to use the module: on its own, or integrated into Ianvs. + +To use the module on its own, configure `config.yaml` to tell `GANwithSelf-taughtLearning` where the **dataset** is, which **model** you want to train, and what the **hyperparameters** are. + +A typical workflow is: + +1. run `python ./GAN/train.py` +2. run `python ./self-taught-learning/train.py` +3. run `./train.py` + +To use the module as an integration into Ianvs, first compile this version of [Sedna](https://github.com/nailtu30/sedna/tree/ospp-final), which integrates `GANwithSelf-taughtLearning` into the folder `lib/sedna/algorithms/unseen_task_processing`. Then just start Ianvs normally, where our algorithm has been integrated into the main process, and you can simulate an unseen task by setting `model to be trained` in `config.yaml`. diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/__init__.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/__init__.py new file mode 100755 index 00000000..84bbda38 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/__init__.py @@ -0,0 +1,2 @@ +from . import deeplabv3 +from . 
import selftaughtlearning diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/config.yaml b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/config.yaml new file mode 100755 index 00000000..3e5aae45 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/config.yaml @@ -0,0 +1,26 @@ +dataset_path: '../data/img' + +# GAN config +GAN: + - iter: 50000 + - batch_size: 8 + - im_size: 1024 + - name: 'test2' + +# Self-taught Learning config +STL: + - iter: 100 + - lr: 1.0e-3 + - batch_size: 30 + - name: 'encoder_models' + +# model to be trained config +# here we take deeplabv3 as example +deeplabv3: + - iter: 1000 + - batch_size: 3 + - lr: 1.0e-4 + - name: "1" + - cityscapes_data_path: "" # developer needs to specify + - cityscapes_meta_path: "" # developer needs to specify + - class_weights: "" # developer needs to specify \ No newline at end of file diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/__init__.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/__init__.py new file mode 100755 index 00000000..92c55495 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/__init__.py @@ -0,0 +1,3 @@ +from . import train +from . import datasets +from . import model diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/datasets.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/datasets.py new file mode 100755 index 00000000..bd4e1eb6 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/datasets.py @@ -0,0 +1,128 @@ +import torch +import torch.utils.data + +import numpy as np +import cv2 +import os + +train_dirs = ["jena/", "zurich/", "weimar/", "ulm/", "tubingen/", "stuttgart/", + "strasbourg/", "monchengladbach/", "krefeld/", "hanover/", + "hamburg/", "erfurt/", "dusseldorf/", "darmstadt/", "cologne/", + "bremen/", "bochum/", "aachen/"] +val_dirs = ["frankfurt/", "munster/", "lindau/"] +test_dirs = ["berlin", "bielefeld", "bonn", "leverkusen", "mainz", "munich"] + + +class DatasetTrain(torch.utils.data.Dataset): + def __init__(self, cityscapes_data_path, cityscapes_meta_path): + self.img_dir = cityscapes_data_path + "/leftImg8bit/train/" + self.label_dir = cityscapes_meta_path + "/label_imgs/" + + self.img_h = 1024 + self.img_w = 2048 + + self.new_img_h = 512 + self.new_img_w = 1024 + + self.examples = [] + for train_dir in train_dirs: + train_img_dir_path = self.img_dir + train_dir + + file_names = os.listdir(train_img_dir_path) + for file_name in file_names: + img_id = file_name.split("_leftImg8bit.png")[0] + + img_path = train_img_dir_path + file_name + + label_img_path = self.label_dir + img_id + ".png" + + example = {} + example["img_path"] = img_path + example["label_img_path"] = label_img_path + example["img_id"] = img_id + self.examples.append(example) + self.examples = self.examples[0:99] + self.num_examples = len(self.examples) + + def __getitem__(self, index): + example = self.examples[index] + + img_path = example["img_path"] + img = cv2.imread(img_path, -1) + label_img_path = example["label_img_path"] + label_img = cv2.imread(label_img_path, -1) + label_img = cv2.resize(label_img, (self.new_img_w, self.new_img_h), + interpolation=cv2.INTER_NEAREST) + img = img/255.0 + img = img - np.array([0.485, 0.456, 0.406]) + img = img/np.array([0.229, 0.224, 0.225]) + img = 
np.transpose(img, (2, 0, 1)) + img = img.astype(np.float32) + img = torch.from_numpy(img) + label_img = torch.from_numpy(label_img) + + return (img, label_img) + + def __len__(self): + return self.num_examples + + +class DatasetVal(torch.utils.data.Dataset): + def __init__(self, cityscapes_data_path, cityscapes_meta_path): + self.img_dir = cityscapes_data_path + "/leftImg8bit/val/" + self.label_dir = cityscapes_meta_path + "/label_imgs/" + + self.img_h = 1024 + self.img_w = 2048 + + self.new_img_h = 1024 + self.new_img_w = 2048 + + self.examples = [] + for val_dir in val_dirs: + val_img_dir_path = self.img_dir + val_dir + + file_names = os.listdir(val_img_dir_path) + for file_name in file_names: + img_id = file_name.split("_leftImg8bit.png")[0] + + img_path = val_img_dir_path + file_name + + label_img_path = self.label_dir + img_id + ".png" + label_img = cv2.imread(label_img_path, -1) + + example = {} + example["img_path"] = img_path + example["label_img_path"] = label_img_path + example["img_id"] = img_id + self.examples.append(example) + self.examples = self.examples[0:99] + self.num_examples = len(self.examples) + + def __getitem__(self, index): + example = self.examples[index] + + img_id = example["img_id"] + + img_path = example["img_path"] + img = cv2.imread(img_path, -1) + img = cv2.resize(img, (self.new_img_w, self.new_img_h), + interpolation=cv2.INTER_NEAREST) + + label_img_path = example["label_img_path"] + label_img = cv2.imread(label_img_path, -1) + label_img = cv2.resize(label_img, (self.new_img_w, self.new_img_h), + interpolation=cv2.INTER_NEAREST) + img = img/255.0 + img = img - np.array([0.485, 0.456, 0.406]) + img = img/np.array([0.229, 0.224, 0.225]) + img = np.transpose(img, (2, 0, 1)) + img = img.astype(np.float32) + + img = torch.from_numpy(img) + label_img = torch.from_numpy(label_img) + + return (img, label_img, img_id) + + def __len__(self): + return self.num_examples diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/model/__init__.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/model/__init__.py new file mode 100755 index 00000000..be8d2e0d --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/model/__init__.py @@ -0,0 +1,3 @@ +from . import aspp +from . import deeplabv3 +from . 
import resnet \ No newline at end of file diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/model/aspp.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/model/aspp.py new file mode 100755 index 00000000..ccb49b88 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/model/aspp.py @@ -0,0 +1,99 @@ +# camera-ready + +import torch +import torch.nn as nn +import torch.nn.functional as F + +class ASPP(nn.Module): + def __init__(self, num_classes): + super(ASPP, self).__init__() + + self.conv_1x1_1 = nn.Conv2d(512, 256, kernel_size=1) + self.bn_conv_1x1_1 = nn.BatchNorm2d(256) + + self.conv_3x3_1 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=6, dilation=6) + self.bn_conv_3x3_1 = nn.BatchNorm2d(256) + + self.conv_3x3_2 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=12, dilation=12) + self.bn_conv_3x3_2 = nn.BatchNorm2d(256) + + self.conv_3x3_3 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=18, dilation=18) + self.bn_conv_3x3_3 = nn.BatchNorm2d(256) + + self.avg_pool = nn.AdaptiveAvgPool2d(1) + + self.conv_1x1_2 = nn.Conv2d(512, 256, kernel_size=1) + self.bn_conv_1x1_2 = nn.BatchNorm2d(256) + + self.conv_1x1_3 = nn.Conv2d(1280, 256, kernel_size=1) # (1280 = 5*256) + self.bn_conv_1x1_3 = nn.BatchNorm2d(256) + + self.conv_1x1_4 = nn.Conv2d(256, num_classes, kernel_size=1) + + def forward(self, feature_map): + # (feature_map has shape (batch_size, 512, h/16, w/16)) (assuming self.resnet is ResNet18_OS16 or ResNet34_OS16. If self.resnet instead is ResNet18_OS8 or ResNet34_OS8, it will be (batch_size, 512, h/8, w/8)) + + feature_map_h = feature_map.size()[2] # (== h/16) + feature_map_w = feature_map.size()[3] # (== w/16) + + out_1x1 = F.relu(self.bn_conv_1x1_1(self.conv_1x1_1(feature_map))) # (shape: (batch_size, 256, h/16, w/16)) + out_3x3_1 = F.relu(self.bn_conv_3x3_1(self.conv_3x3_1(feature_map))) # (shape: (batch_size, 256, h/16, w/16)) + out_3x3_2 = F.relu(self.bn_conv_3x3_2(self.conv_3x3_2(feature_map))) # (shape: (batch_size, 256, h/16, w/16)) + out_3x3_3 = F.relu(self.bn_conv_3x3_3(self.conv_3x3_3(feature_map))) # (shape: (batch_size, 256, h/16, w/16)) + + out_img = self.avg_pool(feature_map) # (shape: (batch_size, 512, 1, 1)) + out_img = F.relu(self.bn_conv_1x1_2(self.conv_1x1_2(out_img))) # (shape: (batch_size, 256, 1, 1)) + out_img = F.upsample(out_img, size=(feature_map_h, feature_map_w), mode="bilinear") # (shape: (batch_size, 256, h/16, w/16)) + + out = torch.cat([out_1x1, out_3x3_1, out_3x3_2, out_3x3_3, out_img], 1) # (shape: (batch_size, 1280, h/16, w/16)) + out = F.relu(self.bn_conv_1x1_3(self.conv_1x1_3(out))) # (shape: (batch_size, 256, h/16, w/16)) + out = self.conv_1x1_4(out) # (shape: (batch_size, num_classes, h/16, w/16)) + + return out + +class ASPP_Bottleneck(nn.Module): + def __init__(self, num_classes): + super(ASPP_Bottleneck, self).__init__() + + self.conv_1x1_1 = nn.Conv2d(4*512, 256, kernel_size=1) + self.bn_conv_1x1_1 = nn.BatchNorm2d(256) + + self.conv_3x3_1 = nn.Conv2d(4*512, 256, kernel_size=3, stride=1, padding=6, dilation=6) + self.bn_conv_3x3_1 = nn.BatchNorm2d(256) + + self.conv_3x3_2 = nn.Conv2d(4*512, 256, kernel_size=3, stride=1, padding=12, dilation=12) + self.bn_conv_3x3_2 = nn.BatchNorm2d(256) + + self.conv_3x3_3 = nn.Conv2d(4*512, 256, kernel_size=3, stride=1, padding=18, dilation=18) + self.bn_conv_3x3_3 = nn.BatchNorm2d(256) + + self.avg_pool = nn.AdaptiveAvgPool2d(1) + + self.conv_1x1_2 = nn.Conv2d(4*512, 256, 
kernel_size=1) + self.bn_conv_1x1_2 = nn.BatchNorm2d(256) + + self.conv_1x1_3 = nn.Conv2d(1280, 256, kernel_size=1) # (1280 = 5*256) + self.bn_conv_1x1_3 = nn.BatchNorm2d(256) + + self.conv_1x1_4 = nn.Conv2d(256, num_classes, kernel_size=1) + + def forward(self, feature_map): + # (feature_map has shape (batch_size, 4*512, h/16, w/16)) + + feature_map_h = feature_map.size()[2] # (== h/16) + feature_map_w = feature_map.size()[3] # (== w/16) + + out_1x1 = F.relu(self.bn_conv_1x1_1(self.conv_1x1_1(feature_map))) # (shape: (batch_size, 256, h/16, w/16)) + out_3x3_1 = F.relu(self.bn_conv_3x3_1(self.conv_3x3_1(feature_map))) # (shape: (batch_size, 256, h/16, w/16)) + out_3x3_2 = F.relu(self.bn_conv_3x3_2(self.conv_3x3_2(feature_map))) # (shape: (batch_size, 256, h/16, w/16)) + out_3x3_3 = F.relu(self.bn_conv_3x3_3(self.conv_3x3_3(feature_map))) # (shape: (batch_size, 256, h/16, w/16)) + + out_img = self.avg_pool(feature_map) # (shape: (batch_size, 512, 1, 1)) + out_img = F.relu(self.bn_conv_1x1_2(self.conv_1x1_2(out_img))) # (shape: (batch_size, 256, 1, 1)) + out_img = F.upsample(out_img, size=(feature_map_h, feature_map_w), mode="bilinear") # (shape: (batch_size, 256, h/16, w/16)) + + out = torch.cat([out_1x1, out_3x3_1, out_3x3_2, out_3x3_3, out_img], 1) # (shape: (batch_size, 1280, h/16, w/16)) + out = F.relu(self.bn_conv_1x1_3(self.conv_1x1_3(out))) # (shape: (batch_size, 256, h/16, w/16)) + out = self.conv_1x1_4(out) # (shape: (batch_size, num_classes, h/16, w/16)) + + return out diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/model/deeplabv3.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/model/deeplabv3.py new file mode 100755 index 00000000..0be3cd35 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/model/deeplabv3.py @@ -0,0 +1,45 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +import os +import sys +from sedna.algorithms.unseen_task_processing.GANwithSelfTaughtLearning.deeplabv3.model.resnet import ResNet18_OS16, ResNet34_OS16, ResNet50_OS16, ResNet101_OS16, ResNet152_OS16, ResNet18_OS8, ResNet34_OS8 +from sedna.algorithms.unseen_task_processing.GANwithSelfTaughtLearning.deeplabv3.model.aspp import ASPP, ASPP_Bottleneck + +class DeepLabV3(nn.Module): + def __init__(self, model_id, project_dir): + super(DeepLabV3, self).__init__() + + self.num_classes = 20 + + self.model_id = model_id + self.project_dir = project_dir + self.create_model_dirs() + + self.resnet = ResNet18_OS8() # NOTE! specify the type of ResNet here + self.aspp = ASPP(num_classes=self.num_classes) # NOTE! if you use ResNet50-152, set self.aspp = ASPP_Bottleneck(num_classes=self.num_classes) instead + + def forward(self, x): + # (x has shape (batch_size, 3, h, w)) + + h = x.size()[2] + w = x.size()[3] + + feature_map = self.resnet(x) # (shape: (batch_size, 512, h/16, w/16)) (assuming self.resnet is ResNet18_OS16 or ResNet34_OS16. If self.resnet is ResNet18_OS8 or ResNet34_OS8, it will be (batch_size, 512, h/8, w/8). 
If self.resnet is ResNet50-152, it will be (batch_size, 4*512, h/16, w/16)) + # print(feature_map.shape) + output = self.aspp(feature_map) # (shape: (batch_size, num_classes, h/16, w/16)) + + output = F.upsample(output, size=(h, w), mode="bilinear") # (shape: (batch_size, num_classes, h, w)) + + return output + + def create_model_dirs(self): + self.logs_dir = self.project_dir + "/training_logs" + self.model_dir = self.logs_dir + "/model_%s" % self.model_id + self.checkpoints_dir = self.model_dir + "/checkpoints" + if not os.path.exists(self.logs_dir): + os.makedirs(self.logs_dir) + if not os.path.exists(self.model_dir): + os.makedirs(self.model_dir) + os.makedirs(self.checkpoints_dir) diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/model/resnet.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/model/resnet.py new file mode 100755 index 00000000..80d2dacd --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/model/resnet.py @@ -0,0 +1,233 @@ +# camera-ready + +# NOTE! OS: output stride, the ratio of input image resolution to final output resolution (OS16: output size is (img_h/16, img_w/16)) (OS8: output size is (img_h/8, img_w/8)) + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision.models as models + +def make_layer(block, in_channels, channels, num_blocks, stride=1, dilation=1): + strides = [stride] + [1]*(num_blocks - 1) # (stride == 2, num_blocks == 4 --> strides == [2, 1, 1, 1]) + + blocks = [] + for stride in strides: + blocks.append(block(in_channels=in_channels, channels=channels, stride=stride, dilation=dilation)) + in_channels = block.expansion*channels + + layer = nn.Sequential(*blocks) # (*blocks: call with unpacked list entires as arguments) + + return layer + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_channels, channels, stride=1, dilation=1): + super(BasicBlock, self).__init__() + + out_channels = self.expansion*channels + + self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=3, stride=stride, padding=dilation, dilation=dilation, bias=False) + self.bn1 = nn.BatchNorm2d(channels) + + self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=dilation, dilation=dilation, bias=False) + self.bn2 = nn.BatchNorm2d(channels) + + if (stride != 1) or (in_channels != out_channels): + conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False) + bn = nn.BatchNorm2d(out_channels) + self.downsample = nn.Sequential(conv, bn) + else: + self.downsample = nn.Sequential() + + def forward(self, x): + # (x has shape: (batch_size, in_channels, h, w)) + + out = F.relu(self.bn1(self.conv1(x))) # (shape: (batch_size, channels, h, w) if stride == 1, (batch_size, channels, h/2, w/2) if stride == 2) + out = self.bn2(self.conv2(out)) # (shape: (batch_size, channels, h, w) if stride == 1, (batch_size, channels, h/2, w/2) if stride == 2) + + out = out + self.downsample(x) # (shape: (batch_size, channels, h, w) if stride == 1, (batch_size, channels, h/2, w/2) if stride == 2) + + out = F.relu(out) # (shape: (batch_size, channels, h, w) if stride == 1, (batch_size, channels, h/2, w/2) if stride == 2) + + return out + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_channels, channels, stride=1, dilation=1): + super(Bottleneck, self).__init__() + + out_channels = self.expansion*channels + + self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=1, 
bias=False) + self.bn1 = nn.BatchNorm2d(channels) + + self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=stride, padding=dilation, dilation=dilation, bias=False) + self.bn2 = nn.BatchNorm2d(channels) + + self.conv3 = nn.Conv2d(channels, out_channels, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(out_channels) + + if (stride != 1) or (in_channels != out_channels): + conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False) + bn = nn.BatchNorm2d(out_channels) + self.downsample = nn.Sequential(conv, bn) + else: + self.downsample = nn.Sequential() + + def forward(self, x): + # (x has shape: (batch_size, in_channels, h, w)) + + out = F.relu(self.bn1(self.conv1(x))) # (shape: (batch_size, channels, h, w)) + out = F.relu(self.bn2(self.conv2(out))) # (shape: (batch_size, channels, h, w) if stride == 1, (batch_size, channels, h/2, w/2) if stride == 2) + out = self.bn3(self.conv3(out)) # (shape: (batch_size, out_channels, h, w) if stride == 1, (batch_size, out_channels, h/2, w/2) if stride == 2) + + out = out + self.downsample(x) # (shape: (batch_size, out_channels, h, w) if stride == 1, (batch_size, out_channels, h/2, w/2) if stride == 2) + + out = F.relu(out) # (shape: (batch_size, out_channels, h, w) if stride == 1, (batch_size, out_channels, h/2, w/2) if stride == 2) + + return out + +class ResNet_Bottleneck_OS16(nn.Module): + def __init__(self, num_layers): + super(ResNet_Bottleneck_OS16, self).__init__() + + if num_layers == 50: + resnet = models.resnet50() + # load pretrained model: + resnet.load_state_dict(torch.load("/root/deeplabv3/pretrained_models/resnet/resnet50-19c8e357.pth")) + # remove fully connected layer, avg pool and layer5: + self.resnet = nn.Sequential(*list(resnet.children())[:-3]) + + print ("pretrained resnet, 50") + elif num_layers == 101: + resnet = models.resnet101() + # load pretrained model: + resnet.load_state_dict(torch.load("/root/deeplabv3/pretrained_models/resnet/resnet101-5d3b4d8f.pth")) + # remove fully connected layer, avg pool and layer5: + self.resnet = nn.Sequential(*list(resnet.children())[:-3]) + + print ("pretrained resnet, 101") + elif num_layers == 152: + resnet = models.resnet152() + # load pretrained model: + resnet.load_state_dict(torch.load("/root/deeplabv3/pretrained_models/resnet/resnet152-b121ed2d.pth")) + # remove fully connected layer, avg pool and layer5: + self.resnet = nn.Sequential(*list(resnet.children())[:-3]) + + print ("pretrained resnet, 152") + else: + raise Exception("num_layers must be in {50, 101, 152}!") + + self.layer5 = make_layer(Bottleneck, in_channels=4*256, channels=512, num_blocks=3, stride=1, dilation=2) + + def forward(self, x): + # (x has shape (batch_size, 3, h, w)) + + # pass x through (parts of) the pretrained ResNet: + c4 = self.resnet(x) # (shape: (batch_size, 4*256, h/16, w/16)) (it's called c4 since 16 == 2^4) + + output = self.layer5(c4) # (shape: (batch_size, 4*512, h/16, w/16)) + + return output + +class ResNet_BasicBlock_OS16(nn.Module): + def __init__(self, num_layers): + super(ResNet_BasicBlock_OS16, self).__init__() + + if num_layers == 18: + resnet = models.resnet18() + # load pretrained model: + resnet.load_state_dict(torch.load("/root/deeplabv3/pretrained_models/resnet/resnet18-5c106cde.pth")) + # remove fully connected layer, avg pool and layer5: + self.resnet = nn.Sequential(*list(resnet.children())[:-3]) + + num_blocks = 2 + print ("pretrained resnet, 18") + elif num_layers == 34: + resnet = models.resnet34() + # load pretrained model: + 
resnet.load_state_dict(torch.load("/root/deeplabv3/pretrained_models/resnet/resnet34-333f7ec4.pth")) + # remove fully connected layer, avg pool and layer5: + self.resnet = nn.Sequential(*list(resnet.children())[:-3]) + + num_blocks = 3 + print ("pretrained resnet, 34") + else: + raise Exception("num_layers must be in {18, 34}!") + + self.layer5 = make_layer(BasicBlock, in_channels=256, channels=512, num_blocks=num_blocks, stride=1, dilation=2) + + def forward(self, x): + # (x has shape (batch_size, 3, h, w)) + + # pass x through (parts of) the pretrained ResNet: + c4 = self.resnet(x) # (shape: (batch_size, 256, h/16, w/16)) (it's called c4 since 16 == 2^4) + + output = self.layer5(c4) # (shape: (batch_size, 512, h/16, w/16)) + + return output + +class ResNet_BasicBlock_OS8(nn.Module): + def __init__(self, num_layers): + super(ResNet_BasicBlock_OS8, self).__init__() + + if num_layers == 18: + resnet = models.resnet18() + # load pretrained model: + resnet.load_state_dict(torch.load("/home/nailtu/PycharmProjects/deeplabv3-master/pretrained_models/resnet/resnet18-5c106cde.pth")) + # remove fully connected layer, avg pool, layer4 and layer5: + self.resnet = nn.Sequential(*list(resnet.children())[:-4]) + + num_blocks_layer_4 = 2 + num_blocks_layer_5 = 2 + print ("pretrained resnet, 18") + elif num_layers == 34: + resnet = models.resnet34() + # load pretrained model: + resnet.load_state_dict(torch.load("/root/deeplabv3/pretrained_models/resnet/resnet34-333f7ec4.pth")) + # remove fully connected layer, avg pool, layer4 and layer5: + self.resnet = nn.Sequential(*list(resnet.children())[:-4]) + + num_blocks_layer_4 = 6 + num_blocks_layer_5 = 3 + print ("pretrained resnet, 34") + else: + raise Exception("num_layers must be in {18, 34}!") + + self.layer4 = make_layer(BasicBlock, in_channels=128, channels=256, num_blocks=num_blocks_layer_4, stride=1, dilation=2) + + self.layer5 = make_layer(BasicBlock, in_channels=256, channels=512, num_blocks=num_blocks_layer_5, stride=1, dilation=4) + + def forward(self, x): + # (x has shape (batch_size, 3, h, w)) + + # pass x through (parts of) the pretrained ResNet: + c3 = self.resnet(x) # (shape: (batch_size, 128, h/8, w/8)) (it's called c3 since 8 == 2^3) + + output = self.layer4(c3) # (shape: (batch_size, 256, h/8, w/8)) + output = self.layer5(output) # (shape: (batch_size, 512, h/8, w/8)) + + return output + +def ResNet18_OS16(): + return ResNet_BasicBlock_OS16(num_layers=18) + +def ResNet34_OS16(): + return ResNet_BasicBlock_OS16(num_layers=34) + +def ResNet50_OS16(): + return ResNet_Bottleneck_OS16(num_layers=50) + +def ResNet101_OS16(): + return ResNet_Bottleneck_OS16(num_layers=101) + +def ResNet152_OS16(): + return ResNet_Bottleneck_OS16(num_layers=152) + +def ResNet18_OS8(): + return ResNet_BasicBlock_OS8(num_layers=18) + +def ResNet34_OS8(): + return ResNet_BasicBlock_OS8(num_layers=34) diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/train.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/train.py new file mode 100755 index 00000000..851c9efd --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/train.py @@ -0,0 +1,147 @@ +from sedna.common.log import LOGGER +from sedna.algorithms.unseen_task_processing.GANwithSelfTaughtLearning.util import load_yaml +import matplotlib.pyplot as plt +import os + +from sedna.algorithms.unseen_task_processing.GANwithSelfTaughtLearning.deeplabv3.datasets import DatasetTrain, DatasetVal +from 
sedna.algorithms.unseen_task_processing.GANwithSelfTaughtLearning.deeplabv3.model.deeplabv3 import DeepLabV3 +import sys +from sedna.algorithms.unseen_task_processing.GANwithSelfTaughtLearning.deeplabv3.utils.utils import add_weight_decay + +import torch +import torch.utils.data +import torch.nn as nn +from torch.autograd import Variable +import torch.optim as optim +import torch.nn.functional as F + +import numpy as np +import pickle +import matplotlib +matplotlib.use("Agg") + + +class Encoder(nn.Module): + def __init__(self) -> None: + super(Encoder, self).__init__() + self.enc3 = nn.Conv2d( + in_channels=3, out_channels=8, kernel_size=3, stride=2, padding=1 + ) + self.enc4 = nn.Conv2d( + in_channels=8, out_channels=3, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, x): + x = F.relu(self.enc3(x)) + x = F.relu(self.enc4(x)) + return x + + +def train_deepblabv3(): + configs = load_yaml('../config.yaml') + model_id = configs['deeplabv3'][3]['name'] + + encoder = Encoder().cuda() + encoder.load_state_dict(torch.load( + '../self-taught-learning/train_results/encoder_models4/encoder50.pth')) + + num_epochs = configs['deeplabv3'][0]['iter'] + batch_size = configs['deeplabv3'][1]['batch_size'] + learning_rate = configs['deeplabv3'][2]['lr'] + + network = DeepLabV3(model_id, project_dir=os.getcwd()).cuda() + + train_dataset = DatasetTrain(cityscapes_data_path=configs['deeplabv3'][4]['cityscapes_data_path'], + cityscapes_meta_path=configs['deeplabv3'][5]['cityscapes_meta_path']) + val_dataset = DatasetVal(cityscapes_data_path=configs['deeplabv3'][4]['cityscapes_data_path'], + cityscapes_meta_path=configs['deeplabv3'][5]['cityscapes_meta_path']) + + num_train_batches = int(len(train_dataset) / batch_size) + num_val_batches = int(len(val_dataset) / batch_size) + + train_loader = torch.utils.data.DataLoader(dataset=train_dataset, + batch_size=batch_size, shuffle=True, + num_workers=1) + val_loader = torch.utils.data.DataLoader(dataset=val_dataset, + batch_size=batch_size, shuffle=False, + num_workers=1) + + params = add_weight_decay(network, l2_value=0.0001) + optimizer = torch.optim.Adam(params, lr=learning_rate) + + with open(configs['deeplabv3'][6]['class_weights'], "rb") as file: + class_weights = np.array(pickle.load(file)) + class_weights = torch.from_numpy(class_weights) + class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda() + + loss_fn = nn.CrossEntropyLoss(weight=class_weights) + + epoch_losses_train = [] + epoch_losses_val = [] + for epoch in range(num_epochs): + LOGGER.info("epoch: %d/%d" % (epoch + 1, num_epochs)) + + ############################################################################ + # train: + ############################################################################ + network.train() # (set in training mode, this affects BatchNorm and dropout) + batch_losses = [] + for step, (imgs, label_imgs) in enumerate(train_loader): + imgs = Variable(imgs).cuda() + # encoder images + imgs = encoder(imgs) + label_imgs = Variable(label_imgs.type(torch.LongTensor)).cuda() + + outputs = network(imgs) + + loss = loss_fn(outputs, label_imgs) + loss_value = loss.data.cpu().numpy() + batch_losses.append(loss_value) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + epoch_loss = np.mean(batch_losses) + epoch_losses_train.append(epoch_loss) + with open("%s/epoch_losses_train.pkl" % network.model_dir, "wb") as file: + pickle.dump(epoch_losses_train, file) + LOGGER.info("train loss: %g" % epoch_loss) + plt.figure(1) + plt.plot(epoch_losses_train, 
"k^") + plt.plot(epoch_losses_train, "k") + plt.ylabel("loss") + plt.xlabel("epoch") + plt.title("train loss per epoch") + plt.savefig("%s/epoch_losses_train.png" % network.model_dir) + plt.close(1) + + network.eval() + batch_losses = [] + for step, (imgs, label_imgs, img_ids) in enumerate(val_loader): + with torch.no_grad(): + imgs = Variable(imgs).cuda() + label_imgs = Variable(label_imgs.type(torch.LongTensor)).cuda() + + outputs = network(imgs) + loss = loss_fn(outputs, label_imgs) + loss_value = loss.data.cpu().numpy() + batch_losses.append(loss_value) + + epoch_loss = np.mean(batch_losses) + epoch_losses_val.append(epoch_loss) + with open("%s/epoch_losses_val.pkl" % network.model_dir, "wb") as file: + pickle.dump(epoch_losses_val, file) + LOGGER.info("val loss: %g" % epoch_loss) + plt.figure(1) + plt.plot(epoch_losses_val, "k^") + plt.plot(epoch_losses_val, "k") + plt.ylabel("loss") + plt.xlabel("epoch") + plt.title("val loss per epoch") + plt.savefig("%s/epoch_losses_val.png" % network.model_dir) + plt.close(1) + + +if __name__ == '__main__': + train_deepblabv3() diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/utils/__init__.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/utils/__init__.py new file mode 100755 index 00000000..9f9161bf --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/utils/__init__.py @@ -0,0 +1 @@ +from . import utils \ No newline at end of file diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/utils/preprocess_data.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/utils/preprocess_data.py new file mode 100755 index 00000000..ca9dc83b --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/utils/preprocess_data.py @@ -0,0 +1,190 @@ +# camera-ready + +import pickle +import numpy as np +import cv2 +import os +from collections import namedtuple + +# (NOTE! this is taken from the official Cityscapes scripts:) +Label = namedtuple( 'Label' , [ + + 'name' , # The identifier of this label, e.g. 'car', 'person', ... . + # We use them to uniquely name a class + + 'id' , # An integer ID that is associated with this label. + # The IDs are used to represent the label in ground truth images + # An ID of -1 means that this label does not have an ID and thus + # is ignored when creating ground truth images (e.g. license plate). + # Do not modify these IDs, since exactly these IDs are expected by the + # evaluation server. + + 'trainId' , # Feel free to modify these IDs as suitable for your method. Then create + # ground truth images with train IDs, using the tools provided in the + # 'preparation' folder. However, make sure to validate or submit results + # to our evaluation server using the regular IDs above! + # For trainIds, multiple labels might have the same ID. Then, these labels + # are mapped to the same class in the ground truth images. For the inverse + # mapping, we use the label that is defined first in the list below. + # For example, mapping all void-type classes to the same ID in training, + # might make sense for some approaches. + # Max value is 255! + + 'category' , # The name of the category that this label belongs to + + 'categoryId' , # The ID of this category. Used to create ground truth images + # on category level. 
+ + 'hasInstances', # Whether this label distinguishes between single instances or not + + 'ignoreInEval', # Whether pixels having this class as ground truth label are ignored + # during evaluations or not + + 'color' , # The color of this label + ] ) + +# (NOTE! this is taken from the official Cityscapes scripts:) +labels = [ + # name id trainId category catId hasInstances ignoreInEval color + Label( 'unlabeled' , 0 , 19 , 'void' , 0 , False , True , ( 0, 0, 0) ), + Label( 'ego vehicle' , 1 , 19 , 'void' , 0 , False , True , ( 0, 0, 0) ), + Label( 'rectification border' , 2 , 19 , 'void' , 0 , False , True , ( 0, 0, 0) ), + Label( 'out of roi' , 3 , 19 , 'void' , 0 , False , True , ( 0, 0, 0) ), + Label( 'static' , 4 , 19 , 'void' , 0 , False , True , ( 0, 0, 0) ), + Label( 'dynamic' , 5 , 19 , 'void' , 0 , False , True , (111, 74, 0) ), + Label( 'ground' , 6 , 19 , 'void' , 0 , False , True , ( 81, 0, 81) ), + Label( 'road' , 7 , 0 , 'flat' , 1 , False , False , (128, 64,128) ), + Label( 'sidewalk' , 8 , 1 , 'flat' , 1 , False , False , (244, 35,232) ), + Label( 'parking' , 9 , 19 , 'flat' , 1 , False , True , (250,170,160) ), + Label( 'rail track' , 10 , 19 , 'flat' , 1 , False , True , (230,150,140) ), + Label( 'building' , 11 , 2 , 'construction' , 2 , False , False , ( 70, 70, 70) ), + Label( 'wall' , 12 , 3 , 'construction' , 2 , False , False , (102,102,156) ), + Label( 'fence' , 13 , 4 , 'construction' , 2 , False , False , (190,153,153) ), + Label( 'guard rail' , 14 , 19 , 'construction' , 2 , False , True , (180,165,180) ), + Label( 'bridge' , 15 , 19 , 'construction' , 2 , False , True , (150,100,100) ), + Label( 'tunnel' , 16 , 19 , 'construction' , 2 , False , True , (150,120, 90) ), + Label( 'pole' , 17 , 5 , 'object' , 3 , False , False , (153,153,153) ), + Label( 'polegroup' , 18 , 19 , 'object' , 3 , False , True , (153,153,153) ), + Label( 'traffic light' , 19 , 6 , 'object' , 3 , False , False , (250,170, 30) ), + Label( 'traffic sign' , 20 , 7 , 'object' , 3 , False , False , (220,220, 0) ), + Label( 'vegetation' , 21 , 8 , 'nature' , 4 , False , False , (107,142, 35) ), + Label( 'terrain' , 22 , 9 , 'nature' , 4 , False , False , (152,251,152) ), + Label( 'sky' , 23 , 10 , 'sky' , 5 , False , False , ( 70,130,180) ), + Label( 'person' , 24 , 11 , 'human' , 6 , True , False , (220, 20, 60) ), + Label( 'rider' , 25 , 12 , 'human' , 6 , True , False , (255, 0, 0) ), + Label( 'car' , 26 , 13 , 'vehicle' , 7 , True , False , ( 0, 0,142) ), + Label( 'truck' , 27 , 14 , 'vehicle' , 7 , True , False , ( 0, 0, 70) ), + Label( 'bus' , 28 , 15 , 'vehicle' , 7 , True , False , ( 0, 60,100) ), + Label( 'caravan' , 29 , 19 , 'vehicle' , 7 , True , True , ( 0, 0, 90) ), + Label( 'trailer' , 30 , 19 , 'vehicle' , 7 , True , True , ( 0, 0,110) ), + Label( 'train' , 31 , 16 , 'vehicle' , 7 , True , False , ( 0, 80,100) ), + Label( 'motorcycle' , 32 , 17 , 'vehicle' , 7 , True , False , ( 0, 0,230) ), + Label( 'bicycle' , 33 , 18 , 'vehicle' , 7 , True , False , (119, 11, 32) ), + Label( 'license plate' , -1 , 19 , 'vehicle' , 7 , False , True , ( 0, 0,142) ), +] + +# create a function which maps id to trainId: +id_to_trainId = {label.id: label.trainId for label in labels} +id_to_trainId_map_func = np.vectorize(id_to_trainId.get) + +train_dirs = ["jena/", "zurich/", "weimar/", "ulm/", "tubingen/", "stuttgart/", + "strasbourg/", "monchengladbach/", "krefeld/", "hanover/", + "hamburg/", "erfurt/", "dusseldorf/", "darmstadt/", "cologne/", + "bremen/", "bochum/", "aachen/"] +val_dirs = 
["frankfurt/", "munster/", "lindau/"] +test_dirs = ["berlin", "bielefeld", "bonn", "leverkusen", "mainz", "munich"] + +cityscapes_data_path = "/home/nailtu/data/cityscapes" +cityscapes_meta_path = "/home/nailtu/data/cityscapes/meta" + +if not os.path.exists(cityscapes_meta_path): + os.makedirs(cityscapes_meta_path) +if not os.path.exists(cityscapes_meta_path + "/label_imgs"): + os.makedirs(cityscapes_meta_path + "/label_imgs") + +################################################################################ +# convert all labels to label imgs with trainId pixel values (and save to disk): +################################################################################ +train_label_img_paths = [] + +img_dir = cityscapes_data_path + "/leftImg8bit/train/" +label_dir = cityscapes_data_path + "/gtFine/train/" +for train_dir in train_dirs: + print (train_dir) + + train_img_dir_path = img_dir + train_dir + train_label_dir_path = label_dir + train_dir + + file_names = os.listdir(train_img_dir_path) + for file_name in file_names: + img_id = file_name.split("_leftImg8bit.png")[0] + + gtFine_img_path = train_label_dir_path + img_id + "_gtFine_labelIds.png" + gtFine_img = cv2.imread(gtFine_img_path, -1) # (shape: (1024, 2048)) + + # convert gtFine_img from id to trainId pixel values: + label_img = id_to_trainId_map_func(gtFine_img) # (shape: (1024, 2048)) + label_img = label_img.astype(np.uint8) + + cv2.imwrite(cityscapes_meta_path + "/label_imgs/" + img_id + ".png", label_img) + train_label_img_paths.append(cityscapes_meta_path + "/label_imgs/" + img_id + ".png") + +img_dir = cityscapes_data_path + "/leftImg8bit/val/" +label_dir = cityscapes_data_path + "/gtFine/val/" +for val_dir in val_dirs: + print (val_dir) + + val_img_dir_path = img_dir + val_dir + val_label_dir_path = label_dir + val_dir + + file_names = os.listdir(val_img_dir_path) + for file_name in file_names: + img_id = file_name.split("_leftImg8bit.png")[0] + + gtFine_img_path = val_label_dir_path + img_id + "_gtFine_labelIds.png" + gtFine_img = cv2.imread(gtFine_img_path, -1) # (shape: (1024, 2048)) + + # convert gtFine_img from id to trainId pixel values: + label_img = id_to_trainId_map_func(gtFine_img) # (shape: (1024, 2048)) + label_img = label_img.astype(np.uint8) + + cv2.imwrite(cityscapes_meta_path + "/label_imgs/" + img_id + ".png", label_img) + +################################################################################ +# compute the class weigths: +################################################################################ +print ("computing class weights") + +num_classes = 20 + +trainId_to_count = {} +for trainId in range(num_classes): + trainId_to_count[trainId] = 0 + +# get the total number of pixels in all train label_imgs that are of each object class: +for step, label_img_path in enumerate(train_label_img_paths): + if step % 100 == 0: + print (step) + + label_img = cv2.imread(label_img_path, -1) + + for trainId in range(num_classes): + # count how many pixels in label_img which are of object class trainId: + trainId_mask = np.equal(label_img, trainId) + trainId_count = np.sum(trainId_mask) + + # add to the total count: + trainId_to_count[trainId] += trainId_count + +# compute the class weights according to the ENet paper: +class_weights = [] +total_count = sum(trainId_to_count.values()) +for trainId, count in trainId_to_count.items(): + trainId_prob = float(count)/float(total_count) + trainId_weight = 1/np.log(1.02 + trainId_prob) + class_weights.append(trainId_weight) + +print (class_weights) + +with 
open(cityscapes_meta_path + "/class_weights.pkl", "wb") as file: + pickle.dump(class_weights, file, protocol=2) # (protocol=2 is needed to be able to open this file with python2) diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/utils/random_code.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/utils/random_code.py new file mode 100755 index 00000000..4c19463f --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/utils/random_code.py @@ -0,0 +1,23 @@ +# camera-ready + +# this file contains code snippets which I have found (more or less) useful at +# some point during the project. Probably nothing interesting to see here. + +import pickle +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt + +import numpy as np + +model_id = "13_2_2_2" + +with open("/home/fregu856/exjobb/training_logs/multitask/model_" + model_id + "/epoch_losses_train.pkl", "rb") as file: + train_loss = pickle.load(file) + +with open("/home/fregu856/exjobb/training_logs/multitask/model_" + model_id + "/epoch_losses_val.pkl", "rb") as file: + val_loss = pickle.load(file) + +print ("train loss min:", np.argmin(np.array(train_loss)), np.min(np.array(train_loss))) + +print ("val loss min:", np.argmin(np.array(val_loss)), np.min(np.array(val_loss))) diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/utils/utils.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/utils/utils.py new file mode 100755 index 00000000..b1844c10 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/deeplabv3/utils/utils.py @@ -0,0 +1,56 @@ +# camera-ready + +import torch +import torch.nn as nn + +import numpy as np + +def add_weight_decay(net, l2_value, skip_list=()): + # https://raberrytv.wordpress.com/2017/10/29/pytorch-weight-decay-made-easy/ + + decay, no_decay = [], [] + for name, param in net.named_parameters(): + if not param.requires_grad: + continue # frozen weights + if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list: + no_decay.append(param) + else: + decay.append(param) + + return [{'params': no_decay, 'weight_decay': 0.0}, {'params': decay, 'weight_decay': l2_value}] + +# function for colorizing a label image: +def label_img_to_color(img): + label_to_color = { + 0: [128, 64,128], + 1: [244, 35,232], + 2: [ 70, 70, 70], + 3: [102,102,156], + 4: [190,153,153], + 5: [153,153,153], + 6: [250,170, 30], + 7: [220,220, 0], + 8: [107,142, 35], + 9: [152,251,152], + 10: [ 70,130,180], + 11: [220, 20, 60], + 12: [255, 0, 0], + 13: [ 0, 0,142], + 14: [ 0, 0, 70], + 15: [ 0, 60,100], + 16: [ 0, 80,100], + 17: [ 0, 0,230], + 18: [119, 11, 32], + 19: [81, 0, 81] + } + + img_height, img_width = img.shape + + img_color = np.zeros((img_height, img_width, 3)) + for row in range(img_height): + for col in range(img_width): + label = img[row, col] + + img_color[row, col] = np.array(label_to_color[label]) + + return img_color diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/cae.png b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/cae.png new file mode 100644 index 00000000..57dcb21a Binary files /dev/null and b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/cae.png differ diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/discriminator.png 
b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/discriminator.png new file mode 100644 index 00000000..d8bfa8df Binary files /dev/null and b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/discriminator.png differ diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/generator.png b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/generator.png new file mode 100644 index 00000000..dd07fd94 Binary files /dev/null and b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/generator.png differ diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/ianvs-lifelonglearning.png b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/ianvs-lifelonglearning.png new file mode 100644 index 00000000..5a5497fa Binary files /dev/null and b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/ianvs-lifelonglearning.png differ diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/ianvs-lifelonglearning2.png b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/ianvs-lifelonglearning2.png new file mode 100644 index 00000000..b409c4dc Binary files /dev/null and b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/ianvs-lifelonglearning2.png differ diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/overview.png b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/overview.png new file mode 100644 index 00000000..4596bbe5 Binary files /dev/null and b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/imgs/overview.png differ diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/selftaughtlearning/__init__.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/selftaughtlearning/__init__.py new file mode 100755 index 00000000..42e08e58 --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/selftaughtlearning/__init__.py @@ -0,0 +1 @@ +from . 
import train \ No newline at end of file diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/selftaughtlearning/models.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/selftaughtlearning/models.py new file mode 100755 index 00000000..b1eb6fea --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/selftaughtlearning/models.py @@ -0,0 +1,47 @@ +import torch.nn as nn +import torch.nn.functional as F + + +class Encoder(nn.Module): + def __init__(self) -> None: + super(Encoder, self).__init__() + self.enc1 = nn.Conv2d( + in_channels=3, out_channels=8, kernel_size=3, stride=2, padding=1 + ) + self.enc2 = nn.Conv2d( + in_channels=8, out_channels=3, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, x): + x = F.relu(self.enc1(x)) + x = F.relu(self.enc2(x)) + return x + + +class Decoder(nn.Module): + def __init__(self) -> None: + super(Decoder, self).__init__() + self.dec1 = nn.ConvTranspose2d( + in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=1, output_padding=0 + ) + self.dec2 = nn.ConvTranspose2d( + in_channels=8, out_channels=3, kernel_size=3, stride=2, padding=1, output_padding=1 + ) + + def forward(self, x): + x = F.relu(self.dec1(x)) + x = F.relu(self.dec2(x)) + return x + + +class Autoencoder(nn.Module): + def __init__(self) -> None: + super(Autoencoder, self).__init__() + self.encoder = Encoder() + self.decoder = Decoder() + + def forward(self, x): + x = self.encoder(x) + # print(x.shape) + x = self.decoder(x) + return x diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/selftaughtlearning/train.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/selftaughtlearning/train.py new file mode 100755 index 00000000..069e439e --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/selftaughtlearning/train.py @@ -0,0 +1,147 @@ +from torchvision.utils import save_image +from models import Autoencoder +import torch +import cv2 +import numpy as np +from torch.utils.data import DataLoader +import os +import torch.nn as nn +import torch.optim as optim +import csv +import time +import sys +from util import load_yaml + + +class DatasetAutoEncoder(torch.utils.data.Dataset): + def __init__(self, fake_images_path): + self.img_dir = fake_images_path + + self.new_img_w = 2048 + + self.new_img_h = 1024 + + self.examples = [] + + file_names = os.listdir(fake_images_path) + + for file_name in file_names: + img_path = fake_images_path + file_name + self.examples.append({'img_path': img_path}) + + self.examples = self.examples[0:60] + + self.num_examples = len(self.examples) + + def __getitem__(self, index): + example = self.examples[index] + + img_path = example["img_path"] + img = cv2.imread(img_path, -1) + img = cv2.resize(img, (self.new_img_w, self.new_img_h), + interpolation=cv2.INTER_NEAREST) + img = img / 255.0 + img = img - np.array([0.485, 0.456, 0.406]) + img = img / np.array([0.229, 0.224, 0.225]) + img = np.transpose(img, (2, 0, 1)) + + img = img.astype(np.float32) + + img = torch.from_numpy(img) + img = torch.Tensor(img) + return img + + def __len__(self): + return self.num_examples + + +def save_decoded_image(img, name): + img = img.view(1, 3, 1024, 2048) + save_image(img, name) + + +def train(unseen_samples): + configs = load_yaml('../config.yaml') + device = 'cuda' if torch.cuda.is_available() else 'cpu' + LEARNING_RATE = configs['STL'][1]['lr'] + NUM_EPOCHS = configs['STL'][0]['iter'] + batch_size = 
configs['STL'][2]['batch_size'] + name = configs['STL'][3]['name'] + save_dir = 'train_results/' + name + if not os.path.exists(save_dir): + os.mkdir(save_dir) + net = Autoencoder().to(device) + # encoder_dataset = DatasetAutoEncoder(fake_images_path='../data/fake_imgs/') + encoder_dataset = unseen_samples + encoder_loader = DataLoader( + dataset=encoder_dataset, batch_size=batch_size, drop_last=True) + criterion = nn.MSELoss() + optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE) + train_loss = [] + with open('train_loss1.csv', 'w') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['epoch', 'loss']) + for epoch in range(1, NUM_EPOCHS + 1): + running_loss = 0.0 + for batch_idx, img in enumerate(encoder_loader): + img = img.to(device) + optimizer.zero_grad() + outputs = net(img) + loss = criterion(outputs, img) + loss.backward() + optimizer.step() + running_loss += loss.item() + loss = running_loss / len(encoder_loader) + train_loss.append(loss) + with open('train_loss1.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([epoch, loss]) + torch.save(net.encoder.state_dict(), save_dir + + '/encoder{}.pth'.format(epoch)) + save_decoded_image(img[0].cpu().data, + name=save_dir + '/original{}.png'.format(epoch)) + save_decoded_image( + outputs[0].cpu().data, name=save_dir + '/decoded{}.png'.format(epoch)) + + +if __name__ == '__main__': + configs = load_yaml('../config.yaml') + device = 'cuda' if torch.cuda.is_available() else 'cpu' + LEARNING_RATE = configs['STL'][1]['lr'] + NUM_EPOCHS = configs['STL'][0]['iter'] + batch_size = configs['STL'][2]['batch_size'] + name = configs['STL'][3]['name'] + save_dir = 'train_results/' + name + if not os.path.exists(save_dir): + os.mkdir(save_dir) + net = Autoencoder().to(device) + encoder_dataset = DatasetAutoEncoder(fake_images_path='../data/fake_imgs/') + encoder_loader = DataLoader( + dataset=encoder_dataset, batch_size=batch_size, drop_last=True) + criterion = nn.MSELoss() + optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE) + train_loss = [] + with open('train_loss1.csv', 'w') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['epoch', 'loss']) + for epoch in range(1, NUM_EPOCHS + 1): + running_loss = 0.0 + for batch_idx, img in enumerate(encoder_loader): + img = img.to(device) + optimizer.zero_grad() + outputs = net(img) + loss = criterion(outputs, img) + loss.backward() + optimizer.step() + running_loss += loss.item() + loss = running_loss / len(encoder_loader) + train_loss.append(loss) + with open('train_loss1.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([epoch, loss]) + torch.save(net.encoder.state_dict(), save_dir + + '/encoder{}.pth'.format(epoch)) + save_decoded_image(img[0].cpu().data, + name=save_dir + '/original{}.png'.format(epoch)) + save_decoded_image( + outputs[0].cpu().data, name=save_dir + '/decoded{}.png'.format(epoch)) diff --git a/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/util.py b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/util.py new file mode 100755 index 00000000..005a9cab --- /dev/null +++ b/examples/cityscapes/unseen_task_processing/GANwithSelfTaughtLearning/util.py @@ -0,0 +1,12 @@ +import yaml + + +def load_yaml(path): + with open(path) as f: + data = yaml.load(f, Loader=yaml.FullLoader) + # print(data) + return data + + +if __name__ == '__main__': + load_yaml('config.yaml')
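One detail worth calling out about `config.yaml` and `util.py`: each section of the config is a YAML list of single-key mappings, so the training scripts read values by position (for example `configs['GAN'][0]['iter']` in `GAN/train.py`), which means reordering or inserting keys breaks the indices. The snippet below is a hedged usage sketch, not part of the diff; it assumes it is run from the project root so that `util.py` and `config.yaml` are reachable from the current directory.

```python
# Usage sketch (assumption: run from the GANwithSelfTaughtLearning root).
from util import load_yaml

configs = load_yaml('config.yaml')

# Each section is a YAML list, so entries are addressed by index first, then key.
gan_args = {
    'path': configs['dataset_path'],
    'iter': configs['GAN'][0]['iter'],
    'batch_size': configs['GAN'][1]['batch_size'],
    'im_size': configs['GAN'][2]['im_size'],
    'name': configs['GAN'][3]['name'],
}
print(gan_args)
```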