From e9727d167fd8289715be06e956cc42f06eda9fa6 Mon Sep 17 00:00:00 2001 From: Eduard Trulls Date: Mon, 16 Oct 2017 16:07:03 +0200 Subject: [PATCH] First release. --- .gitignore | 9 + collate.py | 34 ++ config.py | 253 +++++++++ datasets/__init__.py | 0 datasets/eccv2016/__init__.py | 0 datasets/eccv2016/custom_types.py | 617 ++++++++++++++++++++ datasets/eccv2016/eccv.py | 733 ++++++++++++++++++++++++ datasets/eccv2016/helper.py | 831 +++++++++++++++++++++++++++ datasets/eccv2016/math_tools.py | 359 ++++++++++++ datasets/eccv2016/wrapper.py | 127 +++++ datasets/lift.py | 334 +++++++++++ datasets/test.py | 150 +++++ layers.py | 316 +++++++++++ losses.py | 129 +++++ main.py | 81 +++ modules/__init__.py | 0 modules/bypass.py | 69 +++ modules/lift_desc.py | 127 +++++ modules/lift_kp.py | 181 ++++++ modules/lift_ori.py | 151 +++++ modules/spatial_transformer.py | 222 ++++++++ networks/__init__.py | 0 networks/lift.py | 899 ++++++++++++++++++++++++++++++ readme.md | 112 ++++ test_desc.sh | 2 + test_kp.sh | 2 + test_kp_legacy.sh | 2 + test_ori.sh | 2 + test_orig_lift.sh | 81 +++ tester.py | 405 ++++++++++++++ trainer.py | 250 +++++++++ utils/__init__.py | 45 ++ utils/config.py | 68 +++ utils/dump.py | 199 +++++++ utils/kp_tools.py | 682 +++++++++++++++++++++++ utils/layer.py | 85 +++ utils/legacy.py | 183 ++++++ utils/network.py | 64 +++ utils/nms.py | 375 +++++++++++++ utils/test.py | 386 +++++++++++++ utils/tf.py | 62 +++ utils/train.py | 90 +++ 42 files changed, 8717 insertions(+) create mode 100644 .gitignore create mode 100644 collate.py create mode 100644 config.py create mode 100644 datasets/__init__.py create mode 100644 datasets/eccv2016/__init__.py create mode 100644 datasets/eccv2016/custom_types.py create mode 100644 datasets/eccv2016/eccv.py create mode 100644 datasets/eccv2016/helper.py create mode 100644 datasets/eccv2016/math_tools.py create mode 100644 datasets/eccv2016/wrapper.py create mode 100644 datasets/lift.py create mode 100644 datasets/test.py create mode 100644 layers.py create mode 100644 losses.py create mode 100644 main.py create mode 100644 modules/__init__.py create mode 100644 modules/bypass.py create mode 100644 modules/lift_desc.py create mode 100644 modules/lift_kp.py create mode 100644 modules/lift_ori.py create mode 100644 modules/spatial_transformer.py create mode 100644 networks/__init__.py create mode 100644 networks/lift.py create mode 100644 readme.md create mode 100755 test_desc.sh create mode 100755 test_kp.sh create mode 100755 test_kp_legacy.sh create mode 100755 test_ori.sh create mode 100755 test_orig_lift.sh create mode 100644 tester.py create mode 100644 trainer.py create mode 100644 utils/__init__.py create mode 100644 utils/config.py create mode 100644 utils/dump.py create mode 100644 utils/kp_tools.py create mode 100644 utils/layer.py create mode 100644 utils/legacy.py create mode 100644 utils/network.py create mode 100644 utils/nms.py create mode 100644 utils/test.py create mode 100644 utils/tf.py create mode 100644 utils/train.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0c30bd5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +.pyc +logs +*.pyc +*__pycache__* +*.dll +*.tfrecords +*.h5 +test/ +gpupy diff --git a/collate.py b/collate.py new file mode 100644 index 0000000..e416033 --- /dev/null +++ b/collate.py @@ -0,0 +1,34 @@ +from os import listdir +from os.path import isfile, isdir, join +from argparse import ArgumentParser +import tensorflow as tf +from utils.dump import loadh5 + + +def 
look_for_checkpoints(folder, task): + dirs = [f + for f in listdir(folder) + if isdir(join(folder, f))] + + for d in dirs: + # print(join(folder, d)) + if d == task or (d in ['kp', 'ori', 'desc', 'joint'] and task == 'all'): + cp = tf.train.latest_checkpoint(join(folder, d)) + if cp is not None: + # Load best validation result + r = loadh5(join(folder, d, 'best_val_loss.h5'))[d] + s = loadh5(join(folder, d, 'step.h5'))[d] + print('{0:s} -> {1:.05f} [{2:d}]'.format(join(folder, d), r, s)) + else: + look_for_checkpoints(join(folder, d), task) + + +parser = ArgumentParser() +parser.add_argument('--logdir', type=str, required=True) +parser.add_argument('--task', type=str, default='all', help='kp, ori, desc, joint, all') +params = parser.parse_args() + +print() +print('*** Logdir: "{}" (task: {}) ***'.format(params.logdir, params.task)) +look_for_checkpoints(params.logdir, params.task) +print() diff --git a/config.py b/config.py new file mode 100644 index 0000000..ffe324b --- /dev/null +++ b/config.py @@ -0,0 +1,253 @@ +# config.py --- +# +# Filename: config.py +# Description: Configuration module. +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Wed Jun 28 13:08:23 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# Adapted from original code at +# https://github.com/carpedm20/simulated-unsupervised-tensorflow +# +# + +# Change Log: +# +# +# + +# Code: + + +import argparse +import getpass +import json +import os +from utils.config import get_patch_size + + +def str2bool(v): + return v.lower() in ("true", "1") + + +arg_lists = [] +parser = argparse.ArgumentParser() +username = getpass.getuser() + + +def add_argument_group(name): + arg = parser.add_argument_group(name) + arg_lists.append(arg) + return arg + + +# ---------------------------------------- +# Network +net_arg = add_argument_group("Network") +net_arg.add_argument("--kp_input_size", type=int, default=48, help="") +net_arg.add_argument("--kp_filter_size", type=int, default=25, help="") +net_arg.add_argument("--kp_base_scale", type=float, default=2.0, help="") +net_arg.add_argument("--kp_com_strength", type=float, default=10.0, help="") +# net_arg.add_argument("--ori_input_size", type=int, default=28, help="") +net_arg.add_argument("--ori_input_size", type=int, default=64, help="") +net_arg.add_argument("--desc_input_size", type=int, default=64, help="") +net_arg.add_argument("--desc_support_ratio", type=float, default=6.0, help="") + +# Module selection +net_arg.add_argument("--module_kp", type=str, default="lift_kp", help="") +net_arg.add_argument("--module_ori", type=str, default="lift_ori", help="") +net_arg.add_argument("--module_desc", type=str, default="lift_desc", help="") + +# Batch-norm on-off +net_arg.add_argument("--use_input_batch_norm", type=str2bool, default=False, help="") +net_arg.add_argument("--use_batch_norm", type=str2bool, default=True, help="") + +# Data compatibility option +net_arg.add_argument("--old_data_compat", type=str2bool, default=False, help="Use hard-mined, non-augmented set") + +# Orientation detector options +net_arg.add_argument("--use_augmented_set", type=str2bool, default=False, help="Use/extract the dataset for augmented rotations") +net_arg.add_argument("--augment_rotations", type=str2bool, default=False, help="") +net_arg.add_argument("--use_dropout_ori", type=bool, default=False, help="") +net_arg.add_argument("--ori_activation", type=str, default="ghh", choices=["ghh", "tanh"], help="") + +# Descriptor 
options +net_arg.add_argument("--desc_activ", type=str, default="relu", help="Descriptor activation") +net_arg.add_argument("--desc_pool", type=str, default="avg_pool", help="Descriptor pooling") +net_arg.add_argument("--use_subtractive_norm", type=str2bool, default=False, help="Descriptor subtractive normalization") +net_arg.add_argument("--use_triplet_loss", type=str2bool, default=True, help="Triplet loss") + +# Use old mean/std +net_arg.add_argument("--use_old_mean_std", type=str2bool, default=False, help="") + +# ---------------------------------------- +# Loss Function +loss_arg = add_argument_group("Loss") +loss_arg.add_argument("--alpha_overlap", type=float, default=1e0, help="") +loss_arg.add_argument("--alpha_classification", type=float, default=1e-8, + help="") +loss_arg.add_argument("--alpha_margin", type=float, default=4.0, help="") +# for joint training +loss_arg.add_argument("--alpha_kp", type=float, default=1e0, help="") +loss_arg.add_argument("--alpha_desc", type=float, default=1e0, help="") +loss_arg.add_argument("--kp_scoremap_softmax_strength", + type=float, default=10.0, help="") + +# ---------------------------------------- +# Data +data_arg = add_argument_group("Data") +data_arg.add_argument("--nchannel", type=int, default=1, help="") +data_arg.add_argument("--data_type", type=str, default="eccv2016", help="") +data_arg.add_argument("--data_name", type=str, default="piccadilly", help="") +data_arg.add_argument( + "--data_dir", type=str, help=( + "The directory containing the dataset. " + "Will look for {data_dir}/{data_name}" + ), default="/cvlabdata2/home/{}/Datasets/".format(username), +) +data_arg.add_argument( + "--temp_dir", type=str, help=( + "The temporary directory where data related cache will be stored." + ), default="/cvlabdata2/home/{}/Temp/".format(username), +) +data_arg.add_argument( + "--scratch_dir", type=str, help=( + "The temporary directory that will be used as cache." + "We have this since the large data is typically stored in a " + "network share" + ), default="/scratch/{}/Temp/".format(username), +) +data_arg.add_argument( + "--pair_dir", type=str, help=( + "Creating pairs are time consuming. " + "We store the pair in this directory. " + "This behavior might be removed in the future. " + ), default="./pairs", +) +data_arg.add_argument("--regen_pairs", type=str2bool, default=False, help="") + +# ---------------------------------------- +# Task +task_arg = add_argument_group("Task") +task_arg.add_argument("--task", type=str, default="train", + choices=["train", "test"], + help="") +task_arg.add_argument("--subtask", type=str, default="desc", + choices=["kp", "ori", "desc", "joint"], + help="") +task_arg.add_argument("--logdir", type=str, default="", help="") +task_arg.add_argument("--finetune", type=str, default="kp", help="e.g. 
'kp+ori+desc'") + +# ---------------------------------------- +# Training +train_arg = add_argument_group("Train") +train_arg.add_argument("--random_seed", type=int, default=1234, help="") +train_arg.add_argument("--batch_size", type=int, default=128, help="") +train_arg.add_argument("--pair_interval", type=int, default=1, help="") +train_arg.add_argument("--pair_use_cache", type=str2bool, + default=True, help="") +train_arg.add_argument("--max_step", type=int, default=1e7, help="") +train_arg.add_argument("--optimizer", type=str, default="adam", + choices=["adam", "rmsprop", "sgd"], + help="") +train_arg.add_argument("--learning_rate", type=float, default=1e-3, help="") +train_arg.add_argument("--max_grad_norm", type=float, default=-1.0, help="") +train_arg.add_argument("--check_numerics", type=str2bool, + default=True, help="") +train_arg.add_argument("--tqdm_width", type=int, default=79, help="") +train_arg.add_argument("--mining_sched", type=str, default="none", + help="Scheduler: 'none', 'step', 'smooth'") +train_arg.add_argument("--mining_base", type=int, default=1, + help="Starting number of batches") +train_arg.add_argument("--mining_step", type=int, default=0, + help="Add one batch every these many (0 to disable)") +train_arg.add_argument("--mining_ceil", type=int, default=0, + help="Max number of batches (0 to disable)") + +# Pretrain information to force in if needed +train_arg.add_argument("--pretrained_kp", type=str, default="", help="") +train_arg.add_argument("--pretrained_ori", type=str, default="", help="") +train_arg.add_argument("--pretrained_desc", type=str, default="", help="") +train_arg.add_argument("--pretrained_joint", type=str, default="", help="") + +# ---------------------------------------- +# Validation +valid_arg = add_argument_group("Validation") +valid_arg.add_argument("--validation_interval", type=int, default=1e3, help="") +valid_arg.add_argument("--validation_rounds", type=int, default=100, help="") +valid_arg.add_argument("--neg_per_pos", type=float, default=100.0, help="") +valid_arg.add_argument("--valid_method", type=str, default="desc", help="") + +# ---------------------------------------- +# Test +test_arg = add_argument_group("Test") +test_arg.add_argument("--test_img_file", type=str, default="", help="") +test_arg.add_argument("--test_kp_file", type=str, default="", help="") +test_arg.add_argument("--test_out_file", type=str, default="", help="") +test_arg.add_argument("--test_num_keypoint", type=int, default=1000, help="") +test_arg.add_argument("--test_scl_intv", type=int, default=4, help="") +test_arg.add_argument("--test_min_scale_log2", type=int, default=1, help="") +test_arg.add_argument("--test_max_scale_log2", type=int, default=4, help="") +test_arg.add_argument("--test_kp_use_tensorflow", + type=str2bool, default=True, help="") +test_arg.add_argument("--test_nearby_ratio", type=float, default=1.0, help="") +test_arg.add_argument("--test_nms_intv", type=int, default=2, help="") +test_arg.add_argument("--test_edge_th", type=float, default=10.0, help="") + +train_arg = add_argument_group("Misc") +loss_arg.add_argument("--usage", type=float, default=0.96, help="Force GPU memory usage") + + +def get_config(argv): + config, unparsed = parser.parse_known_args() + + # Sanity checks + if config.augment_rotations and not config.use_augmented_set: + config.use_augmented_set = True + print('-- Forcing use_augmented_set = True') + + if config.augment_rotations and config.subtask is "desc": + raise RuntimeError("Rotation augmentation is incompatible 
" + "with descriptor training.") + + if config.old_data_compat and ( + config.use_augmented_set or config.augment_rotations): + raise RuntimeError("Options incompatible with legacy data generation.") + + if config.subtask == 'joint': + what = config.finetune.split('+') + if ("kp" not in what) and ("ori" not in what) and \ + ("desc" not in what): + raise RuntimeError("Nothing to finetune? Check --finetune") + + + # Create the prefix automatically from run command + if config.logdir == "": + config.logdir = "-".join(argv) + config.logdir = os.path.join("logs", + config.logdir.replace("main.py", "")) + + return config, unparsed + + +def save_config(model_dir, config): + param_path = os.path.join(model_dir, config.subtask, "params.json") + + print("[*] MODEL dir: %s" % model_dir) + print("[*] PARAM path: %s" % param_path) + + with open(param_path, "w") as fp: + json.dump(config.__dict__, fp, indent=4, sort_keys=True) + +# +# config.py ends here diff --git a/datasets/__init__.py b/datasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/datasets/eccv2016/__init__.py b/datasets/eccv2016/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/datasets/eccv2016/custom_types.py b/datasets/eccv2016/custom_types.py new file mode 100644 index 0000000..c80d3a0 --- /dev/null +++ b/datasets/eccv2016/custom_types.py @@ -0,0 +1,617 @@ +# custom_types.py --- +# +# Filename: custom_types.py +# Description: Python Module for custom types +# Author: Kwang +# Maintainer: +# Created: Fri Jan 16 12:01:52 2015 (+0100) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# +# Copyright (C), EPFL Computer Vision Lab. +# +# + +# Code: + +# ------------------------------------------- +# Imports +from __future__ import print_function + +import hashlib +import os +import shutil +import string +from copy import deepcopy +from datetime import timedelta +from inspect import currentframe, getframeinfo + +import numpy as np +from flufl.lock import Lock +from parse import parse # ??? is parse a standard package ??? 
+ + +# ------------------------------------------- +# Path structure +class pathConfig: + dataset = None + temp = None + result = None + debug = None + + train_data = None + train_mask = None + + # ------------------------------------------------------------------------- + # Prefix stuff that will affect keypoint generation and pairing + def prefix_dataset(self, param, do_sort=True): + prefixstr = "" + + group_list = ['dataset'] + + exclude_list = {} + exclude_list['dataset'] = ['trainSetList', 'validSetList'] + + for group_name in group_list: + key_list = deepcopy( + list(getattr(param, group_name).__dict__.keys())) + if do_sort: + key_list.sort() + for key_name in key_list: + if key_name not in exclude_list[group_name]: + prefixstr += str(getattr( + getattr(param, group_name), + key_name)) + + prefixstr = hashlib.md5(prefixstr.encode()).hexdigest() + prefixstr += "/" + + return prefixstr + + # -------------------------------------------------------------------------- + # Prefix stuff that will affect patch extraction + def prefix_patch(self, param, do_sort=True): + prefixstr = "" + + group_list = ['patch'] + + exclude_list = {} + exclude_list['patch'] = [] + + for group_name in group_list: + key_list = deepcopy( + list(getattr(param, group_name).__dict__.keys())) + if do_sort: + key_list.sort() + for key_name in key_list: + if key_name not in exclude_list[group_name]: + prefixstr += str(getattr(getattr(param, + group_name), key_name)) + + prefixstr = hashlib.md5(prefixstr.encode()).hexdigest() + prefixstr += "/" + + return prefixstr + + # ------------------------------------------------------------------------- + # Prefix stuff that will affect learning outcome + def prefix_learning(self, param, do_sort=True): + prefixstr = "" + + group_list = ['model', 'learning'] + + for group_name in group_list: + key_list = deepcopy( + list(getattr(param, group_name).__dict__.keys())) + if do_sort: + key_list.sort() + for key_name in key_list: + prefixstr += str(getattr(getattr(param, group_name), key_name)) + + prefixstr = hashlib.md5(prefixstr.encode()).hexdigest() + prefixstr += "/" + + return prefixstr + + def setupTrain(self, param, dataset_in): + + # Lock to prevent race condition + if not os.path.exists(".locks"): + os.makedirs(".locks") + lock_file = ".locks/setup.lock" + if os.name == "posix": + + lock = Lock(lock_file) + lock.lifetime = timedelta(days=2) + frameinfo = getframeinfo(currentframe()) + print(">> {}/{}: waiting to obtain lock <<".format( + frameinfo.filename, frameinfo.lineno)) + lock.lock() + print(">> obtained lock for posix system <<") + elif os.name == "nt": + import filelock + lock = filelock.FileLock(lock_file) + lock.acquire() + if lock.is_locked: + print(">> obtained lock for windows system <<") + else: + print("Unknown operating system, lock unavailable") + + + + # --------------------------------------------------------------------- + # Base path + # for dataset + # self.dataset = os.getenv('PROJ_DATA_DIR', '') + # if self.dataset == '': + # self.dataset = os.path.expanduser("~/Datasets") + # self.dataset += "/" + str(dataset_in) + self.dataset = os.path.join( + param.data_dir, str(dataset_in) + ).rstrip("/") + "/" + # for temp + # self.temp = os.getenv('PROJ_TEMP_DIR', '') + # if self.temp == '': + # self.temp = os.path.expanduser("~/Temp") + # self.temp += "/" + str(dataset_in) + self.temp = os.path.join( + param.temp_dir, str(dataset_in) + ).rstrip("/") + "/" + # for volatile temp + # self.volatile_temp = os.getenv('PROJ_VOLTEMP_DIR', '') + # if self.volatile_temp 
== '': + # self.volatile_temp = "/scratch/" + os.getenv('USER') + "/Temp" + # self.volatile_temp += "/" + str(dataset_in) + self.volatile_temp = os.path.join( + param.scratch_dir, str(dataset_in) + ).rstrip("/") + "/" + # self.negdata = os.path.expanduser("~/Datasets/NegData/") # LEGACY + + # # create dump directory if it does not exist + # if not os.path.exists(self.temp): + # os.makedirs(self.temp) + + # --------------------------------------------------------------------- + # Path for data loading + + # path for synthetic data generation + self.train_data = self.dataset + "train/" + self.prefix_dataset(param) + self.train_mask = (self.dataset + "train/" + + self.prefix_dataset(param) + "masks/") + if not os.path.exists(self.train_data): + # Check if the un-sorted prefix exists + unsorted_hash_path = (self.dataset + "train/" + + self.prefix_dataset(param, do_sort=False)) + if os.path.exists(unsorted_hash_path): + os.symlink(unsorted_hash_path.rstrip("/"), + self.train_data.rstrip("/")) + # shutil.copytree(unsorted_hash_path, self.train_data) + # shutil.rmtree(unsorted_hash_path) + + # dump folder for dataset selection (sampling and etc) + self.train_dump = self.temp + "train/" + self.prefix_dataset(param) + if not os.path.exists(self.train_dump): + # Check if the un-sorted prefix exists + unsorted_hash_path = (self.temp + "train/" + + self.prefix_dataset(param, do_sort=False)) + if os.path.exists(unsorted_hash_path): + os.symlink(unsorted_hash_path.rstrip("/"), + self.train_dump.rstrip("/")) + # shutil.copytree(unsorted_hash_path, self.train_dump) + # shutil.rmtree(unsorted_hash_path) + + # dump folder for patch extraction (if necessary) + self.patch_dump = (self.temp + "train/" + self.prefix_dataset(param) + + self.prefix_patch(param)) + if not os.path.exists(self.patch_dump): + # Check if the un-sorted prefix exists + unsorted_hash_path = (self.temp + "train/" + + self.prefix_dataset(param, do_sort=False) + + self.prefix_patch(param, do_sort=False)) + if os.path.exists(unsorted_hash_path): + os.symlink(unsorted_hash_path.rstrip("/"), + self.patch_dump.rstrip("/")) + # shutil.copytree(unsorted_hash_path, self.patch_dump) + # shutil.rmtree(unsorted_hash_path) + + # volatile dump folder for patch extraction (if necessary) + self.volatile_patch_dump = (self.volatile_temp + "train/" + + self.prefix_dataset(param) + + self.prefix_patch(param)) + # if not os.path.exists(self.volatile_patch_dump): + # # Check if the un-sorted prefix exists + # unsorted_hash_path = (self.volatile_temp + "train/" + + # self.prefix_dataset(param, do_sort=False) + + # self.prefix_patch(param, do_sort=False)) + # os.symlink(unsorted_hash_path.rstrip("/"), + # self.volatile_patch_dump.rstrip("/")) + # # shutil.copytree(unsorted_hash_path, self.volatile_patch_dump) + # # shutil.rmtree(unsorted_hash_path) + + # debug info folder + self.debug = self.dataset + "debug/" + self.prefix_dataset(param) + if not os.path.exists(self.debug): + # Check if the un-sorted prefix exists + unsorted_hash_path = (self.dataset + "debug/" + + self.prefix_dataset(param, do_sort=False)) + if os.path.exists(unsorted_hash_path): + shutil.copytree(unsorted_hash_path, self.debug) + shutil.rmtree(unsorted_hash_path) + + # # --------------------------------------------------------------------- + # # Path for the model learning + # resdir = os.getenv('PROJ_RES_DIR', '') + # if resdir == '': + # resdir = os.path.expanduser("~/Results") + # self.result = (resdir + "/" + + # self.getResPrefix(param) + + # self.prefix_dataset(param) + + # 
self.prefix_patch(param) + + # self.prefix_learning(param)) + # if not os.path.exists(self.result): + # # Check if the un-sorted prefix exists + # unsorted_hash_path = (resdir + "/" + + # self.getResPrefix(param, do_sort=False) + + # self.prefix_dataset(param, do_sort=False) + + # self.prefix_patch(param, do_sort=False) + + # self.prefix_learning(param, do_sort=False)) + + # if os.path.exists(unsorted_hash_path): + # shutil.copytree(unsorted_hash_path, self.result) + # shutil.rmtree(unsorted_hash_path) + + # # create result directory if it does not exist + # if not os.path.exists(self.result): + # os.makedirs(self.result) + + if os.name == "posix": + lock.unlock() + elif os.name == "nt": + lock.release() + else: + pass + + def getResPrefix(self, param, do_sort=True): + trainSetList = deepcopy(list(param.dataset.trainSetList)) + if hasattr(param.dataset, "validSetList"): + trainSetList += deepcopy(list(param.dataset.validSetList)) + + if do_sort: + trainSetList.sort() + res_prefix = param.dataset.dataType + '/' + \ + hashlib.md5( + "".join(trainSetList).encode()).hexdigest() + + # this is probably deprecated + if 'prefixStr' in param.__dict__.keys(): + print('I am not deprecated!!!') + res_prefix = param.prefixStr + "/" + res_prefix + + return res_prefix + '/' + + +# ------------------------------------------- +# Parameter structure +class paramGroup: + + def __init__(self): + # do nothing + return + + +class paramStruct: + + def __init__(self): + + # --------------------------------------------------------------------- + # NOTICE: everything set to None is to make it crash without config + + # --------------------------------------------------------------------- + # Legacy + self.legacy = paramGroup() + # WHETHER TO GET A NEW CANNONICAL ANGLE WHETHER WE LEARN WITH SIFT + # PRE_WARPING + self.legacy.bRefineAngle = False + self.legacy.bLearn4Refienment = False + self.legacy.bUseCached = True # Boolean for using dumped data + # THe throw away threshold in degrees (not used if negative) + self.legacy.fThrowAwayTh = -1 + self.legacy.fConsiderRatio = 3.0 + # If we want to learn the std to filter out bad keypoints + self.legacy.bLearnStd = False + + self.legacy.numBin = 72 + # to mark parts to ignore when applying the shape constraint + self.legacy.fRBFKernelTh = 0.01 + # width of the kernel in bins (kernelWidth == 0.368, 2*kernelWidth == + # 0.018 in shape) + self.legacy.kernelWidth = 2.0 + # CNN_LEARNING_RATE = 0.01 + # GHH_LEARNING_RATE = 0.003 + self.legacy.hyperparamClassif = 1.0 + self.legacy.hyperparamShape = 0.01 * \ + (1.0 / (self.legacy.kernelWidth * 2 * 2 + 1)) + self.legacy.hyperparamLaplacian = 0.001 + + # --------------------------------------------------------------------- + # Parameters for SIFT descriptor + # nNumOrientation = SIFT_ORI_HIST_BINS # Number of orientations + # to be used (might be deprecated!) 
+ self.keypoint = paramGroup() + self.keypoint.sKpType = "SIFT" # Descriptor type + self.keypoint.sDescType = "SIFT" # Descriptor type + self.keypoint.desc_num_orientation = 72 + self.keypoint.fOverlapThresh = 40.0 + + self.keypoint.fMinKpSize = 0.0 # min allowed size of a kp + # Consider keypoints within these pixels as duplicates + self.keypoint.fDupRange = 5.0 + self.keypoint.bNewCleanMethod = False # for backward compatibility + # maximum number of keypoints (-1 for unlimited) + self.keypoint.dMaxKeypointNum = -1 + + # --------------------------------------------------------------------- + # # Paramters for patch extraction + self.patch = paramGroup() + # self.patch.nPatchSize = None # Width and Height of the patch + # self.patch.sDataType = None # Image Data type for Patches + # self.patch.bNormalizePatch = True # NOrmalize single patch? + # self.patch.bLogPolarPatch = False # Use log-polar for patches? + # self.patch.bPolarPatch = False # Use log-polar for patches? + # self.patch.bApplyWeights = None # Whether to apply weights to input + # # fRatioScale will be multiplied to the SIFT scale (negative uses + # # fixed scale) + # self.patch.fRatioScale = None + # # use this size if smaller than this size for each kp + # self.patch.fMinSize = None + # self.patch.bPyramidLearn = False # learning using pyramid patch + # extraction + + # --------------------------------------------------------------------- + # Parameters for synthetic images + self.synth = paramGroup() + # np.pi/4.0 # From -pi/4 to pi/4 + self.synth.dPerturbType = 0 + self.synth.nbAngleInPlane = 5 + self.synth.nbAngleOutPlane = 5 + self.synth.aInPlaneRot \ + = 1.0 / (self.synth.nbAngleInPlane - 1) \ + * np.arange(self.synth.nbAngleInPlane, dtype=np.float) \ + * np.pi - np.pi / 2.0 # From -pi/2 to pi/2 + self.synth.aOutPlaneRot \ + = 1.0 / (self.synth.nbAngleOutPlane - 1) \ + * np.arange(self.synth.nbAngleOutPlane, dtype=np.float) \ + * np.pi / 2.0 - np.pi / 4.0 # From -pi/4 to pi/4 + self.synth.aScaleChange = [0.5, 0.75, 1.0, 1.5, 2.0] + + self.synth.bFrontalLearn = False # learning with frontal pair only + self.synth.sFrontalName = 'img1.png' + + # print 'In plane rotation are: ' + # for a in aInPlaneRot: + # print a, ' ' + # print '\n' + # print 'Out of plane rotation are: ' + # for a in aOutPlaneRot: + # print a, ' ' + # print '\n' + + # --------------------------------------------------------------------- + # Parameters for testing + self.PCA = paramGroup() + self.PCA.PCAdim = None + + # --------------------------------------------------------------------- + # Model parameters + self.model = paramGroup() + # Which type am I running in terms of python executable + self.model.modelType = None + # self.model.num_siamese = None # number of siamese clones + # self.model.fRatio = None # the ratio of the WNN + self.model.bNormalizeInput = None # whether to normalize the input + + # --------------------------------------------------------------------- + # Optimization parameters + self.learning = paramGroup() + # name of the solver (e.g. 
adam, adadelta) + self.learning.optimizer = None + self.learning.n_epochs = None # maximum number of epochs to run + # self.learning.min_epoch = 5 # minimum number of epochs to run + self.learning.batch_size = None # batch size for SGD + # self.learning.alpha_L2 = None # L2 regularisor weight + # # SGD with momentum + # self.learning.momentum = None # momentum component + # self.learning.learning_rate = None # learning rate + # # Adadelta + # self.learning.decay = None # ADADELTA param + # self.learning.epsilon = None # ADADELTA param + # ADAM + self.learning.lr = None + self.learning.beta1 = None + self.learning.beta2 = None + self.learning.epsilon = None + # Epoch LR decay param + # learning rate is halved every this epoch + self.learning.lr_half_interval = 2.0 + # reduction of learning rate after pretrain + self.learning.pretrain_reduction = 1e-2 + + # --------------------------------------------------------------------- + # Dataset parameters + self.dataset = paramGroup() + self.dataset.dataType = "ECCV" + + # --------------------------------------------------------------------- + # Validation parameters (Not used in prefix generation) + self.validation = paramGroup() + + def loadParam(self, file_name, verbose=True): + + config_file = open(file_name, 'rb') + if verbose: + print("Parameters") + + # ------------------------------------------ + # Read the configuration file line by line + while True: + line2parse = config_file.readline().decode("utf-8") + if verbose: + print(line2parse, end='') + + # Quit parsing if we reach the end + if not line2parse: + break + + # Parse + parse_res = parse( + '{parse_type}: {group}.{field_name} = {read_value};{trash}', + line2parse) + + # Skip if it is something we cannot parse + if parse_res is not None: + if parse_res['parse_type'] == 'ss': + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], parse_res['read_value'].split(',')) + elif parse_res['parse_type'] == 's': + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], parse_res['read_value']) + elif parse_res['parse_type'] == 'd': + eval_res = eval(parse_res['read_value']) + if isinstance(eval_res, np.ndarray): + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], eval_res.astype(int)) + elif isinstance(eval_res, list): + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], [int(v) for v in eval_res]) + else: + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], int(eval_res)) + elif parse_res['parse_type'] == 'f': + eval_res = eval(parse_res['read_value']) + if isinstance(eval_res, np.ndarray): + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], eval_res.astype(float)) + elif isinstance(eval_res, list): + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], [float(v) for v in eval_res]) + else: + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], float(eval_res)) + elif parse_res['parse_type'] == 'sf': + setattr(getattr(self, parse_res['group']), + parse_res['field_name'], + [float(eval(s)) for s in + parse_res['read_value'].split(',')]) + elif parse_res['parse_type'] == 'b': + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], bool(int(parse_res['read_value']))) + else: + if verbose: + print(' L-> skipped') + + # Check if we want the small range for perturbations and update it + if self.synth.dPerturbType == 1: # using smaller synthetics + self.synth.nbAngleInPlane = 5 + self.synth.nbAngleOutPlane = 1 + self.synth.aInPlaneRot 
\ + = 1.0 / (self.synth.nbAngleInPlane - 1) \ + * np.arange(self.synth.nbAngleInPlane, dtype=np.float) \ + * np.pi / 2.0 - np.pi / 4.0 # From -pi/4 to pi/4 + self.synth.aOutPlaneRot = [0.0] + self.synth.aScaleChange = [0.75, 1.0, 1.0 / 0.75] + elif self.synth.dPerturbType == 2: # using only the actual + self.synth.nbAngleInPlane = 1 + self.synth.nbAngleOutPlane = 1 + self.synth.aInPlaneRot = [0.0] + self.synth.aOutPlaneRot = [0.0] + self.synth.aScaleChange = [1.0] + elif self.synth.dPerturbType == 3: # using only the frontal + self.synth.nbAngleInPlane = 5 + self.synth.nbAngleOutPlane = 1 + self.synth.aInPlaneRot \ + = 1.0 / (self.synth.nbAngleInPlane - 1) \ + * np.arange(self.synth.nbAngleInPlane, dtype=np.float) \ + * np.pi / 2.0 - np.pi / 4.0 # From -pi/4 to pi/4 + self.synth.aOutPlaneRot = [0.0] + self.synth.aScaleChange = [0.75, 1.0, 1.0 / 0.75] + + +# ------------------------------------------ +# Data structure +# dataStruct = np.dtype([ +# ('gt_angle', np.float64), +# ('up_angle', np.float64), +# ('rotVec', np.float64,3) +# ]) + +class dataStruct: + fSIFTAngle = 0 # SIFT angle + fGTAngle = 0 # Ground truth angle obtained by warping the reference + + mRawPatch = [] # patch data near the keypoint region + # patch data of a warped (according to SIFT estimation) region + mWarpedPatch = [] + vHistogram = [] # Orientation Histogram of the selected patch + vWarpedHistogram = [] # Warped Orientation Histogram of the selected patch + + mRefRawPatch = [] # patch data near the keypoint region + # patch data of a warped (according to SIFT estimation) region + mRefWarpedPatch = [] + + # patch data of a warped (according to GT estimation) region + mGTWarpedPatch = [] + + vRefHistogram = [] # Orientation Histogram of the selected patch + vRefWarpedHistogram = [] # Warped Orient. Histogram of the selected patch + + # descriptor computed for all angles (the name is misleading here!) + mOrientedSIFTdesc = [] + vSIFTdesc = [] # descriptor computed with SIFT angle + vGTdesc = [] # descriptor computed with GT angle + + dLabel = 0 # Label of the data + fLabelStd = 0 # Std of the keypoints having this label in the training set + pt = [] # coordinates of the keypoint + + sFileName = [] # Original File name which this data was extracted + + # ------------------------------------------------------------------------ + # Fro Brown data + refID = None + kpID = None + interest_x = None + interest_y = None + interest_orientation = None + interest_scale = None + # mOrigPatch = None + + +class data2DrawStruct: + # orig_keypoint = cv2.KeyPoint() # Opencv Keypoint + # updt_keypoint = cv2.KeyPoint() # Opencv Keypoints with updated + # angle (recomputed) + gt_angle = 0 + up_angle = 0 + # rotation Vector used to synthesize the image and keypoint + rotVec = np.zeros((3)) + + +# +# custom_types.py ends here diff --git a/datasets/eccv2016/eccv.py b/datasets/eccv2016/eccv.py new file mode 100644 index 0000000..a997a7e --- /dev/null +++ b/datasets/eccv2016/eccv.py @@ -0,0 +1,733 @@ +# eccv.py --- +# +# Filename: eccv.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Fri Feb 26 12:39:54 2016 (+0100) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# Thu 29 Jun 14:52:41 CEST 2017, Kwang Moo Yi +# +# - Adaptation to be used inside the modules +# +# Mon 26 Jun 13:52:37 CEST 2017, Kwang Moo Yi +# +# - Runs differently based on user name. 
Will use cvlab environment if possible +# - NFS safe lock is now re-enabled to prevent race condition. +# + +# Code: + + +# ----------------------------------------------------------------------------- +# Imports +from __future__ import print_function + +import hashlib +import multiprocessing as mp +import os +import shutil +import sys +import time +from datetime import timedelta +from inspect import currentframe, getframeinfo + +import cv2 +import h5py +import numpy as np +import six +from flufl.lock import Lock + +# from Utils.dataset_tools.helper import (load_patches, +# random_mine_non_kp_with_2d_distance, +# random_mine_non_kp_with_3d_blocking) +from datasets.eccv2016.helper import (load_patches, + random_mine_non_kp_with_2d_distance, + random_mine_non_kp_with_3d_blocking) +from six.moves import xrange +# from Utils.custom_types import pathConfig +from datasets.eccv2016.custom_types import pathConfig +from utils import loadh5, saveh5 + +# ratio of cpu cores this script is allowed to use +ratio_CPU = 0.5 + + +# ----------------------------------------------------------------------------- +# Dataset class +def createDump(args): + + idx_jpg, jpg_file, train_data_dir, dump_data_dir, tmp_patch_dir, \ + scale_hist, scale_hist_c, out_dim, param, queue = args + + # queue for monitoring + if queue is not None: + queue.put(idx_jpg) + + final_dump_file_name = tmp_patch_dir + jpg_file.replace(".jpg", ".h5") + if not os.path.exists(final_dump_file_name): + # load image + bUseColorImage = getattr(param.patch, "bUseColorImage", False) + if not bUseColorImage: + bUseDebugImage = getattr(param.patch, "bUseDebugImage", False) + if not bUseDebugImage: + img = cv2.cvtColor(cv2.imread(train_data_dir + jpg_file), + cv2.COLOR_BGR2GRAY) + else: + # debug image contains keypoints survive the SfM + # pipeline (blue) and the rest (in red) + img = cv2.cvtColor(cv2.imread(train_data_dir + jpg_file), + cv2.COLOR_BGR2GRAY) + debug_jpg = train_data_dir + jpg_file.replace( + ".jpg", "-kp-minsc-" + str(param.dataset.fMinKpSize) + + ".jpg") + imgd = cv2.cvtColor(cv2.imread(debug_jpg), cv2.COLOR_BGR2GRAY) + img = cv2.resize(imgd, img.shape[:2][::-1]) + + in_dim = 1 + else: + img = cv2.imread(train_data_dir + jpg_file) + in_dim = 3 + assert(img.shape[-1] == in_dim) + + # ---------------------------------------- + # load kp data + # Note that data is in order of [x,y,scale,angle,pointid,setid]] + + # For positive: select randomly from "valid_keypoints" + pos_kp_file_name = train_data_dir + jpg_file.replace( + ".jpg", "-kp-minsc-" + str(param.dataset.fMinKpSize) + ".h5") + with h5py.File(pos_kp_file_name, "r") as h5file: + sfm_kp = np.asarray(h5file["valid_keypoints"], dtype="float") + non_sfm_kp = np.asarray(h5file["other_keypoints"], dtype="float") + # add two dummy fields to non_sfm since they don"t have id + # and group. 
THis means pointid,setid are set to 1 for non_sfm_kp + non_sfm_kp = np.concatenate( + [non_sfm_kp, -np.ones((non_sfm_kp.shape[0], 2))], + axis=1 + ) + + pos_kp = np.concatenate((sfm_kp, np.ones((sfm_kp.shape[0], 1), + dtype="float")), axis=1) + # Select subset for positives (assuming we have same coordinates for + # all image) + dump_file_name = dump_data_dir + jpg_file.replace(".jpg", + "_idxPosSel.h5") + + # if dump file not exist, create it; otherwise load it + if not os.path.exists(dump_file_name): + # idxPosShuffle = np.argsort(pos_kp[3,:])[::-1] # sort backwards + idxPosShuffle = np.random.permutation( + len(pos_kp)) # random shuffle + pos_2_keep = len(idxPosShuffle) + if param.dataset.nPosPerImg > 0: + pos_2_keep = min(pos_2_keep, param.dataset.nPosPerImg) + idxPosSel = idxPosShuffle[:pos_2_keep] # shuffle the points + to_save = {"saveval": idxPosSel} + saveh5(to_save, dump_file_name) + else: + to_load = loadh5(dump_file_name) + idxPosSel = to_load["saveval"] + pos_kp = pos_kp[idxPosSel] + + # negative sampling: + # + # 1) only use Sfm points, too close keypoints will be rejected as + # negative pair (because of high potential overlapping) + # + # 2) Use all SIFT points, when the overlapping of two feature context + # windows is larger than a threshold, it will be rejected as a negative + # pair (because it shares too much common regions..). In this step, we + # concatenate sfm_kp and non_sfm_kp to form keypoint class. + neg_mine_method = getattr(param.patch, "sNegMineMethod", + "use_only_SfM_points") + if neg_mine_method == "use_only_SfM_points": + # where is + neg_kp = random_mine_non_kp_with_2d_distance( + img, sfm_kp, scale_hist, scale_hist_c, param) + elif neg_mine_method == "use_all_SIFT_points": + max_iter = getattr(param.patch, "nMaxRandomNegMineIter", 100) + sift_kp = np.concatenate([sfm_kp, non_sfm_kp], axis=0) + neg_kp = random_mine_non_kp_with_3d_blocking( + img, sift_kp, scale_hist, scale_hist_c, param, + max_iter=max_iter) + else: + raise ValueError("Mining method {} is not supported!" 
+ "".format(neg_mine_method)) + # add another dim indicating good or bad + neg_kp = np.concatenate((neg_kp, np.zeros((len(neg_kp), 1), + dtype="float")), axis=1) + # concatenate negative and positives + kp = np.concatenate((pos_kp, neg_kp), axis=0) + + # Retrive target values, 1 for pos and 0 for neg + y = kp[:, 6] + + # Retrieve angles + angle = kp[:, 3] + + # Assign ID to keypoints (for sfm points, ID = 3d point ind; for non + # sfm kp, ID = -1) + ID = kp[:, 4] + + # load patches with id (drop out of boundary) + bPerturb = getattr(param.patch, "bPerturb", False) + fPerturbInfo = getattr(param.patch, "fPerturbInfo", np.zeros((3,))) + nAugmentedRotations = getattr(param.patch, "nAugmentedRotations", 1) + fAugmentRange = getattr(param.patch, "fAugmentRange", 0) + fAugmentCenterRandStrength = getattr( + param.patch, "fAugmentCenterRandStrength", 0 + ) + sAugmentCenterRandMethod = getattr( + param.patch, "sAugmentCenterRandMethod", "uniform" + ) + + cur_data_set = load_patches( + img, kp, y, ID, angle, param.patch.fRatioScale, + param.patch.fMaxScale, param.patch.nPatchSize, + param.model.nDescInputSize, in_dim, bPerturb, fPerturbInfo, + bReturnCoords=True, + nAugmentedRotations=nAugmentedRotations, + fAugmentRange=fAugmentRange, + fAugmentCenterRandStrength=fAugmentCenterRandStrength, + sAugmentCenterRandMethod=sAugmentCenterRandMethod, + nPatchSizeAug=param.patch.nPatchSizeAug, + ) + # save dump as dictionary using saveh5 + # from here Kernel died, because of not finding "MKL_intel_thread.dll" + + # pdb.set_trace() + # here the data are saved in tmp_dump file, keys are numbers [0,1..] + tmpdict = dict( + (str(_idx), np.asarray(_data)) for _idx, _data in + zip(np.arange(len(cur_data_set)), cur_data_set) + ) + saveh5(tmpdict, final_dump_file_name) + + return idx_jpg + + +class data_obj(object): + """ Dataset Object class. 
+ + Implementation of the dataset object + """ + + def __init__(self, param, mode="train"): + + # Set parameters + self.out_dim = 1 # a single regressor output + + # Load data + # self.x = None # data (patches) to be used for learning [N, + # # channel, w, h] + # self.y = None # label/target to be learned + # self.ID = None # id of the data for manifold regularization + self.load_data(param, mode) + + # Set parameters + self.num_channel = self.x.shape[1] # single channel image + # patch width == patch height (28x28) + self.patch_height = self.x.shape[2] + self.patch_width = self.x.shape[3] + + self + + def load_data(self, param, mode): + + print(" --------------------------------------------------- ") + print(" ECCV Data Module ") + print(" --------------------------------------------------- ") + + # for each dataset item in the list + id_base = 0 + for idxSet in xrange(len(param.dataset.trainSetList)): + + pathconf = pathConfig() + pathconf.setupTrain(param, param.dataset.trainSetList[idxSet]) + + # Save the hash, for the pairs folder + # TODO make this work with multiple datasets + self.hash = '/'.join(pathconf.patch_dump.split('/')[-3:]) + + # load the data from one dataset (multi dataset can be used) + cur_data = self.load_data_for_set(pathconf, param, mode) + fixed_id = cur_data[2] + (cur_data[2] >= 0) * id_base + if idxSet == 0: + self.x = cur_data[0] + self.y = cur_data[1] + self.ID = fixed_id + self.pos = cur_data[3] + self.angle = cur_data[4] + self.coords = cur_data[5] + else: + self.x = np.concatenate([self.x, cur_data[0]]) + self.y = np.concatenate([self.y, cur_data[1]]) + self.ID = np.concatenate([self.ID, fixed_id]) + self.pos = np.concatenate([self.pos, cur_data[3]]) + self.angle = np.concatenate([self.angle, cur_data[4]]) + self.coords = np.concatenate([self.coords, cur_data[5]]) + + id_base = np.max(self.ID) + 1 + + def load_data_for_set(self, pathconf, param, mode): + + # ---------------------------------------------------------------------- + # Train, Validation, and Test + # mlab = matlab.engine.start_matlab() + + # Read from pathconf + # Original implementation + train_data_dir = pathconf.dataset + dump_data_dir = pathconf.train_dump + dump_patch_dir = pathconf.patch_dump + # local (or volatile) copy of the dump data + tmp_patch_dir = pathconf.volatile_patch_dump + + print("train_data_dir = {}".format(train_data_dir)) + print("dump_data_dir = {}".format(dump_data_dir)) + print("dump_patch_dir = {}".format(dump_patch_dir)) + print("tmp_patch_dir = {}".format(tmp_patch_dir)) + + if not os.path.exists(dump_data_dir): + os.makedirs(dump_data_dir) + if not os.path.exists(dump_patch_dir): + os.makedirs(dump_patch_dir) + if not os.path.exists(tmp_patch_dir): + os.makedirs(tmp_patch_dir) + + # Check if we have the big h5 file ready + big_file_name = dump_patch_dir + mode + "-data-chunked.h5" + # if os.getenv("MLTEST_DEBUG", default=""): + # import pdb + # pdb.set_trace() + + # Mutex lock + # + # We will create an nfs-safe lock file in a temporary directory to + # prevent our script from using corrupted, or data that is still being + # generated. This allows us to launch multiple instances at the same + # time, and allow only a single instance to generate the big_file. 
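+        # Note: as in custom_types.pathConfig.setupTrain, an NFS-safe
+        # flufl.lock is used on POSIX systems and the third-party "filelock"
+        # package on Windows, so that only one process generates the chunked
+        # dump at a time. The resulting "<mode>-data-chunked.h5" file holds
+        # the datasets "x", "y", "ID", "pos", "angle" and "coords" (plus a
+        # "time_stamp"), chunked per sample so they can be read in batches
+        # further below.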
+ if not os.path.exists(".locks"): + os.makedirs(".locks") + check_lock_file = ".locks/" + \ + hashlib.md5(big_file_name.encode()).hexdigest() + if os.name == "posix": + check_lock = Lock(check_lock_file) + check_lock.lifetime = timedelta(days=2) + frameinfo = getframeinfo(currentframe()) + print("-- {}/{}: waiting to obtain lock --".format( + frameinfo.filename, frameinfo.lineno)) + check_lock.lock() + print(">> obtained lock for posix system<<") + elif os.name == "nt": + import filelock + check_lock = filelock.FileLock(check_lock_file) + check_lock.timeout = 2000 + check_lock.acquire() + if check_lock.is_locked: + print(">> obtained lock for windows system <<") + else: + print("Unknown operating system, lock unavailable") + + if not os.path.exists(big_file_name): + + if not os.path.exists(dump_patch_dir + mode + "-data.h5"): + + # Read scale histogram + hist_file = h5py.File(train_data_dir + + "scales-histogram-minsc-" + + str(param.dataset.fMinKpSize) + ".h5", + "r") + scale_hist = np.asarray(hist_file["histogram_bins"]).flatten() + scale_hist /= np.sum(scale_hist) + scale_hist_c = np.asarray( + hist_file["histogram_centers"] + ).flatten() + + # Read list of images from split files + split_name = "" + split_name += str(param.dataset.nTrainPercent) + "-" + split_name += str(param.dataset.nValidPercent) + "-" + split_name += str(param.dataset.nTestPercent) + "-" + if mode == "train": + split_name += "train-" + elif mode == "valid": + split_name += "val-" + elif mode == "test": + split_name += "test-" + split_file_name = train_data_dir + "split-" \ + + split_name + "minsc-" \ + + str(param.dataset.fMinKpSize) + ".h.txt" + list_jpg_file = [] + for file_name in list( + np.loadtxt(split_file_name, dtype=bytes) + ): + list_jpg_file += [ + file_name.decode("utf-8").replace( + "-kp-minsc-" + str(param.dataset.fMinKpSize), + ".jpg") + ] + + # ------------------------------------------------- + # Create dumps in parallel + # I am lazy so create arguments in loop lol + pool_arg = [None] * len(list_jpg_file) + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + pool_arg[idx_jpg] = (idx_jpg, + list_jpg_file[idx_jpg], + train_data_dir, + dump_data_dir, tmp_patch_dir, + scale_hist, scale_hist_c, + self.out_dim, param) + + # if true, use multi thread, otherwise use only single thread + if True: + number_of_process = int(ratio_CPU * mp.cpu_count()) + pool = mp.Pool(processes=number_of_process) + manager = mp.Manager() + queue = manager.Queue() + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + pool_arg[idx_jpg] = pool_arg[idx_jpg] + (queue,) + # map async + pool_res = pool.map_async(createDump, pool_arg) + # monitor loop + while True: + if pool_res.ready(): + # print("") + break + else: + size = queue.qsize() + print("\r -- " + mode + ": Processing image " + "{}/{}".format(size, len(list_jpg_file)), + end="") + sys.stdout.flush() + time.sleep(1) + pool.close() + pool.join() + print("\r -- " + mode + ": Finished Processing Images!") + # for debugging, if multi thread is used, then it is difficult + # to debug + else: + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + pool_arg[idx_jpg] = pool_arg[idx_jpg] + (None,) + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + createDump(pool_arg[idx_jpg]) + print("\r -- " + mode + ": Processing image " + "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), + end="") + sys.stdout.flush() + print("\r -- " + mode + ": Finished Processing Images!") + # ------------------------------------------------- + + # # -------------------- + # # use single thread 
for simplify debugging + # for idx_jpg in six.moves.xrange(len(list_jpg_file)): + # pool_arg[idx_jpg] = pool_arg[idx_jpg] + (None,) + # for idx_jpg in six.moves.xrange(len(list_jpg_file)): + # createDump(pool_arg[idx_jpg]) + # print("\r -- " + mode + ": Processing image " + # "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), + # end="") + # sys.stdout.flush() + # print("\r -- " + mode + ": Finished Processing Images!") + + # ------------------------------------------------------------------ + # Use only valid indices to ascertain mutual exclusiveness + id_file_name = train_data_dir + "split-" + id_file_name += str(param.dataset.nTrainPercent) + "-" + id_file_name += str(param.dataset.nValidPercent) + "-" + id_file_name += str(param.dataset.nTestPercent) + "-" + id_file_name += ("minsc-" + + str(param.dataset.fMinKpSize) + + ".h5") + + if mode == "train": + id_key = "indices_train" + elif mode == "valid": + id_key = "indices_val" + elif mode == "test": + id_key = "indices_test" + + with h5py.File(id_file_name, "r") as id_file: + id_2_keep = np.asarray(id_file[id_key]) + + # ind_2_keep = np.in1d(dataset[2], id_2_keep) + # ind_2_keep += dataset[2] < 0 + + # loop through files to figure out how many valid items we have +# pdb.set_trace() # for tracking of the dataset + + num_valid = 0 + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + + jpg_file = list_jpg_file[idx_jpg] + + print("\r -- " + mode + ": " + "Reading dumps to figure out number of valid " + "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), + end="") + sys.stdout.flush() + + # Load created dump + final_dump_file_name = tmp_patch_dir \ + + jpg_file.replace(".jpg", ".h5") + # Use loadh5 and turn it back to original cur_data_set + with h5py.File(final_dump_file_name, "r") as dump_file: + cur_ids = dump_file["2"].value + + # Find cur valid by looking at id_2_keep + cur_valid = np.in1d(cur_ids, id_2_keep) + # Add all negative labels as valid (neg data) + cur_valid += cur_ids < 0 + + # Sum it up + num_valid += np.sum(cur_valid) + + print("\n -- " + mode + ": " + "Found {} valid data points from {} files" + "".format(num_valid, len(list_jpg_file))) + + # Get the first data to simply check the shape + tmp_dump_file_name = tmp_patch_dir \ + + list_jpg_file[0].replace(".jpg", ".h5") + with h5py.File(tmp_dump_file_name, "r") as dump_file: + dataset_shape = [] + dataset_type = [] + for _idx in six.moves.xrange(len(dump_file.keys())): + dataset_shape += [dump_file[str(_idx)].shape] + dataset_type += [dump_file[str(_idx)].dtype] + + # create and save the large dataset chunk + with h5py.File(big_file_name, "w-") as big_file: + big_file["time_stamp"] = np.asarray(time.localtime()) + name_list = ["x", "y", "ID", "pos", "angle", "coords"] + # create the dataset storage chunk + for __i in six.moves.xrange(len(dataset_shape)): + big_file.create_dataset( + name_list[__i], + (num_valid,) + dataset_shape[__i][1:], + chunks=(1,) + dataset_shape[__i][1:], + maxshape=( + (num_valid,) + dataset_shape[__i][1:] + ), + dtype=dataset_type[__i] + ) + # loop through the file to save to a big chunk + save_base = 0 + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + + jpg_file = list_jpg_file[idx_jpg] + + print("\r -- " + mode + ": " + "Saving the data to the big dump " + "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), + end="") + sys.stdout.flush() + + # Load created dump + final_dump_file_name = tmp_patch_dir \ + + jpg_file.replace(".jpg", ".h5") + # Use loadh5 and turn it back to original cur_data_set + tmpdict = loadh5(final_dump_file_name) + 
cur_data_set = tuple([ + tmpdict[str(_idx)] for _idx in + range(len(tmpdict.keys())) + ]) + # Find cur valid by looking at id_2_keep + cur_valid = np.in1d(cur_data_set[2], id_2_keep) + # Add all negative labels as valid (neg data) + cur_valid += cur_data_set[2] < 0 + for __i in six.moves.xrange(len(dataset_shape)): + big_file[name_list[__i]][ + save_base:save_base + np.sum(cur_valid) + ] = cur_data_set[__i][cur_valid] + # Move base to the next chunk + save_base += np.sum(cur_valid) + + # Assert that we saved all + assert save_base == num_valid + + print("\n -- " + mode + ": " + "Done saving {} valid data points from {} files" + "".format(num_valid, len(list_jpg_file))) + + # -------------------------------------------------- + # Cleanup dump + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + + jpg_file = list_jpg_file[idx_jpg] + + print("\r -- " + mode + ": " + "Removing dump " + "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), + end="") + sys.stdout.flush() + + # Delete dump + final_dump_file_name = tmp_patch_dir \ + + jpg_file.replace(".jpg", ".h5") + os.remove(final_dump_file_name) + + print("\r -- " + mode + ": " + "Cleaned up dumps! " + "Local dump is now clean!") + + else: + print(" -- Found old file without chunks. " + "Copying to new file with chunks...") + old_big_file_name = dump_patch_dir + mode + "-data.h5" + with h5py.File(old_big_file_name, "r") as old_big_file, \ + h5py.File(big_file_name, "w-") as big_file: + dataset = [] + + # load old train into array + name_list = ["x", "y", "ID", "pos", "angle", "coords"] + for __i in six.moves.xrange(len(name_list)): + dataset += [np.asarray(old_big_file[name_list[__i]])] + + # save train + big_file["time_stamp"] = np.asarray(time.localtime()) + + # allocate and write + for __i in six.moves.xrange(len(name_list)): + if name_list[__i] == "x": + chunk_shape = (1,) + dataset[__i].shape[1:] + else: + chunk_shape = None + big_file.create_dataset( + name_list[__i], + dataset[__i].shape, + data=dataset[__i], + chunks=chunk_shape, + maxshape=dataset[__i].shape, + ) + + print(" -- Finished creating chunked file, removing old...") + os.remove(old_big_file_name) + + # ---------------------------------------------------------------------- + # Copy to local tmp if necessary + if not os.path.exists(tmp_patch_dir + mode + "-data-chunked.h5"): + print(" -- " + mode + ": " + "Local dump does not exist! " + "Copying big dump to local drive... ") + shutil.copy(dump_patch_dir + mode + "-data-chunked.h5", + tmp_patch_dir + mode + "-data-chunked.h5") + else: + print(" -- " + mode + ": " + "Local dump exists. Checking timestamp...") + + # get timestamp from nfs + with h5py.File(dump_patch_dir + mode + "-data-chunked.h5", "r") \ + as nfs_file: + nfs_time = np.asarray(nfs_file["time_stamp"]) + + # get timestamp from local + with h5py.File(tmp_patch_dir + mode + "-data-chunked.h5", "r") \ + as local_file: + local_time = np.asarray(local_file["time_stamp"]) + + # if the two files have different time stamps + if any(nfs_time != local_time): + print(" -- " + mode + ": " + "Time stamps are different! " + "Copying big dump to local drive... ") + shutil.copy(dump_patch_dir + mode + "-data-chunked.h5", + tmp_patch_dir + mode + "-data-chunked.h5") + else: + print(" -- " + mode + ": " + "Time stamps are identical! 
Re-using local dump") + + # Free lock + if os.name == "posix": + check_lock.unlock() + print("-- free lock --") + elif os.name == "nt": + check_lock.release() + print("-- free lock --") + else: + pass + # ---------------------------------------------------------------------- + # Use local copy for faster speed + print(" -- " + mode + ": Loading from local drive... ") + big_file_name = tmp_patch_dir + mode + "-data-chunked.h5" + + # open big_file and don"t close + big_file = h5py.File(big_file_name, "r") + + x = big_file["x"] + # work arround for h5py loading all things to memory + read_batch_size = 10000 + read_batch_num = int(np.ceil( + float(big_file["x"].shape[0]) / + float(read_batch_size) + )) + + # Manual, since I don't want to bother debugging the below + # fields = ["y", "ID", "pos", "angle", "coords"] + # for var_name in fields: + # # allocate data + # exec("{0} = np.zeros(big_file['{0}'].shape, " + # "dtype=big_file['{0}'].dtype)".format(var_name)) + # # copy data in batches + # for idx_batch in six.moves.xrange(read_batch_num): + # idx_s = idx_batch * read_batch_size + # idx_e = (idx_batch + 1) * read_batch_size + # idx_e = np.minimum(idx_e, big_file["x"].shape[0]) + # exec("{0}[idx_s:idx_e] = np.asarray(big_file['{0}'][idx_s:idx_e])" + # "". format(var_name)) + + # Allocate + y = np.zeros(big_file["y"].shape, dtype=big_file["y"].dtype) + ID = np.zeros(big_file["ID"].shape, dtype=big_file["ID"].dtype) + pos = np.zeros(big_file["pos"].shape, dtype=big_file["pos"].dtype) + angle = np.zeros(big_file["angle"].shape, + dtype=big_file["angle"].dtype) + coords = np.zeros(big_file["coords"].shape, + dtype=big_file["coords"].dtype) + + # Copy data in batches + for idx_batch in six.moves.xrange(read_batch_num): + idx_s = idx_batch * read_batch_size + idx_e = (idx_batch + 1) * read_batch_size + idx_e = np.minimum(idx_e, big_file["x"].shape[0]) + + y[idx_s:idx_e] = np.asarray(big_file['y'][idx_s:idx_e]) + ID[idx_s:idx_e] = np.asarray(big_file['ID'][idx_s:idx_e]) + pos[idx_s:idx_e] = np.asarray(big_file['pos'][idx_s:idx_e]) + angle[idx_s:idx_e] = np.asarray(big_file['angle'][idx_s:idx_e]) + coords[idx_s:idx_e] = np.asarray(big_file['coords'][idx_s:idx_e]) + + # import pdb + # pdb.set_trace() + + # # Make sure data is contiguos + # y = np.ascontiguousarray(y) + # ID = np.ascontiguousarray(ID) + # pos = np.ascontiguousarray(pos) + # angle = np.ascontiguousarray(angle) + # coords = np.ascontiguousarray(coords) + + print(" -- " + mode + ": Done... ") + + return x, y, ID, pos, angle, coords + +# +# eccv.py ends here diff --git a/datasets/eccv2016/helper.py b/datasets/eccv2016/helper.py new file mode 100644 index 0000000..fabb1d3 --- /dev/null +++ b/datasets/eccv2016/helper.py @@ -0,0 +1,831 @@ +# helper.py --- +# +# Filename: helper.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Tue Feb 23 15:18:50 2016 (+0100) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. 
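+#
+# This module provides the patch-extraction and negative-mining helpers
+# (load_patches, random_mine_non_kp_with_2d_distance,
+# random_mine_non_kp_with_3d_blocking) used by datasets/eccv2016/eccv.py.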
+ +# Code: + + +from __future__ import print_function + +import os + +import cv2 +import h5py # for hdf5 +import numpy as np +import scipy.io # for matlab +import six + +# dump tools +from utils import loadh5, saveh5 +from datasets.eccv2016.math_tools import getIoUOfRectangles +# from Utils.transformations import quaternion_from_matrix + + +# # custom types +# from Utils.custom_types import paramStruct, pathConfig + +# def load_geom(geom_file, geom_type, scale_factor, flip_R=False): +# if geom_type == "calibration": +# # load geometry file +# geom_dict = loadh5(geom_file) +# # Check if principal point is at the center +# K = geom_dict["K"] +# assert(K[0, 2] < 1e-3 and K[1, 2] < 1e-3) +# # Rescale calbration according to previous resizing +# S = np.asarray([[scale_factor, 0, 0], +# [0, scale_factor, 0], +# [0, 0, 1]]) +# K = np.dot(S, K) +# geom_dict["K"] = K +# # Transpose Rotation Matrix if needed +# if flip_R: +# R = geom_dict["R"].T.copy() +# geom_dict["R"] = R +# # append things to list +# geom_list = [] +# geom_info_name_list = ["K", "R", "T", "imsize"] +# for geom_info_name in geom_info_name_list: +# geom_list += [geom_dict[geom_info_name].flatten()] +# # Finally do K_inv since inverting K is tricky with theano +# geom_list += [np.linalg.inv(geom_dict["K"]).flatten()] +# # Get the quaternion from Rotation matrices as well +# q = quaternion_from_matrix(geom_dict["R"]) +# geom_list += [q.flatten()] +# # Also add the inverse of the quaternion +# q_inv = q.copy() +# np.negative(q_inv[1:], q_inv[1:]) +# geom_list += [q_inv.flatten()] +# # Add to list +# geom = np.concatenate(geom_list) +# +# elif geom_type == "homography": +# H = np.loadtxt(geom_file) +# geom = H.flatten() +# +# return geom + + +def create_perturb(orig_pos, nPatchSize, nDescInputSize, fPerturbInfo): + """Create perturbations in the xyz format we use in the keypoint component. + + The return value is in x,y,z where x and y are scaled coordinates that are + between -1 and 1. In fact, the coordinates are scaled so that 1 = 0.5 * + (nPatchSize-1) + patch center. + + In case of the z, it is in log2 scale, where zero corresponds to the + original scale of the keypoint. For example, z of 1 would correspond to 2 + times the original scale of the keypoint. + + Parameters + ---------- + orig_pos: ndarray of float + Center position of the patch in coordinates. + + nPatchSize: int + Patch size of the entire patch extraction region. + + nDescInputSize: int + Patch size of the descriptor support region. + + fPerturbInfo: ndarray + Amount of maximum perturbations we allow in xyz. + + Notes + ----- + The perturbation has to ensure that the *ground truth* support region in + included. + """ + + # Check provided perturb info so that it is guaranteed that it will + # generate perturbations that holds the ground truth support region + assert fPerturbInfo[0] <= 1.0 - float(nDescInputSize) / float(nPatchSize) + assert fPerturbInfo[1] <= 1.0 - float(nDescInputSize) / float(nPatchSize) + + # Generate random perturbations + perturb_xyz = ((2.0 * np.random.rand(orig_pos.shape[0], 3) - 1.0) * + fPerturbInfo.reshape([1, 3])) + + return perturb_xyz + + +def apply_perturb(orig_pos, perturb_xyz, maxRatioScale): + """Apply the perturbation to ``a'' keypoint. + + The xyz perturbation is in format we use in the keypoint component. See + 'create_perturb' for details. + + Parameters + ---------- + orig_pos: ndarray of float (ndim == 1, size 3) + Original position of a *single* keypoint in pixel coordinates. 
+ + perturb_xyz: ndarray of float (ndim == 1, size 3) + The center position in xyz after the perturbation is applied. + + maxRatioScale: float + The multiplier that get's multiplied to scale to get half-width of the + region we are going to crop. + """ + + # assert that we are working with only one sample + assert len(orig_pos.shape) == 1 + assert len(perturb_xyz.shape) == 1 + + # get the new scale + new_pos_s = orig_pos[2] * (2.0**(-perturb_xyz[2])) + + # get xy to pixels conversion + xyz_to_pix = new_pos_s * maxRatioScale + + # Get the new x and y according to scale. Note that we multiply the + # movement we need to take by 2.0**perturb_xyz since we are moving at a + # different scale + new_pos_x = orig_pos[0] - perturb_xyz[0] * 2.0**perturb_xyz[2] * xyz_to_pix + new_pos_y = orig_pos[1] - perturb_xyz[1] * 2.0**perturb_xyz[2] * xyz_to_pix + + perturbed_pos = np.asarray([new_pos_x, new_pos_y, new_pos_s]) + + return perturbed_pos + + +def get_crop_range(xx, yy, half_width): + """Function for retrieving the crop coordinates""" + + xs = np.cast['int'](np.round(xx - half_width)) + xe = np.cast['int'](np.round(xx + half_width)) + ys = np.cast['int'](np.round(yy - half_width)) + ye = np.cast['int'](np.round(yy + half_width)) + + return xs, xe, ys, ye + + +def crop_patch(img, cx, cy, clockwise_rot, resize_ratio, nPatchSize): + """Crops the patch. + + Crops with center at cx, cy with patch size of half_width and resizes to + nPatchSize x nPatchsize patch. + + Parameters + ---------- + img: np.ndarray + Input image to be cropped from. + + cx: float + x coordinate of the patch. + + cy: float + y coordinate of the patch. + + clockwise_rot: float (degrees) + clockwise rotation to apply when extracting + + resize_ratio: float + Ratio of the resize. For example, ratio of two will crop a 2 x + nPatchSize region. + + nPatchSize: int + Size of the returned patch. + + Notes + ----- + + The cv2.warpAffine behaves in similar way to the spatial transformers. The + M matrix should move coordinates from the original image to the patch, + i.e. inverse transformation. + + """ + + # Below equation should give (nPatchSize-1)/2 when M x [cx, 0, 1]', + # 0 when M x [cx - (nPatchSize-1)/2*resize_ratio, 0, 1]', and finally, + # nPatchSize-1 when M x [cx + (nPatchSize-1)/2*resize_ratio, 0, 1]'. + dx = (nPatchSize - 1.0) * 0.5 - cx / resize_ratio + dy = (nPatchSize - 1.0) * 0.5 - cy / resize_ratio + M = np.asarray([[1. / resize_ratio, 0.0, dx], + [0.0, 1. 
/ resize_ratio, dy], + [0.0, 0.0, 1.0]]) + # move to zero base before rotation + R_pre = np.asarray([[1.0, 0.0, -(nPatchSize - 1.0) * 0.5], + [0.0, 1.0, -(nPatchSize - 1.0) * 0.5], + [0.0, 0.0, 1.0]]) + # rotate + theta = clockwise_rot / 180.0 * np.pi + R_rot = np.asarray([[np.cos(theta), -np.sin(theta), 0.0], + [np.sin(theta), np.cos(theta), 0.0], + [0.0, 0.0, 1.0]]) + # move back to corner base + R_post = np.asarray([[1.0, 0.0, (nPatchSize - 1.0) * 0.5], + [0.0, 1.0, (nPatchSize - 1.0) * 0.5], + [0.0, 0.0, 1.0]]) + # combine + R = np.dot(R_post, np.dot(R_rot, R_pre)) + + crop = cv2.warpAffine(img, np.dot(R, M)[:2, :], (nPatchSize, nPatchSize)) + + return crop + + +def load_patches(img, kp_in, y_in, ID_in, angle_in, fRatioScale, fMaxScale, + nPatchSize, nDescInputSize, in_dim, bPerturb, fPerturbInfo, + bReturnCoords=False, nAugmentedRotations=1, + fAugmentRange=180.0, fAugmentCenterRandStrength=0.0, + sAugmentCenterRandMethod="uniform", nPatchSizeAug=None): + '''Loads Patches given img and list of keypoints + + Parameters + ---------- + + img: input grayscale image (or color) + + kp_in: 2-D array of keypoints [nbkp, 3+], Note that data is in order of + [x,y,scale,...] + + y_in: 1-D array of keypoint labels (or target values) + + ID_in: 1-D array of keypoint IDs + + fRatioScale: float + The ratio which gets multiplied to obtain the crop radius. For example + if fRatioScale is (48-1) /2 /2, and the scale is 2, the crop region + will be of size 48x48. Note that -1 is necessary as the center pixel is + inclusive. In our previous implementation regarding ECCV submission, -1 + was negelected. This however should not make a big difference. + + fMaxScale: float + Since we are trying to make scale space learning possible, we give list + of scales for training. This should contain the maximum value, so that + we consider the largest scale when cropping. + + nPatchSize: int + Size of the patch (big one, not to be confused with nPatchSizeKp). + + nDescInputSize: int + Size of the inner patch (descriptor support region). Used for computing + the bounds for purturbing the patch location. + + in_dim: int + Number of dimensions of the input. For example grayscale would mean + `in_dim == 1`. + + bPerturb: boolean + Whether to perturb when extracting patches + + fPerturbInfo: np.array (float) + How much perturbation (in relative scale) for x, y, scale + + bReturnCoord: boolean + Return groundtruth coordinates. Should be set to True for new + implementations. Default is False for backward compatibility. + + nAugmentedRotations: int + Number of augmented rotations (equaly spaced) to be added to the + dataset. The original implementation should be equal to having this + number set to 1. + + fAugmentRange: float (degrees) + The range of the augnmented degree. For example, 180 would mean the + full range, 90 would be the half range. + + fAugmentCenterRandStrength: float (degrees) + The strength of the random to be applied to the center angle + perturbation. + + sAugmentCenterRandMethod: string + Name of the center randomness method. + + nPatchSizeAug: int + Size of the patch when augmenting rotations. This is used to create + smaller perturbations to ensure we do not crop outside the patch. 
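+
+    Example
+    -------
+    An illustrative call (the sizes are hypothetical; the real values come
+    from the configuration, and datasets/test.py shows the actual usage)::
+
+        x, y, ID, pos, angle, coords = load_patches(
+            img, kp, y_in, ID_in, angle_in,
+            fRatioScale=(48.0 - 1.0) / 2.0 / 2.0, fMaxScale=1.0,
+            nPatchSize=128, nDescInputSize=64, in_dim=1,
+            bPerturb=False, fPerturbInfo=np.zeros((3,)),
+            bReturnCoords=True)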
+ + ''' + + # get max possible scale ratio + maxRatioScale = fRatioScale * fMaxScale + + # check validity of nPreRotPatchSize + assert nAugmentedRotations >= 1 + # # Since we apply perturbations, we need to be at least sqrt(2) larger than + # # the desired when random augmentations are introduced + # if nAugmentedRotations > 1 or fAugmentCenterRandStrength > 0: + # nInitPatchSize = np.round(np.sqrt(2.0) * nPatchSize).astype(int) + # else: + # nInitPatchSize = nPatchSize + + if nPatchSizeAug is None: + nPatchSizeAug = nPatchSize + assert nPatchSize <= nPatchSizeAug + + # pre-allocate maximum possible array size for data + x = np.zeros((kp_in.shape[0] * nAugmentedRotations, in_dim, + nPatchSizeAug, nPatchSizeAug), dtype='uint8') + y = np.zeros((kp_in.shape[0] * nAugmentedRotations,), dtype='float32') + ID = np.zeros((kp_in.shape[0] * nAugmentedRotations,), dtype='int') + pos = np.zeros((kp_in.shape[0] * nAugmentedRotations, 3), dtype='float') + angle = np.zeros((kp_in.shape[0] * nAugmentedRotations,), dtype='float32') + coords = np.tile(np.zeros_like(kp_in), (nAugmentedRotations, 1)) + + # create perturbations + # Note: the purturbation still considers only the nPatchSize + perturb_xyz = create_perturb(kp_in, nPatchSize, + nDescInputSize, fPerturbInfo) + +# import pdb +# pdb.set_trace() + + # delete perturbations for the negatives (look at kp[6]) + # perturb_xyz[kp_in[:, 6] == 0] = 0 + perturb_xyz[y_in == 0] = 0 + + idxKeep = 0 + for idx in six.moves.xrange(kp_in.shape[0]): + + # current kp position + cur_pos = apply_perturb(kp_in[idx], perturb_xyz[idx], maxRatioScale) + cx = cur_pos[0] + cy = cur_pos[1] + cs = cur_pos[2] + + # retrieve the half width acording to scale + max_hw = cs * maxRatioScale + + # get crop range considering bigger area + xs, xe, ys, ye = get_crop_range(cx, cy, max_hw * np.sqrt(2.0)) + + # boundary check with safety margin + safety_margin = 1 + # if xs < 0 or xe >= img.shape[1] or ys < 0 or ys >= img.shape[0]: + if (xs < safety_margin or xe >= img.shape[1] - safety_margin or + ys < safety_margin or ys >= img.shape[0] - safety_margin): + continue + + # create an initial center orientation + center_rand = 0 + if sAugmentCenterRandMethod == "uniform": + # Note that the below will give zero when + # `fAugmentCenterRandStrength == 0`. This effectively disables the + # random perturbation. 
+ center_rand = ((np.random.rand() * 2.0 - 1.0) * + fAugmentCenterRandStrength) + else: + raise NotImplementedError( + "Unknown random method " + "sAugmentCenterRandMethod = {}".format( + sAugmentCenterRandMethod + ) + ) + + # create an array of rotations to used + rot_diff_list = np.arange(nAugmentedRotations).astype(float) + # compute the step between subsequent rotations + rot_step = 2.0 * fAugmentRange / float(nAugmentedRotations) + rot_diff_list *= rot_step + + for rot_diff in rot_diff_list: + + # the rotation to be applied for this patch + crot_deg = rot_diff + center_rand + crot_rad = crot_deg * np.pi / 180.0 + + # Crop using the crop function + # crop = img[ys:ye, xs:xe] + # x[idxKeep, 0, :, :] = crop_patch( + # img, cx, cy, crot_deg, + # max_hw / (float(nPatchSizeAug - 1) * 0.5), + # nPatchSizeAug) # note the nInitPatchSize + cur_patch = crop_patch( + img, cx, cy, crot_deg, + max_hw / (float(nPatchSizeAug - 1) * 0.5), + nPatchSizeAug) + if len(cur_patch.shape) == 2: + # pdb.set_trace() + cur_patch = cur_patch[..., np.newaxis] + + x[idxKeep] = cur_patch.transpose(2, 0, 1) + + # crop = img[ys:ye, xs:xe] + + # update target value and id + y[idxKeep] = y_in[idx] + ID[idxKeep] = ID_in[idx] + # add crot (in radians), note that we are between -2pi and 0 for + # compatiblity + angle[idxKeep] = ((angle_in[idx] + crot_rad) % (2.0 * np.pi) - + (2.0 * np.pi)) + + # Store the perturbation (center of the patch is 0,0,0) + new_perturb_xyz = perturb_xyz[idx].copy() + xx, yy, zz = new_perturb_xyz + rx = np.cos(crot_rad) * xx - np.sin(crot_rad) * yy + ry = np.sin(crot_rad) * xx + np.cos(crot_rad) * yy + rz = zz + pos[idxKeep] = np.asarray([rx, ry, rz]) + + # store the original pixel coordinates + new_kp_in = kp_in[idx].copy() + new_kp_in[3] = ((new_kp_in[3] + crot_rad) % (2.0 * np.pi) - + (2.0 * np.pi)) + coords[idxKeep] = new_kp_in + + idxKeep += 1 + + # Delete unassigned + x = x[:idxKeep] + y = y[:idxKeep] + ID = ID[:idxKeep] + pos = pos[:idxKeep] + angle = angle[:idxKeep] + coords = coords[:idxKeep] + + if not bReturnCoords: + return x.astype("uint8"), y, ID.astype("int"), pos, angle + else: + return x.astype("uint8"), y, ID.astype("int"), pos, angle, coords + + +def random_mine_non_kp_with_2d_distance(img, pos_kp, scale_hist, + scale_hist_c, param): + """Randomly mines negatives with 2d distance. + + This function looks at raw pixel distances, without considering the + scale. The threshold sould be given by the parameter. This function was our + first atempt which was not really successful. + + Parameters + ---------- + img: np.ndrray + Input image to be mined + + pos_kp: np.ndarray + Positive keypoint locations that we should avoid when mining. Note that + this can be either SfM points, or all SIFT keypoints. + + scale_hist: np.ndarray + Scale distribution to randomly sample scale from. Should be the scale + histogram of positive keypoints. + + scale_hist_c: np.ndarray + Bin center position of the scale histogram. Note that numpy histogram + (by default) stores the cutoff values where we want the center values. + + param: paramStruct object + The parameter object which is read from the configuration. 
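+
+    Example
+    -------
+    Illustrative usage; the histogram inputs come from ``get_scale_hist``
+    defined later in this file::
+
+        scale_hist, scale_hist_c = get_scale_hist(train_data_dir, param)
+        neg_kp = random_mine_non_kp_with_2d_distance(
+            img, pos_kp, scale_hist, scale_hist_c, param)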
+ + """ + + # read param + neg_2_mine = param.dataset.nNegPerImg + neg_dist_th = param.dataset.nNegDistTh + if 'fNegOverlapTh' in param.patch.__dict__.keys(): + raise ValueError('fNegOverlapTh should not be defined ' + 'when using distance method!') + + all_neg_kp = None + + num_iter = 0 + while neg_2_mine > 0: + # random sample positions + neg_kp = np.random.rand(neg_2_mine * 10, 2) # randomly get 10 times + neg_kp *= np.array([img.shape[1] - 1 - 2.0 * neg_dist_th, + img.shape[0] - 1 - 2.0 * neg_dist_th], + dtype='float').reshape([1, 2]) # adjust to borders + neg_kp += neg_dist_th + + # remove too close keypoints + dists = neg_kp.reshape([-1, 2, 1]) - pos_kp[:, :2].reshape([1, 2, -1]) + idx_far_enough = (dists**2).sum(axis=1).min(axis=1) > neg_dist_th + neg_kp = neg_kp[idx_far_enough] + + # random shuffle and take neg_2_mine + idx_shuffle = np.random.permutation(len(neg_kp)) + neg_kp = neg_kp[idx_shuffle[:min(neg_2_mine, len(idx_shuffle))]] + + # concatenate random scale + random_scales = np.random.choice(scale_hist_c, size=(len(neg_kp), 1), + p=scale_hist) + neg_kp = np.concatenate([neg_kp, random_scales], axis=1) + + # concatenate dummy values + neg_kp = np.concatenate([neg_kp, + np.zeros((len(neg_kp), 1)), + -1.0 * np.ones((len(neg_kp), 1)), + np.zeros((len(neg_kp), 1))], axis=1) + + # concatenate to the final list + if all_neg_kp is None: + all_neg_kp = neg_kp + else: + all_neg_kp = np.concatenate([all_neg_kp, neg_kp]) + + # update neg_2_mine + neg_2_mine = param.dataset.nNegPerImg - len(all_neg_kp) + + # update number of iterations + num_iter += 1 + + if num_iter > 100: + raise RuntimeError('I am taking too much') + + neg_kp = all_neg_kp[:param.dataset.nNegPerImg] + + return neg_kp + + +def random_mine_non_kp_with_3d_blocking(img, pos_kp, scale_hist, + scale_hist_c, param, + neg_per_iter=None, + max_iter=100): + """Randomly mines negatives with 3d blocking. + + This function uses overlap and if the overlap / proposed region is larget + than the given threshold, we don't use that area. + + Parameters + ---------- + img: np.ndrray + Input image to be mined + + pos_kp: np.ndarray + Positive keypoint locations that we should avoid when mining. Note that + this can be either SfM points, or all SIFT keypoints. + + scale_hist: np.ndarray + Scale distribution to randomly sample scale from. Should be the scale + histogram of positive keypoints. + + scale_hist_c: np.ndarray + Bin center position of the scale histogram. Note that numpy histogram + (by default) stores the cutoff values where we want the center values. + + param: paramStruct object + The parameter object which is read from the configuration. + + neg_per_iter: int or None (optional) + If not given or set to None, we will mine 10 times the desired amount + at each iteration. This may cause negatives overlapping with each + other. To make sure none of the negatives are overlapping, use 1 for + this argument. + + max_iter: int (optional) + Maximum iterations to the looping to try mining. When he does not find + after this, he will simply stop mining and maybe print a message, but + it will not necessarily crash. I did it this way since this function is + intended to run in a sub-process, which the individual crashes do not + really matter. And also, if you can't find enough negatives after that + mining, maybe you are better off just mining no more. 
+ + """ + + # read param + neg_2_mine = param.dataset.nNegPerImg + neg_overlap_th = param.patch.fNegOverlapTh + if 'nNegDistTh' in param.dataset.__dict__.keys(): + raise ValueError('nNegDistTh should not be defined ' + 'when using distance method!') + # To stay inside the image boundaries + maxRatioScale = param.patch.fRatioScale * param.patch.fMaxScale + # The multiplier to get support region + ratioDescSupport = (param.patch.fRatioScale * + float(param.model.nDescInputSize) / + float(param.patch.nPatchSize)) + + all_neg_kp = None + + num_iter = 0 + while neg_2_mine > 0: + + if neg_per_iter is None: + neg_per_iter = neg_2_mine * 10 + + # randomly sample scales + random_scales = np.random.choice(scale_hist_c, size=(neg_per_iter, 1), + p=scale_hist) + + # random sample positions with scale in mind - this will ensure we are + # in the boundary + neg_kp = np.random.rand(neg_per_iter, 2) # randomly get 10 times + rescale_mtx = np.array([img.shape[1] - 1, + img.shape[0] - 1], + dtype='float').reshape([1, 2]) + rescale_mtx = (rescale_mtx - + 2.0 * maxRatioScale * random_scales.reshape([-1, 1])) + neg_kp *= rescale_mtx + + # concatenate scale + neg_kp = np.concatenate([neg_kp, random_scales], axis=1) + + # compute overlaps + x2 = neg_kp[:, 0] + y2 = neg_kp[:, 1] + r2 = neg_kp[:, 2] * ratioDescSupport + # reshape x2, y2, r2 so that we do all the combinations + x2.shape = [-1, 1] + y2.shape = [-1, 1] + r2.shape = [-1, 1] + x1 = pos_kp[:, 0] + y1 = pos_kp[:, 1] + r1 = pos_kp[:, 2] * ratioDescSupport + # reshape x1, y1, r1 so that we do all the combinations + x1.shape = [1, -1] + y1.shape = [1, -1] + r1.shape = [1, -1] + max_overlap_with_pos = np.max( + getIoUOfRectangles(x1, y1, r1, x2, y2, r2), + axis=1 + ) + bCheckNeg = False + if all_neg_kp is not None: + bCheckNeg = len(all_neg_kp) > 0 + if bCheckNeg: + # overlap with previously mined negatives + x1 = all_neg_kp[:, 0] + y1 = all_neg_kp[:, 1] + r1 = all_neg_kp[:, 2] * ratioDescSupport + # reshape x1, y1, r1 so that we do all the combinations + x1.shape = [1, -1] + y1.shape = [1, -1] + r1.shape = [1, -1] + max_overlap_with_neg = np.max( + getIoUOfRectangles(x1, y1, r1, x2, y2, r2), + axis=1 + ) + # now get overlap in terms of ratio + r2.shape = [-1] + max_overlap = np.maximum(max_overlap_with_pos, + max_overlap_with_neg) + else: + r2.shape = [-1] + max_overlap = max_overlap_with_pos + + # remove too close keypoints + idx_far_enough = max_overlap <= neg_overlap_th + neg_kp = neg_kp[idx_far_enough] + + # random shuffle and take neg_2_mine + idx_shuffle = np.random.permutation(len(neg_kp)) + neg_kp = neg_kp[idx_shuffle[:min(neg_2_mine, len(idx_shuffle))]] + + # concatenate dummy values + neg_kp = np.concatenate([neg_kp, + np.zeros((len(neg_kp), 1)), + -1.0 * np.ones((len(neg_kp), 1)), + np.zeros((len(neg_kp), 1))], axis=1) + + # concatenate to the final list + if all_neg_kp is None: + all_neg_kp = neg_kp + else: + all_neg_kp = np.concatenate([all_neg_kp, neg_kp]) + + # update neg_2_mine + neg_2_mine = param.dataset.nNegPerImg - len(all_neg_kp) + + # update number of iterations + num_iter += 1 + + # # if num_iter == 1 and len(all_neg_kp) == 0: + # import pdb + # pdb.set_trace() + + if num_iter > max_iter: + print('\nRan {} iterations, but could not mine {} on this image' + ''.format(num_iter, neg_2_mine)) + break + + neg_kp = all_neg_kp[:param.dataset.nNegPerImg] + + return neg_kp + + +def get_list_of_img(train_data_dir, dump_data_dir, param, mode): + + # Check if split file exists + split_prefix = train_data_dir + 'split-' + split_prefix += 
str(param.dataset.nTrainPercent) + '-' + split_prefix += str(param.dataset.nValidPercent) + '-' + split_prefix += str(param.dataset.nTestPercent) + '-' + + # If it does not exist, create one + if not os.path.exists(split_prefix + mode + '.txt'): + + # Read list of images + list_png_file = [] + for files in os.listdir(train_data_dir): + if files.endswith(".png"): + list_png_file = list_png_file + [files] + + # Shuffle the image list + if not os.path.exists(dump_data_dir + 'permute_png_idx.h5'): + print(' -- ' + mode + ': ' + 'Creating new shuffle for reading images') + permute_png_idx = np.random.permutation(len(list_png_file)) + to_save = {"saveval": permute_png_idx} + saveh5(to_save, + dump_data_dir + 'permute_png_idx.h5') + # dt.save(permute_png_idx, + # dump_data_dir + 'permute_png_idx.h5') + else: + print(' -- ' + mode + ': ' + 'Loading shuffle for reading images from ' + '{}'.format(dump_data_dir)) + to_load = loadh5(dump_data_dir + + 'permute_png_idx.h5') + permute_png_idx = to_load["saveval"] + # permute_png_idx = dt.load(dump_data_dir + + # 'permute_png_idx.h5') + + list_png_file = [list_png_file[permute_png_idx[idx]] for idx in + range(len(list_png_file))] + + # Write to file (all three) + f_train = open(split_prefix + 'train.txt', 'w') + f_valid = open(split_prefix + 'valid.txt', 'w') + f_test = open(split_prefix + 'test.txt', 'w') + + train_end = int(float(param.dataset.nTrainPercent) / 100.0 * + len(list_png_file)) + valid_end = int(float(param.dataset.nTrainPercent + + param.dataset.nValidPercent) / 100.0 * + len(list_png_file)) + + for idx_png in six.moves.xrange(len(list_png_file)): + if idx_png > valid_end: + print(list_png_file[idx_png], file=f_test) + elif idx_png > train_end: + print(list_png_file[idx_png], file=f_valid) + else: + print(list_png_file[idx_png], file=f_train) + + f_train.close() + f_valid.close() + f_test.close() + + # Read the list + list_png_file = list(np.loadtxt(split_prefix + mode + '.txt', dtype='str')) + + return list_png_file + + +def get_scale_hist(train_data_dir, param): + + # Check if scale histogram file exists + hist_file_name = train_data_dir + 'scales-histogram-minsc-' \ + + str(param.dataset.fMinKpSize) + '.h5' + if not os.path.exists(hist_file_name): + + # read all positive keypoint scales + list_png_file = [] + for files in os.listdir(train_data_dir): + if files.endswith(".png"): + list_png_file = list_png_file + [files] + all_scales = [] + for png_file in list_png_file: + kp_file_name = train_data_dir + png_file.replace('.png', '_P.mat') + cur_pos_kp = scipy.io.loadmat(kp_file_name)['TFeatures'] + cur_pos_kp = np.asarray(cur_pos_kp, dtype='float') + all_scales += [cur_pos_kp[5, :].flatten()] + all_scales = np.concatenate(all_scales) + + # make histogram + hist, bin_edges = np.histogram(all_scales, bins=100) + hist_c = (bin_edges[1:] + bin_edges[:-1]) * 0.5 + + # save to h5 file + with h5py.File(hist_file_name, 'w') as hist_file: + hist_file['all_scales'] = all_scales + hist_file['histogram_bins'] = hist + hist_file['histogram_centers'] = hist_c + + # Load from the histogram file + with h5py.File(hist_file_name, 'r') as hist_file: + scale_hist = np.asarray(hist_file['histogram_bins'], + dtype=float).flatten() + scale_hist /= np.sum(scale_hist) + scale_hist_c = np.asarray(hist_file['histogram_centers']).flatten() + + return scale_hist, scale_hist_c + +# +# helper.py ends here diff --git a/datasets/eccv2016/math_tools.py b/datasets/eccv2016/math_tools.py new file mode 100644 index 0000000..d37bb65 --- /dev/null +++ 
b/datasets/eccv2016/math_tools.py @@ -0,0 +1,359 @@ +# math_tools.py --- +# +# Filename: math_tools.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Fri Feb 19 18:04:58 2016 (+0100) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + + +import numpy as np +import six + + +# ------------------------------------------------------------------------------ +# Math functions +def softmax(val, axis, softmax_strength): + ''' Soft max function used for cost function ''' + + import theano + import theano.tensor as T + floatX = theano.config.floatX + softmax_strength = np.cast[floatX](softmax_strength) + + if softmax_strength < 0: + res_after_max = T.max(val, axis=axis) + else: + res_after_max = np.cast[floatX](1.0) / softmax_strength \ + * T.log(T.mean(T.exp( + softmax_strength * (val - T.max(val, + axis=axis, + keepdims=True)) + ), axis=axis)) \ + + T.max(val, axis=axis) + return res_after_max + + +def softargmax(val, axis, softargmax_strength): + ''' Soft argmax function used for cost function ''' + + import theano + import theano.tensor as T + floatX = theano.config.floatX + + # The implmentation only works for single axis + assert isinstance(axis, int) + + if softargmax_strength < 0: + res = T.argmax(val, axis=axis) + else: + safe_exp = T.exp(softargmax_strength * ( + val - T.max(val, axis=axis, keepdims=True))) + prob = safe_exp / T.sum(safe_exp, axis=axis, keepdims=True) + ind = T.arange(val.shape[axis], dtype=floatX) + res = T.sum(prob * ind, axis=axis) / T.sum(prob, axis=axis) + + return res + + +def batch_inc_mean(data, batch_size, axis=None): + """Computes standard deviation of the data using batches. + + Parameters + ---------- + data: np.ndarray or h5py.Dataset + Data to compute the standar deviation for. + + batch_size: int + Size of the working batch. + + axis: None or tuple (optional) + Axis in which we should take the standar deviation on + + Returns + ------- + mean: np.ndarray or float + Mean. ndarray if axis is not None, float otherwise. + + """ + + if axis is not None: + raise NotImplementedError('Working on arbitrary axis' + ' is not yet implemented') + + else: + numel = np.prod(data.shape) + dim1_batch_size = int(np.floor(batch_size / np.prod(data.shape[1:]))) + num_batch = int(np.ceil(float(data.shape[0]) / float(dim1_batch_size))) + + mean = 0.0 + for idx_batch in six.moves.xrange(num_batch): + idx_s = idx_batch * dim1_batch_size + idx_e = min((idx_batch + 1) * dim1_batch_size, data.shape[0]) + x = data[idx_s:idx_e].flatten() + mean += float(np.sum(x)) + + mean = mean / float(numel) + + return mean + + +def batch_inc_std(data, mean, batch_size, axis=None): + """Computes standard deviation of the data using batches. + + Parameters + ---------- + data: np.ndarray or h5py.Dataset + Data to compute the standar deviation for. + + mean: np.ndarray + Precomputed mean of the data. + + batch_size: int + Size of the working batch. + + axis: None or tuple (optional) + Axis in which we should take the standar deviation on + + Returns + ------- + std: np.ndarray or float + Standar deviation. ndarray if axis is not None, float otherwise. 
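+
+    Example
+    -------
+    Illustrative two-pass use on an HDF5 dataset ``x``; the batch size counts
+    elements and just needs to cover at least one sample::
+
+        mean = batch_inc_mean(x, batch_size=2 ** 20)
+        std = batch_inc_std(x, mean, batch_size=2 ** 20)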
+ + """ + + if axis is not None: + raise NotImplementedError('Working on arbitrary axis' + ' is not yet implemented') + + else: + numel = np.prod(data.shape) + dim1_batch_size = int(np.floor(batch_size / np.prod(data.shape[1:]))) + num_batch = int(np.ceil(float(data.shape[0]) / float(dim1_batch_size))) + + M2 = 0.0 + for idx_batch in six.moves.xrange(num_batch): + idx_s = idx_batch * dim1_batch_size + idx_e = min((idx_batch + 1) * dim1_batch_size, data.shape[0]) + x = data[idx_s:idx_e].flatten() + M2 += float(np.sum((x - mean)**2)) + + std = np.sqrt(M2 / float(numel - 1)) + + return std + + +# ------------------------------------------------------------------------------ +# Circle related functions +def getIntersectionOfCircles(x1, y1, r1, x2, y2, r2): + + # see http://mathworld.wolfram.com/Circle-CircleIntersection.html + + d = np.sqrt((x1 - x2)**2 + (y1 - y2)**2) + + # if d >= r1 + r2: + # return 0 + + intersection = np.zeros_like(d) + valid = np.where(d >= r1 + r2)[0] + + d = d[valid] + r1 = r1[valid] + r2 = r2[valid] + + intersection[valid] = ( + r2**2 * np.arccos((d**2 + r2**2 - r1**2) / (2.0 * d * r2)) + + r1**2 * np.arccos((d**2 + r1**2 - r2**2) / (2.0 * d * r1)) - + 0.5 * np.sqrt( + (-d + r2 + r1) * (d + r2 - r1) * (d - r2 + r1) * (d + r2 + r1) + ) + ) + + return intersection + + +def getInterNUnionOfCircles(x1, y1, r1, x2, y2, r2): + + intersection = getIntersectionOfCircles(x1, y1, r1, x2, y2, r2) + union = np.pi * r1**2 + np.pi * r2**2 - intersection + + return intersection, union + + +# ------------------------------------------------------------------------------ +# Rectangle related functions +def getIntersectionOfRectangles(x1, y1, r1, x2, y2, r2): + """Computes Intersection of Rectangles + + Parameters + ---------- + x1: ndarray or tensor + x coordinates of the first rectangle. + + y1: ndarray or tensor + y coordinates of the first rectangle. + + r1: ndarray or tensor + Half width of the first rectangle (just to be consistent with + circle impl). + + x2: ndarray or tensor + x coordinates of the second rectangle. + + y2: ndarray or tensor + y coordinates of the second rectangle. + + r2: ndarray or tensor + Half width of the second rectangle (just to be consistent with + circle impl). + """ + + # use theano or numpy depending on the variable + if isinstance(x1, np.ndarray): + # Checking if ndarray will prevent us from loading theano + # unnecessarily. + cm = np + else: + # import theano + import theano.tensor as T + + cm = T + + # intersection computation + inter_w = cm.minimum(x1 + r1, x2 + r2) - cm.maximum(x1 - r1, x2 - r2) + inter_w = cm.maximum(0, inter_w) + inter_h = cm.minimum(y1 + r1, y2 + r2) - cm.maximum(y1 - r1, y2 - r2) + inter_h = cm.maximum(0, inter_h) + + return inter_w * inter_h + + +def getIoUOfRectangles(x1, y1, r1, x2, y2, r2): + """Computes Intersection over Union of Rectangles + + Parameters + ---------- + x1: ndarray or tensor + x coordinates of the first rectangle. + + y1: ndarray or tensor + y coordinates of the first rectangle. + + r1: ndarray or tensor + Half width of the first rectangle (just to be consistent with + circle impl). + + x2: ndarray or tensor + x coordinates of the second rectangle. + + y2: ndarray or tensor + y coordinates of the second rectangle. + + r2: ndarray or tensor + Half width of the second rectangle (just to be consistent with + circle impl). 
+ """ + + # compute intersection and union + intersection = getIntersectionOfRectangles(x1, y1, r1, x2, y2, r2) + union = (2.0 * r1)**2.0 + (2.0 * r2)**2.0 - intersection + + return intersection / union + + +# ----------------------------------------------------------------------------- +# For normalization +def _subtractive_norm_make_coef(norm_kernel, input_hw): + """Creates the coef matrix for accounting borders when applying + subtractive normalization. + + Parameters + ---------- + norm_kernel : np.ndarray, 2d + Normalized kernel applied for subtractive normalization. + + input_hw : tuple + Height and width of the input image (patch) for the + SubtractiveNormalizationLayer. + + """ + + assert np.isclose(np.sum(norm_kernel), 1.0) + + # This allows our mean computation to compensate for the border area, + # where you have less terms adding up. Torch used convolution with a + # ``one'' image, but since we do not want the library to depend on + # other libraries with convolutions, we do it manually here. + coef = np.ones(input_hw, dtype='float32') + pad_x = norm_kernel.shape[1] // 2 + pad_y = norm_kernel.shape[0] // 2 + + # Corners + # for the top-left corner + tl_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::-1, ::-1], axis=0), axis=1)[::1, ::1] + coef[:pad_y + 1, :pad_x + 1] = tl_cumsum_coef[pad_y:, pad_x:] + # for the top-right corner + tr_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::-1, ::1], axis=0), axis=1)[::1, ::-1] + coef[:pad_y + 1, -pad_x - 1:] = tr_cumsum_coef[pad_y:, :pad_x + 1] + # for the bottom-left corner + bl_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::1, ::-1], axis=0), axis=1)[::-1, ::1] + coef[-pad_y - 1:, :pad_x + 1] = bl_cumsum_coef[:pad_y + 1, pad_x:] + # for the bottom-right corner + br_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::1, ::1], axis=0), axis=1)[::-1, ::-1] + coef[-pad_y - 1:, -pad_x - 1:] = br_cumsum_coef[:pad_y + 1, :pad_x + 1] + + # Sides + tb_slice = slice(pad_y + 1, -pad_y - 1) + # for the left side + fill_value = tl_cumsum_coef[-1, pad_x:] + coef[tb_slice, :pad_x + 1] = fill_value.reshape([1, -1]) + # for the right side + fill_value = br_cumsum_coef[0, :pad_x + 1] + coef[tb_slice, -pad_x - 1:] = fill_value.reshape([1, -1]) + lr_slice = slice(pad_x + 1, -pad_x - 1) + # for the top side + fill_value = tl_cumsum_coef[pad_y:, -1] + coef[:pad_y + 1, lr_slice] = fill_value.reshape([-1, 1]) + # for the right side + fill_value = br_cumsum_coef[:pad_y + 1, 0] + coef[-pad_y - 1:, lr_slice] = fill_value.reshape([-1, 1]) + + # # code for validation of above + # img = np.ones_like(input, dtype='float32') + # import cv2 + # coef_cv2 = cv2.filter2D(img, -1, norm_kernel, + # borderType=cv2.BORDER_CONSTANT) + + return coef + +_lcn_make_coef = _subtractive_norm_make_coef + + +# +# math_tools.py ends here diff --git a/datasets/eccv2016/wrapper.py b/datasets/eccv2016/wrapper.py new file mode 100644 index 0000000..a003211 --- /dev/null +++ b/datasets/eccv2016/wrapper.py @@ -0,0 +1,127 @@ +# wrapper.py --- +# +# Filename: wrapper.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jun 29 14:55:21 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. 
+ +# Code: + +import numpy as np + +import datasets.eccv2016.eccv as old_impl +from datasets.eccv2016.custom_types import paramStruct +from utils import get_patch_size, get_patch_size_no_aug, get_ratio_scale + + +def config_to_param(config): + """The function that takes care of the transfer to the new framework""" + + param = paramStruct() + + # Param Group "dataset" + param.dataset.nTestPercent = int(20) + param.dataset.dataType = "ECCV" + param.dataset.nValidPercent = int(20) + param.dataset.fMinKpSize = float(2.0) + param.dataset.nPosPerImg = int(-1) + # Note that we are passing a list. This module actually supports + # concatenating datsets. + param.dataset.trainSetList = ["ECCV/" + config.data_name] + param.dataset.nNegPerImg = int(1000) + param.dataset.nTrainPercent = int(60) + + # Param Group "patch" + if config.old_data_compat: + param.patch.nPatchSize = int(get_patch_size(config)) + else: + param.patch.nPatchSize = int(get_patch_size_no_aug(config)) + param.patch.nPatchSizeAug = int(get_patch_size(config)) + param.patch.noscale = False + param.patch.fNegOverlapTh = float(0.1) + param.patch.sNegMineMethod = "use_all_SIFT_points" + param.patch.fRatioScale = float(get_ratio_scale(config)) + param.patch.fPerturbInfo = np.array([0.2, 0.2, 0.0]).astype(float) + if config.old_data_compat: + param.patch.nMaxRandomNegMineIter = int(500) + else: + param.patch.nMaxRandomNegMineIter = int(100) + param.patch.fMaxScale = 1.0 + param.patch.bPerturb = 1.0 + + # Param Group "model" + param.model.nDescInputSize = int(config.desc_input_size) + + # override folders from config + setattr(param, "data_dir", config.data_dir) + setattr(param, "temp_dir", config.temp_dir) + setattr(param, "scratch_dir", config.scratch_dir) + + return param + + +class Wrapper(object): + """The wrapper class for eccv data. + + Since we want to re-use the code for loading and storing data, but do *not* + want to bother integrating it into the new framework, we simply create a + wrapper on top, which translates the old format to the new one. This is + likely to cause some unecessary overhead, but is not so critical right now. + + The role of this class is to: + + * Create a dictionary that allows easy access to raw data + + """ + + def __init__(self, config, rng): + + # Placeholder for the data dictionary + self.data = {} + + # Use the old data module to load data. Processing data loading for + # differen tasks + for task in ["train", "valid", "test"]: + param = config_to_param(config) + old_data = old_impl.data_obj(param, task) + # Some sanity check to make sure that the data module is behaving + # as intended. 
+ assert old_data.patch_height == old_data.patch_width + assert old_data.patch_height == get_patch_size(config) + assert old_data.num_channel == config.nchannel + assert old_data.out_dim == config.nchannel + self.data[task] = { + "patch": old_data.x, + "xyz": old_data.pos, + "angle": old_data.angle.reshape(-1, 1), + "ID": old_data.ID, + } + + # data ordering of this class + self.data_order = "NCHW" + + # Save the hash, for the pairs folder + self.hash = old_data.hash +# +# wrapper.py ends here diff --git a/datasets/lift.py b/datasets/lift.py new file mode 100644 index 0000000..611099f --- /dev/null +++ b/datasets/lift.py @@ -0,0 +1,334 @@ +# lift.py --- +# +# Filename: lift.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Wed Jul 5 19:29:17 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + + +from __future__ import print_function + +import importlib +import os +import sys + +import numpy as np +import h5py + +from six.moves import xrange +from utils import loadh5, saveh5 + + +class Dataset(object): + """The Dataset Module + + LATER: For validation data, next_batch should also consider the pos-to-neg + ratio. For now, we just weight them + + LATER: Smarter cache system + LATER: Optimize LUT generation + + """ + + def __init__(self, config, rng): + self.config = config + self.rng = rng + + # Hard-code the 3D distance threshold + # 3D points can be too close to be non-matches + if self.config.data_type == "chalmers": + if self.config.data_name == "oxford": + self.th_dist = 2e-3 + else: + raise RuntimeError( + "Unknown data_name: '{}'".format( + self.config.data_name)) + + # Look Up Tables + self.LUT_kp = {} + self.LUT_nonkp = {} + if hasattr(self, "th_dist"): + self.LUT_below_th = {} + + # Initialize the appropriate data module + self.data_module = importlib.import_module( + "datasets.{}.wrapper".format(self.config.data_type)) + + # Initialize the wrapper class. This should also make the data + # available on demand. 
For now, we just get the handles to the data + # stored in h5 files since it's too large + self.data_wrapper = self.data_module.Wrapper(config, rng) + # Alias data for easy access + self.data = self.data_wrapper.data + + # Overwrite pairs directory with dataset name + self.pair_dir = '{}/{}/{}/'.format( + config.pair_dir, config.data_type, config.data_name) + # And with the hash + self.pair_dir = '{}/{}'.format( + self.pair_dir, self.data_wrapper.hash) + + # Pointers for each task + self.batch_start = {} + self.epoch = {} + self.ind = {} + self.pairs = {} + for task in self.data: + # Counters to indicate the position of the batch in epoch + self.batch_start[task] = 0 + self.epoch[task] = 0 + + # Mark LUT to un-init + self.LUT_kp[task] = None + self.LUT_nonkp[task] = None + if hasattr(self, "th_dist"): + self.LUT_below_th[task] = None + + # Create initial pairs + self.pairs[task] = self._create_pairs( + task, self.config.pair_use_cache) + # Create Initial random permutation + self.ind[task] = np.random.permutation( + len(self.pairs[task]["P1"])) + + def next_batch(self, task, subtask, batch_size, aug_rot=0, num_empty=0): + + # Dictionary to return + cur_data = {} + + assert batch_size > 0 + + # Check if we can fill the data + if self.batch_start[task] + batch_size >= len(self.ind[task]): + # If we can't shuffle the indices, reset pointer, and increase + # epoch + self.ind[task] = np.random.permutation( + len(self.pairs[task]["P1"])) + self.batch_start[task] = 0 + self.epoch[task] += 1 + # Every N epochs, recreate pairs. Note that the number of pairs + # will NEVER change + if self.config.pair_interval > 0 and task == "train": + # Always active for oxford (small set) + if self.config.data_name is "oxford" or self.config.regen_pairs: + if self.epoch[task] % self.config.pair_interval == 0: + self.pairs[task] = self._create_pairs( + task, self.config.pair_use_cache, overwrite=False) + + # Pack the data according to pairs + for _type in ["patch", "xyz", "angle"]: + cur_data[_type] = {} + for _name in ["P1", "P2", "P3", "P4"]: + _pair = self.pairs[task][_name] + cur_data[_type][_name] = np.asarray([ + self.data[task][_type][_pair[ + self.ind[task][_i + self.batch_start[task]] + ]] for _i in xrange(batch_size) + ]) + + # Work-around for partial batches: add empty samples + if num_empty > 0: + cur_data[_type][_name].resize( + (cur_data[_type][_name].shape[0] + num_empty,) + + cur_data[_type][_name].shape[1:]) + + # If it's the patch, make it into NHWC format + if _type == "patch": + if self.data_wrapper.data_order == "NCHW": + cur_data[_type][_name] = np.array( + np.transpose(cur_data[_type][_name], + (0, 2, 3, 1)) + ) + + # Augment rotations: -1 (zeros), 0 (no aug ST, do nothing), 1 (random) + # Allows us to separate train/val (not doing it for now) + if aug_rot > 0: + cur_data["aug_rot"] = {} + for _name in ["P1", "P2", "P3", "P4"]: + rot = 2 * np.pi * np.random.rand(batch_size) + cur_data["aug_rot"][_name] = {} + cur_data["aug_rot"][_name]["angle"] = rot + cur_data["aug_rot"][_name]["cs"] = np.concatenate( + (np.cos(rot)[..., None], np.sin(rot)[..., None]), + axis=1) + elif aug_rot < 0: + cur_data["aug_rot"] = {} + for _name in ["P1", "P2", "P3", "P4"]: + rot = np.zeros(batch_size) + cur_data["aug_rot"][_name] = {} + cur_data["aug_rot"][_name]["angle"] = rot + cur_data["aug_rot"][_name]["cs"] = np.concatenate( + (np.cos(rot)[..., None], np.sin(rot)[..., None]), + axis=1) + + # Update the batch start location + self.batch_start[task] += batch_size + + return cur_data + + def _create_pairs(self, 
task, use_cache=True, overwrite=False): + """Generate the pairs from ID + + This function should return the pair indices given the task. The return + type is expected to be a dictionary, where you can access by doing + res["P1"], for example. + + """ + + # Use the cache file if asked + if use_cache: + pairs_file = os.path.join( + self.pair_dir, "{}.h5".format(task)) + if not os.path.exists(self.pair_dir): + os.makedirs(self.pair_dir) + if os.path.exists(pairs_file) and not overwrite: + if self.config.use_augmented_set: + # Add rotation augmentation if it does not exist (compat) + _f = h5py.File(pairs_file, "r+") + for name in ["P1", "P2", "P3", "P4"]: + if (name + "_rot_aug") not in _f: + _f.create_dataset( + name + "_rot_aug", data=2 * + np.pi * self.rng.rand(_f[name].size)) + _f.close() + return loadh5(pairs_file) + + # Make a lookup table for getting the indices of each sample. This can + # be easily done by sorting by ID, and storing the indices to a + # dictionary + if self.LUT_kp[task] is None: + # Create empty dictionary + self.LUT_kp[task] = {} + self.LUT_nonkp[task] = {} + if hasattr(self, "th_dist"): + self.LUT_below_th[task] = {} + # Build list of indices + dist_idx_reverse = np.zeros( + self.data[task]["ID"].max() + 1, + dtype=np.int64) + dist_idx_reverse[self.data[task]["dist_idx"]] = range( + 1, self.data[task]["dist_idx"].size + 1) + dist_idx_reverse -= 1 + # Argsort the ID + print("[{}] sorting IDs...".format(task)) + ID = self.data[task]["ID"] + ind = np.argsort(ID) + # Go through them one by one -- the easy way + print("[{}] building LUT...".format(task)) + start = 0 + start_ID = ID[ind[start]] + for end in xrange(1, len(ind)): + if end % 100 == 0: + print("\r --- working on {}/{}".format( + end + 1, len(ind)), end="") + sys.stdout.flush() + + # Check if indices have changed or we reached the end + if start_ID != ID[ind[end]] or end == len(ind) - 1: + # Store them in proper LUT + if start_ID < 0: + self.LUT_nonkp[task][-1] = ind[start:end] + else: + self.LUT_kp[task][start_ID] = ind[start:end] + if hasattr(self, "th_dist"): + v = dist_idx_reverse[start_ID] + assert v >= 0, "Invalid index" + d = self.data[task]["dist_mat"][v] + # 3D points are 1-indexed + self.LUT_below_th[task][start_ID] = 1 + \ + np.where((d > 0) & (d < self.th_dist))[0] + # Update start position + start = end + start_ID = ID[ind[start]] + print("\r --- done. ") + + # The dictionary to return + cur_pairs = {} + for name in ["P1", "P2", "P3", "P4"]: + cur_pairs[name] = [] + + # For each kp item, create a random pair + # + # Note that this is different from what we used to do. This way seems + # better as we will look at each keypoint once. 
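+        # Summary of the loop below: for every keypoint observed in at least
+        # two views, P1 and P2 are two random patches of the same 3D point,
+        # P3 is a patch of a different keypoint (excluding points closer
+        # than th_dist in 3D when that threshold is defined), and P4 is a
+        # random non-keypoint patch.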
+ print("[{}] creating pairs...".format(task)) + num_kp = len(self.LUT_kp[task]) + for i in xrange(num_kp): + if i % 100 == 0: + print("\r --- working on {}/{}".format( + i + 1, num_kp), end="") + sys.stdout.flush() + _kp = list(self.LUT_kp[task].keys())[i] + + # Check if we have enough views of this point and skip + if len(self.LUT_kp[task][_kp]) < 2: + continue + + # For P1 and P2 -- Select two random patches + P1, P2 = self.rng.choice( + self.LUT_kp[task][_kp], 2, replace=False) + + # For P3 -- Select a different keypoint randomly + # Generate a list of keys + valid_keys = set(self.LUT_kp[task]) + # Remove self + valid_keys -= set([_kp]) + # Remove points below the distance threshold + if hasattr(self, "th_dist"): + valid_keys -= set(valid_keys.intersection( + self.LUT_below_th[task][_kp])) + + _kp2 = self.rng.choice(list(valid_keys)) + P3 = self.rng.choice(self.LUT_kp[task][_kp2], 1)[0] + + # For P4 -- Select a random nonkp + P4 = self.rng.choice(self.LUT_nonkp[task][-1], 1)[0] + + # Append to the list + for name in ["P1", "P2", "P3", "P4"]: + cur_pairs[name].append(eval(name)) + print("\r --- done. ") + + # Convert them to np arrays + for name in ["P1", "P2", "P3", "P4"]: + cur_pairs[name] = np.asarray(cur_pairs[name]) + + # Augment pairs with rotation data + if self.config.use_augmented_set: + for name in ["P1", "P2", "P3", "P4"]: + cur_pairs[name + "_rot_aug"] = 2 * np.pi * \ + self.rng.rand(cur_pairs[name].size) + + # Save to cache if asked + if use_cache: + if not os.path.exists(self.pair_dir): + os.makedirs(self.pair_dir) + saveh5(cur_pairs, pairs_file) + + return cur_pairs + +# +# lift.py ends here diff --git a/datasets/test.py b/datasets/test.py new file mode 100644 index 0000000..defafdc --- /dev/null +++ b/datasets/test.py @@ -0,0 +1,150 @@ +# test.py --- +# +# Filename: test.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 13:43:44 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. 
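+# Illustrative usage of the training dataset defined in datasets/lift.py
+# above (a sketch; the batch size and subtask are hypothetical, the keys are
+# the ones next_batch produces):
+#
+#   dataset = Dataset(config, rng)
+#   batch = dataset.next_batch("train", "desc", batch_size=128)
+#   patches = batch["patch"]["P1"]   # NHWC patches for pair element P1
+#   xyz_gt = batch["xyz"]["P1"]      # ground-truth perturbations for P1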
+ +# Code: + +import os +import time +from copy import deepcopy + +import cv2 +import numpy as np + +from datasets.eccv2016.helper import load_patches +from utils import IDX_ANGLE, get_patch_size, get_ratio_scale, loadKpListFromTxt + + +class Dataset(object): + """The Dataset Module + + + """ + + def __init__(self, config, rng): + self.config = config + self.rng = rng + + def load_image(self): + """ Returns the image to work with """ + + image_file_name = self.config.test_img_file + + # ------------------------------------------------------------------------ + # Run learned network + start_time = time.clock() + # resize_scale = 1.0 + # If there is not gray image, load the color one and convert to gray + # read the image + if os.path.exists(image_file_name.replace( + "image_color", "image_gray" + )) and "image_color" in image_file_name: + image_gray = cv2.imread(image_file_name.replace( + "image_color", "image_gray" + ), 0) + image_color = deepcopy(image_gray) + image_resized = image_gray + # ratio_h = float( + # image_resized.shape[0]) / float(image_gray.shape[0]) + # ratio_w = float( + # image_resized.shape[1]) / float(image_gray.shape[1]) + else: + # read the image + image_color = cv2.imread(image_file_name) + image_resized = image_color + # ratio_h = float( + # image_resized.shape[0]) / float(image_color.shape[0]) + # ratio_w = float( + # image_resized.shape[1]) / float(image_color.shape[1]) + image_gray = cv2.cvtColor( + image_resized, + cv2.COLOR_BGR2GRAY).astype("float32") + + assert len(image_gray.shape) == 2 + + end_time = time.clock() + load_prep_time = (end_time - start_time) * 1000.0 + print("Time taken to read and prepare the image is {} ms".format( + load_prep_time + )) + + return image_color, image_gray, load_prep_time + + def load_data(self): + """Returns the patch, given the keypoint structure + + LATER: Cleanup. We currently re-use the utils we had from data + extraction. 
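+
+        Example
+        -------
+        Illustrative use; the keys are the ones this method returns::
+
+            data = Dataset(config, rng).load_data()
+            patches = data["patch"]   # NHWC, one patch per keypoint
+            kps = data["kps"]         # original keypoint entries
+            angles = data["angle"]    # Nx1, in radians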
+ + """ + + # Load image + img = cv2.imread(self.config.test_img_file) + # If color image, turn it to gray + if len(img.shape) == 3: + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + in_dim = 1 + + # Load keypoints + kp = np.asarray(loadKpListFromTxt(self.config.test_kp_file)) + + # Use load patches function + # Assign dummy values to y, ID, angle + y = np.zeros((len(kp),)) + ID = np.zeros((len(kp),), dtype='int64') + # angle = np.zeros((len(kp),)) + angle = np.pi / 180.0 * kp[:, IDX_ANGLE] # store angle in radians + + # load patches with id (drop out of boundary) + bPerturb = False + fPerturbInfo = np.zeros((3,)) + dataset = load_patches(img, kp, y, ID, angle, + get_ratio_scale(self.config), 1.0, + int(get_patch_size(self.config)), + int(self.config.desc_input_size), in_dim, + bPerturb, fPerturbInfo, bReturnCoords=True) + + # Change old dataset return structure to necessary data + x = dataset[0] + # y = dataset[1] + # ID = dataset[2] + pos = dataset[3] + angle = dataset[4] + coords = dataset[5] + + # Return the dictionary structure + cur_data = {} + cur_data["patch"] = np.transpose(x, (0, 2, 3, 1)) # In NHWC + cur_data["kps"] = coords + cur_data["xyz"] = pos + # Make sure that angle is a Nx1 vector + cur_data["angle"] = np.reshape(angle, (-1, 1)) + + return cur_data + +# +# test.py ends here diff --git a/layers.py b/layers.py new file mode 100644 index 0000000..bd95b40 --- /dev/null +++ b/layers.py @@ -0,0 +1,316 @@ +# layers.py --- +# +# Filename: layers.py +# Description: Special layers not included in tensorflow +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Thu Jun 29 12:23:35 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + + +import numpy as np +import tensorflow as tf +import tensorflow.contrib.layers as tcl + +from utils import get_tensor_shape, get_W_b_conv2d, get_W_b_fc + + +def leaky_relu(x, alpha=0.2): + return tf.maximum(tf.minimum(0.0, alpha * x), x) + + +def relu(x): + return tf.nn.relu(x) + + +def batch_norm(x, training, data_format="NHWC"): + # return tcl.batch_norm(x, center=True, scale=True, is_training=training, data_format=data_format) + if data_format == "NHWC": + axis = -1 + else: + axis = 1 + return tf.layers.batch_normalization(x, training=training, trainable=False, axis=axis) + + +def norm_spatial_subtractive(inputs, sub_kernel, data_format="NHWC"): + """Performs the spatial subtractive normalization + + Parameters + ---------- + + inputs: tensorflow 4D tensor, NHWC format + + input to the network + + sub_kernel: numpy.ndarray, 2D matrix + + the subtractive normalization kernel + + """ + + raise NotImplementedError( + "This function is buggy! don't use before extensive debugging!") + + # ---------- + # Normalize kernel. + # Note that unlike Torch, we don't divide the kernel here. We divide + # when it is fed to the convolution, since we use it to generate the + # coefficient map. + kernel = sub_kernel.astype("float32") + norm_kernel = (kernel / np.sum(kernel)) + + # ---------- + # Compute the adjustment coef. + # This allows our mean computation to compensate for the border area, + # where you have less terms adding up. Torch used convolution with a + # ``one'' image, but since we do not want the library to depend on + # other libraries with convolutions, we do it manually here. 
+ input_shape = get_tensor_shape(inputs) + assert len(input_shape) == 4 + if data_format == "NHWC": + coef = np.ones(input_shape[1:3], dtype="float32") + else: + coef = np.ones(input_shape[2:], dtype="float32") + pad_x = norm_kernel.shape[1] // 2 + pad_y = norm_kernel.shape[0] // 2 + + # Corners + # for the top-left corner + tl_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::-1, ::-1], axis=0), axis=1)[::1, ::1] + coef[:pad_y + 1, :pad_x + 1] = tl_cumsum_coef[pad_y:, pad_x:] + # for the top-right corner + tr_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::-1, ::1], axis=0), axis=1)[::1, ::-1] + coef[:pad_y + 1, -pad_x - 1:] = tr_cumsum_coef[pad_y:, :pad_x + 1] + # for the bottom-left corner + bl_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::1, ::-1], axis=0), axis=1)[::-1, ::1] + coef[-pad_y - 1:, :pad_x + 1] = bl_cumsum_coef[:pad_y + 1, pad_x:] + # for the bottom-right corner + br_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::1, ::1], axis=0), axis=1)[::-1, ::-1] + coef[-pad_y - 1:, -pad_x - 1:] = br_cumsum_coef[:pad_y + 1, :pad_x + 1] + + # Sides + tb_slice = slice(pad_y + 1, -pad_y - 1) + # for the left side + fill_value = tl_cumsum_coef[-1, pad_x:] + coef[tb_slice, :pad_x + 1] = fill_value.reshape([1, -1]) + # for the right side + fill_value = br_cumsum_coef[0, :pad_x + 1] + coef[tb_slice, -pad_x - 1:] = fill_value.reshape([1, -1]) + lr_slice = slice(pad_x + 1, -pad_x - 1) + # for the top side + fill_value = tl_cumsum_coef[pad_y:, -1] + coef[:pad_y + 1, lr_slice] = fill_value.reshape([-1, 1]) + # for the right side + fill_value = br_cumsum_coef[:pad_y + 1, 0] + coef[-pad_y - 1:, lr_slice] = fill_value.reshape([-1, 1]) + + # # code for validation of above + # img = np.ones_like(input, dtype='float32') + # import cv2 + # coef_cv2 = cv2.filter2D(img, -1, norm_kernel, + # borderType=cv2.BORDER_CONSTANT) + + # ---------- + # Extract convolutional mean + # Make filter a c01 filter by repeating. Note that we normalized above + # with the number of repetitions we are going to do. + if data_format == "NHWC": + norm_kernel = np.tile(norm_kernel, [input_shape[-1], 1, 1]) + else: + norm_kernel = np.tile(norm_kernel, [input_shape[1], 1, 1]) + # Re-normlize the kernel so that the sum is one. + norm_kernel /= np.sum(norm_kernel) + # add another axis in from to make oc01 filter, where o is the number + # of output dimensions (in our case, 1!) + norm_kernel = norm_kernel[np.newaxis, ...] + # # To pad with zeros, half the size of the kernel (only for 01 dims) + # border_mode = tuple(s // 2 for s in norm_kernel.shape[2:]) + # Convolve the mean filter. Results in shape of (batch_size, + # input_shape[1], input_shape[2], 1). + # For tensorflow, the kernel shape is 01co, which is different.... why?! + conv_mean = tf.nn.conv2d( + inputs, + norm_kernel.astype("float32").transpose(2, 3, 1, 0), + strides=[1, 1, 1, 1], + padding="SAME", + data_format=data_format, + ) + + # ---------- + # Adjust convolutional mean with precomputed coef + # This is to prevent border values being too small. + if data_format == "NHWC": + coef = coef[None][..., None].astype("float32") + else: + coef = coef[None, None].astype("float32") + adj_mean = conv_mean / coef + # # Make second dimension broadcastable as we are going to + # # subtract for all channels. 
+ # adj_mean = T.addbroadcast(adj_mean, 1) + + # ---------- + # Subtract mean + sub_normalized = inputs - adj_mean + + # # line for debugging + # test = theano.function(inputs=[input], outputs=[sub_normalized]) + + return sub_normalized + + +def pool_l2(inputs, ksize, stride, padding, data_format="NHWC"): + """L2 pooling, NHWC""" + + if data_format == "NHWC": + ksizes = [1, ksize, ksize, 1] + strides = [1, stride, stride, 1] + else: + ksizes = [1, 1, ksize, ksize] + strides = [1, 1, stride, stride] + + scaler = tf.cast(ksize * ksize, tf.float32) + + return tf.sqrt( + scaler * # Multiply since we want to sum + tf.nn.avg_pool( + tf.square(inputs), + ksize=ksizes, + strides=strides, + padding=padding, + data_format=data_format, + )) + + +def pool_max(inputs, ksize, stride, padding, data_format="NHWC"): + """max pooling, NHWC""" + + if data_format == "NHWC": + ksizes = [1, ksize, ksize, 1] + strides = [1, stride, stride, 1] + else: + ksizes = [1, 1, ksize, ksize] + strides = [1, 1, stride, stride] + + return tf.nn.max_pool( + inputs, + ksize=ksizes, + strides=strides, + padding=padding, + data_format=data_format, + ) + + +def conv_2d(inputs, ksize, nchannel, stride, padding, data_format="NHWC"): + """conv 2d, NHWC""" + + if data_format == "NHWC": + fanin = get_tensor_shape(inputs)[-1] + strides = [1, stride, stride, 1] + else: + fanin = get_tensor_shape(inputs)[1] + strides = [1, 1, stride, stride] + W, b = get_W_b_conv2d(ksize=ksize, fanin=fanin, fanout=nchannel) + conv = tf.nn.conv2d( + inputs, W, strides=strides, + padding=padding, data_format=data_format) + + return tf.nn.bias_add(conv, b, data_format=data_format) + + +def fc(inputs, fanout): + """fully connected, NC """ + + inshp = get_tensor_shape(inputs) + fanin = np.prod(inshp[1:]) + + # Flatten input if needed + if len(inshp) > 2: + inputs = tf.reshape(inputs, (inshp[0], fanin)) + + W, b = get_W_b_fc(fanin=fanin, fanout=fanout) + mul = tf.matmul(inputs, W) + + return tf.nn.bias_add(mul, b) + + +def ghh(inputs, num_in_sum, num_in_max, data_format="NHWC"): + """GHH layer + + LATER: Make it more efficient + + """ + + # Assert NHWC + assert data_format == "NHWC" + + # Check validity + inshp = get_tensor_shape(inputs) + num_channels = inshp[-1] + pool_axis = len(inshp) - 1 + assert (num_channels % (num_in_sum * num_in_max)) == 0 + + # Abuse cur_in + cur_in = inputs + + # # Okay the maxpooling and avgpooling functions do not like weird + # # pooling. Just reshape to avoid this issue. + # inshp = get_tensor_shape(inputs) + # numout = int(inshp[1] / (num_in_sum * num_in_max)) + # cur_in = tf.reshape(cur_in, [ + # inshp[0], numout, num_in_sum, num_in_max, inshp[2], inshp[3] + # ]) + + # Reshaping does not work for undecided input sizes. 
use split instead + cur_ins_to_max = tf.split( + cur_in, num_channels // num_in_max, axis=pool_axis) + + # Do max and concat them back + cur_in = tf.concat([ + tf.reduce_max(cur_ins, axis=pool_axis, keep_dims=True) for + cur_ins in cur_ins_to_max + ], axis=pool_axis) + + # Create delta + delta = (1.0 - 2.0 * (np.arange(num_in_sum) % 2)).astype("float32") + delta = tf.reshape(delta, [1] * (len(inshp) - 1) + [num_in_sum]) + + # Again, split into multiple pieces + cur_ins_to_sum = tf.split( + cur_in, num_channels // (num_in_max * num_in_sum), + axis=pool_axis) + + # Do delta multiplication, sum, and concat them back + cur_in = tf.concat([ + tf.reduce_sum(cur_ins * delta, axis=pool_axis, keep_dims=True) for + cur_ins in cur_ins_to_sum + ], axis=pool_axis) + + return cur_in + +# +# layers.py ends here diff --git a/losses.py b/losses.py new file mode 100644 index 0000000..46b58f0 --- /dev/null +++ b/losses.py @@ -0,0 +1,129 @@ +# losses.py --- +# +# Filename: losses.py +# Description: WRITEME +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Wed Jun 28 20:06:43 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import tensorflow as tf + +from utils import get_rect_inter, rebase_xyz + + +def loss_overlap(kp_pos1, gt_pos1, kp_pos2, gt_pos2, r_base, + alpha_non_overlap=1.0): + """Loss from the overlap between keypoints detected in P1 and P2 + + Note that due to the random perturbation, we need to compensate back the + scale and the positions. + + Parameters + ---------- + + WRITEME + + """ + + # Rebase the coordinates to the same base. + xyz1 = rebase_xyz(kp_pos1, gt_pos1) + xyz2 = rebase_xyz(kp_pos2, gt_pos2) + + # x,y and r for first patch + scaler = 2.0**xyz1[:, 2] + x1 = xyz1[:, 0] * scaler + y1 = xyz1[:, 1] * scaler + r1 = r_base * scaler + + # x,y and r for second patch + scaler = 2.0**xyz2[:, 2] + x2 = xyz2[:, 0] * scaler + y2 = xyz2[:, 1] * scaler + r2 = r_base * scaler + + # compute intersection and union + intersection = get_rect_inter(x1, y1, r1, x2, y2, r2) + union = (2.0 * r1)**2.0 + (2.0 * r2)**2.0 - intersection + + # add to cost (this has max value of 1) + cost = 1.0 - intersection / union + + # compute the non-overlapping region cost + dx = abs(x1 - x2) + gap_x = tf.nn.relu(dx - (r1 + r2)) + dy = abs(y1 - y2) + gap_y = tf.nn.relu(dy - (r1 + r2)) + + # divide by the sqrt(union) to more-or-less be in same range as + # above cost + cost += alpha_non_overlap * (gap_x + gap_y) / (union**0.5) + + return cost + + +def loss_classification(s1, s2, s3, s4): + """Loss from the classification + + Note s1, s2, and s3 are positive whereas s4 is negative. We therefore need + to balance the cost that comes out of this. The original implementation for + doing this is not identical to the new implementation, which may cause some + minor differences. + + """ + + # Get cost with l2 hinge loss + cost_p = tf.add_n([ + tf.nn.relu(1.0 - s1), tf.nn.relu(1.0 - s2), tf.nn.relu(1.0 - s3)]) + cost_n = tf.nn.relu(1.0 + s4) + # Make sure the elements sum to one. i.e. balance them. 
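+    # cost_p is the sum of three hinge terms and cost_n a single one, so the
+    # 1/6 and 1/2 factors give the positive and negative sides an equal total
+    # weight of one half each.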
+ cost = cost_p / 6.0 + cost_n / 2.0 + + return cost + + +def loss_desc_pair(d1, d2): + """Loss using the euclidean distance + + """ + + # return tf.norm(d1 - d2, ord="euclidean", axis=1) + return tf.sqrt(tf.reduce_sum(tf.square(d1 - d2), axis=1)) + + +def loss_desc_non_pair(d1, d3, margin, d2=None): + """Loss using the euclidean distance and the margin + + """ + + pair_dist_1_to_3 = tf.sqrt(tf.reduce_sum(tf.square(d1 - d3), axis=1)) + if d2 is not None: + pair_dist_2_to_3 = tf.sqrt(tf.reduce_sum(tf.square(d2 - d3), axis=1)) + return tf.nn.relu(margin - tf.minimum(pair_dist_1_to_3, pair_dist_2_to_3)) + else: + return tf.nn.relu(margin - pair_dist_1_to_3) + + +# +# losses.py ends here diff --git a/main.py b/main.py new file mode 100644 index 0000000..223bbe8 --- /dev/null +++ b/main.py @@ -0,0 +1,81 @@ +# main.py --- +# +# Filename: main.py +# Description: WRITEME +# Author: Kwang Moo Yi, Lin Chen +# Maintainer: Kwang Moo Yi +# Created: ??? +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + + +import os +import sys + +import numpy as np +import tensorflow as tf + +import getpass + +from config import get_config, save_config + +config = None + + +def main(_): + + # Create a random state using the random seed given by the config. This + # should allow reproducible results. + rng = np.random.RandomState(config.random_seed) + tf.set_random_seed(config.random_seed) + + # Train / Test + if config.task == "train": + # Import trainer module + from trainer import Trainer + + # Create a trainer object + task = Trainer(config, rng) + save_config(config.logdir, config) + + else: + # Import tester module + from tester import Tester + + # Create a tester object + task = Tester(config, rng) + + # Run the task + task.run() + + +if __name__ == "__main__": + config, unparsed = get_config(sys.argv) + + if len(unparsed) > 0: + raise RuntimeError("Unknown arguments were given! Check the command line!") + + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) + +# +# main.py ends here diff --git a/modules/__init__.py b/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modules/bypass.py b/modules/bypass.py new file mode 100644 index 0000000..462d4c3 --- /dev/null +++ b/modules/bypass.py @@ -0,0 +1,69 @@ +# bypass.py --- +# +# Filename: bypass.py +# Description: WRITEME +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Thu Jun 29 14:13:15 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import tensorflow as tf + + +def bypass_kp(xyz): + """Use GT information for keypoint detector + + Should return the xyz coordinates, as well as a dummy score. 
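+    The score is only a placeholder (the x coordinate is reused), so that the
+    bypass output has the same dictionary structure as the real detector.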
+ + """ + + # we always expect a dictionary as return value to be more explicit + res = {} + + # Bypass for xy coordiantes + res["xyz"] = xyz + + # Bypass for score (a dummy) + res["score"] = xyz[:, 0] + + return res + + +def bypass_ori(angle): + """Use GT information for orientation estimator + + Should return the cosine and the sine + + """ + + # we always expect a dictionary as return value to be more explicit + res = {} + + # Bypass for angle + res["cs"] = tf.concat([tf.cos(angle), tf.sin(angle)], axis=1) + + return res + +# +# bypass.py ends here diff --git a/modules/lift_desc.py b/modules/lift_desc.py new file mode 100644 index 0000000..38242ef --- /dev/null +++ b/modules/lift_desc.py @@ -0,0 +1,127 @@ +# lift_desc.py --- +# +# Filename: lift_desc.py +# Description: WRITEME +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Wed Jun 28 20:02:58 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + + +import os + +import tensorflow as tf + +from layers import batch_norm, conv_2d, norm_spatial_subtractive, pool_l2 +from utils import image_summary_nhwc, loadh5 + + +def process(inputs, bypass, name, skip, config, is_training): + """WRITEME + + inputs: input to the network + bypass: gt to by used when trying to bypass + name: name of the siamese branch + skip: whether to apply the bypass information + + Note + ---- + + We don't have to worry about the reuse flag here, since it is already dealt + with in the higher level. We just need to inherit it. + + """ + + # We never skip descriptor + assert skip is False + + # we always expect a dictionary as return value to be more explicit + res = {} + + # let's look at the inputs that get fed into this layer + image_summary_nhwc(name + "-input", inputs) + + # Import the lift_desc_sub_kernel.h5 to get the kernel file + script_dir = os.path.dirname(os.path.realpath(__file__)) + sub_kernel = loadh5(script_dir + "/lift_desc_sub_kernel.h5")["kernel"] + + # activation + if config.desc_activ == "tanh": + activ = tf.nn.tanh + elif config.desc_activ == "relu": + activ = tf.nn.relu + else: + raise RuntimeError('Unknown activation type') + + # pooling + def pool(cur_in, desc_pool, ksize): + if desc_pool == "l2_pool": + return pool_l2(cur_in, ksize, ksize, "VALID") + elif desc_pool == "max_pool": + return tf.nn.max_pool(cur_in, (1, ksize, ksize, 1), (1, ksize, ksize, 1), "VALID") + elif desc_pool == "avg_pool": + return tf.nn.avg_pool(cur_in, (1, ksize, ksize, 1), (1, ksize, ksize, 1), "VALID") + else: + raise RuntimeError('Unknown pooling type') + + # now abuse cur_in so that we can simply copy paste + cur_in = inputs + + # lets apply batch normalization on the input - we did not normalize the + # input range! 
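+    # (With batch normalization on the input, the patches only need to be
+    # brought into a rough [-1, 1] range upstream instead of being whitened
+    # with dataset-specific mean/std statistics.)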
+ with tf.variable_scope("input-bn"): + if config.use_input_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + + with tf.variable_scope("conv-act-pool-norm-1"): + cur_in = conv_2d(cur_in, 7, 32, 1, "VALID") + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + cur_in = activ(cur_in) + cur_in = pool(cur_in, config.desc_pool, 2) + if config.use_subtractive_norm: + cur_in = norm_spatial_subtractive(cur_in, sub_kernel) + + with tf.variable_scope("conv-act-pool-norm-2"): + cur_in = conv_2d(cur_in, 6, 64, 1, "VALID") + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + cur_in = activ(cur_in) + cur_in = pool(cur_in, config.desc_pool, 3) + if config.use_subtractive_norm: + cur_in = norm_spatial_subtractive(cur_in, sub_kernel) + + with tf.variable_scope("conv-act-pool-3"): + cur_in = conv_2d(cur_in, 5, 128, 1, "VALID") + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + cur_in = activ(cur_in) + cur_in = pool(cur_in, config.desc_pool, 4) + + res["desc"] = tf.reshape(cur_in, (-1, 128)) + + return res + +# +# lift_desc.py ends here diff --git a/modules/lift_kp.py b/modules/lift_kp.py new file mode 100644 index 0000000..6c65d2d --- /dev/null +++ b/modules/lift_kp.py @@ -0,0 +1,181 @@ +# lift_kp.py --- +# +# Filename: lift_kp.py +# Description: WRITEME +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Wed Jun 28 20:01:16 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import numpy as np +import tensorflow as tf + +from layers import conv_2d, ghh, batch_norm +from modules.bypass import bypass_kp +from utils import get_patch_size_no_aug, get_tensor_shape, \ + image_summary_nhwc, softmax + + +def process(inputs, bypass, name, skip, config, is_training): + """WRITEME. + + LATER: Clean up + + inputs: input to the network + bypass: gt to by used when trying to bypass + name: name of the siamese branch + skip: whether to apply the bypass information + + """ + + # let's look at the inputs that get fed into this layer except when we are + # looking at the whole image + if name != "img": + image_summary_nhwc(name + "-input", inputs) + + if skip: + return bypass_kp(bypass) + + # we always expect a dictionary as return value to be more explicit + res = {} + + # now abuse cur_in so that we can simply copy paste + cur_in = inputs + + # lets apply batch normalization on the input - we did not normalize the + # input range! + # with tf.variable_scope("input-bn"): + # if config.use_input_batch_norm: + # cur_in = batch_norm(cur_in, training=is_training) + + with tf.variable_scope("conv-ghh-1"): + nu = 1 + ns = 4 + nm = 4 + cur_in = conv_2d( + cur_in, config.kp_filter_size, nu * ns * nm, 1, "VALID") + # batch norm on the output of convolutions! 
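+        # (left disabled here: the raw GHH response below is used directly as
+        # the score map)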
+ # if config.use_batch_norm: + # cur_in = batch_norm(cur_in, training=is_training) + cur_in = ghh(cur_in, ns, nm) + + res["scoremap-uncut"] = cur_in + + # --------------------------------------------------------------------- + # Check how much we need to cut + kp_input_size = config.kp_input_size + patch_size = get_patch_size_no_aug(config) + desc_input_size = config.desc_input_size + rf = float(kp_input_size) / float(patch_size) + + input_shape = get_tensor_shape(inputs) + uncut_shape = get_tensor_shape(cur_in) + req_boundary = np.ceil(rf * np.sqrt(2) * desc_input_size / 2.0).astype(int) + cur_boundary = (input_shape[2] - uncut_shape[2]) // 2 + crop_size = req_boundary - cur_boundary + + # Stop building the network outputs if we are building for the full image + if name == "img": + return res + + # # Debug messages + # resized_shape = get_tensor_shape(inputs) + # print(' -- kp_info: output score map shape {}'.format(uncut_shape)) + # print(' -- kp_info: input size after resizing {}'.format(resized_shape[2])) + # print(' -- kp_info: output score map size {}'.format(uncut_shape[2])) + # print(' -- kp info: required boundary {}'.format(req_boundary)) + # print(' -- kp info: current boundary {}'.format(cur_boundary)) + # print(' -- kp_info: additional crop size {}'.format(crop_size)) + # print(' -- kp_info: additional crop size {}'.format(crop_size)) + # print(' -- kp_info: final cropped score map size {}'.format( + # uncut_shape[2] - 2 * crop_size)) + # print(' -- kp_info: movement ratio will be {}'.format(( + # float(uncut_shape[2] - 2.0 * crop_size) / + # float(kp_input_size - 1)))) + + # Crop center + cur_in = cur_in[:, crop_size:-crop_size, crop_size:-crop_size, :] + res["scoremap"] = cur_in + + # --------------------------------------------------------------------- + # Mapping layer to x,y,z + com_strength = config.kp_com_strength + # eps = 1e-10 + scoremap_shape = get_tensor_shape(cur_in) + + od = len(scoremap_shape) + # CoM to get the coordinates + pos_array_x = tf.range(scoremap_shape[2], dtype=tf.float32) + pos_array_y = tf.range(scoremap_shape[1], dtype=tf.float32) + + out = cur_in + max_out = tf.reduce_max( + out, axis=list(range(1, od)), keep_dims=True) + o = tf.exp(com_strength * (out - max_out)) # + eps + sum_o = tf.reduce_sum( + o, axis=list(range(1, od)), keep_dims=True) + x = tf.reduce_sum( + o * tf.reshape(pos_array_x, [1, 1, -1, 1]), + axis=list(range(1, od)), keep_dims=True + ) / sum_o + y = tf.reduce_sum( + o * tf.reshape(pos_array_y, [1, -1, 1, 1]), + axis=list(range(1, od)), keep_dims=True + ) / sum_o + + # Remove the unecessary dimensions (i.e. 
flatten them) + x = tf.reshape(x, (-1,)) + y = tf.reshape(y, (-1,)) + + # -------------- + # Turn x, and y into range -1 to 1, where the patch size is + # mapped to -1 and 1 + orig_patch_width = ( + scoremap_shape[2] + np.cast["float32"](req_boundary * 2.0)) + orig_patch_height = ( + scoremap_shape[1] + np.cast["float32"](req_boundary * 2.0)) + + x = ((x + np.cast["float32"](req_boundary)) / np.cast["float32"]( + (orig_patch_width - 1.0) * 0.5) - + np.cast["float32"](1.0)) + y = ((y + np.cast["float32"](req_boundary)) / np.cast["float32"]( + (orig_patch_height - 1.0) * 0.5) - + np.cast["float32"](1.0)) + + # -------------- + # No movement in z direction + z = tf.zeros_like(x) + + res["xyz"] = tf.stack([x, y, z], axis=1) + + # --------------------------------------------------------------------- + # Mapping layer to x,y,z + res["score"] = softmax( + res["scoremap"], axis=list(range(1, od)), + softmax_strength = config.kp_scoremap_softmax_strength) + + return res + +# +# lift_kp.py ends here diff --git a/modules/lift_ori.py b/modules/lift_ori.py new file mode 100644 index 0000000..6f4430a --- /dev/null +++ b/modules/lift_ori.py @@ -0,0 +1,151 @@ +# lift_ori.py --- +# +# Filename: lift_ori.py +# Description: WRITEME +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Wed Jun 28 20:02:50 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import tensorflow as tf + +from layers import batch_norm, conv_2d, fc, ghh, pool_max +from modules.bypass import bypass_ori +from utils import image_summary_nhwc + + +def process(inputs, bypass, name, skip, config, is_training): + """WRITEME. + + inputs: input to the network + bypass: gt to by used when trying to bypass + name: name of the siamese branch + skip: whether to apply the bypass information + + """ + + # let's look at the inputs that get fed into this layer + image_summary_nhwc(name + "-input", inputs) + + if skip: + return bypass_ori(bypass) + + # we always expect a dictionary as return value to be more explicit + res = {} + + # now abuse cur_in so that we can simply copy paste + cur_in = inputs + + # lets apply batch normalization on the input - we did not normalize the + # input range! 
+ with tf.variable_scope("input-bn"): + if config.use_input_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + + with tf.variable_scope("conv-act-pool-1"): + cur_in = conv_2d(cur_in, 5, 10, 1, "VALID") + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + cur_in = tf.nn.relu(cur_in) + cur_in = pool_max(cur_in, 2, 2, "VALID") + + with tf.variable_scope("conv-act-pool-2"): + cur_in = conv_2d(cur_in, 5, 20, 1, "VALID") + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + cur_in = tf.nn.relu(cur_in) + cur_in = pool_max(cur_in, 2, 2, "VALID") + + with tf.variable_scope("conv-act-pool-3"): + cur_in = conv_2d(cur_in, 3, 50, 1, "VALID") + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + cur_in = tf.nn.relu(cur_in) + cur_in = pool_max(cur_in, 2, 2, "VALID") + # res["ori_out3"] = cur_in + + with tf.variable_scope("fc-ghh-drop-4"): + nu = 100 + ns = 4 + nm = 4 + cur_in = fc(cur_in, nu * ns * nm) + # cur_in = fc(cur_in, nu) + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + if config.ori_activation == 'ghh': + cur_in = ghh(cur_in, ns, nm) + elif config.ori_activation == 'tanh': + cur_in = tf.nn.tanh(cur_in) + else: + raise RuntimeError("Bad orientation rectifier") + # cur_in = tf.nn.relu(cur_in) + if config.use_dropout_ori: + raise RuntimeError('Dropout not working properly!') + cur_in = tf.nn.dropout( + cur_in, + keep_prob=1.0 - (0.3 * tf.cast(is_training, tf.float32)), + ) + # res["ori_out4"] = cur_in + + with tf.variable_scope("fc-ghh-5"): + nu = 2 + ns = 4 + nm = 4 + cur_in = fc(cur_in, nu * ns * nm) + # cur_in = fc(cur_in, nu) + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + if config.ori_activation == 'ghh': + cur_in = ghh(cur_in, ns, nm) + elif config.ori_activation == 'tanh': + cur_in = tf.nn.tanh(cur_in) + else: + raise RuntimeError("Bad orientation rectifier") + # cur_in = tf.nn.relu(cur_in) + # res["ori_out5"] = cur_in + + # with tf.variable_scope("fc-ghh-6"): + # cur_in = fc(cur_in, nu) + # res["ori_out6"] = cur_in + + with tf.variable_scope("cs-norm"): + eps = 1e-10 + # First, normalize according to the maximum of the two + cur_in_abs_max = tf.reduce_max(tf.abs(cur_in), axis=1, keep_dims=True) + cur_in = cur_in / tf.maximum(eps, cur_in_abs_max) + # Add an epsilon to avoid singularity + eps = 1e-3 + cur_in += tf.to_float(cur_in >= 0) * eps - tf.to_float(cur_in < 0) * eps + # Now make norm one without worrying about div by zero + cur_in_norm = tf.sqrt(tf.reduce_sum(tf.square( + cur_in), axis=1, keep_dims=True)) + cur_in /= cur_in_norm + + res["cs"] = tf.reshape(cur_in, (-1, 2)) + + return res + + +# +# lift_ori.py ends here diff --git a/modules/spatial_transformer.py b/modules/spatial_transformer.py new file mode 100644 index 0000000..5a14fb0 --- /dev/null +++ b/modules/spatial_transformer.py @@ -0,0 +1,222 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +# +# Modifications by Kwang Moo Yi +# - Range fix +# - Compatibility for Python 3 +# - Reshape added at the end to ensure that we have a proper shape + + +import tensorflow as tf + +# Included to be compatile +from six.moves import xrange + + +def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs): + """Spatial Transformer Layer + + Implements a spatial transformer layer as described in [1]_. + Based on [2]_ and edited by David Dao for Tensorflow. + + Parameters + ---------- + U : float + The output of a convolutional net should have the + shape [num_batch, height, width, num_channels]. + theta: float + The output of the + localisation network should be [num_batch, 6]. + out_size: tuple of two ints + The size of the output of the network (height, width) + + References + ---------- + .. [1] Spatial Transformer Networks + Max Jaderberg, Karen Simonyan, Andrew Zisserman, Koray Kavukcuoglu + Submitted on 5 Jun 2015 + .. [2] https://github.com/skaae/transformer_network/blob/master/transformerlayer.py + + Notes + ----- + To initialize the network to the identity transform init + ``theta`` to : + identity = np.array([[1., 0., 0.], + [0., 1., 0.]]) + identity = identity.flatten() + theta = tf.Variable(initial_value=identity) + + """ + + def _repeat(x, n_repeats): + with tf.variable_scope('_repeat'): + rep = tf.transpose( + tf.expand_dims(tf.ones(shape=tf.stack([n_repeats, ])), 1), [1, 0]) + rep = tf.cast(rep, 'int32') + x = tf.matmul(tf.reshape(x, (-1, 1)), rep) + return tf.reshape(x, [-1]) + + def _interpolate(im, x, y, out_size): + with tf.variable_scope('_interpolate'): + # constants + num_batch = tf.shape(im)[0] + height = tf.shape(im)[1] + width = tf.shape(im)[2] + channels = tf.shape(im)[3] + + x = tf.cast(x, 'float32') + y = tf.cast(y, 'float32') + height_f = tf.cast(height, 'float32') + width_f = tf.cast(width, 'float32') + out_height = out_size[0] + out_width = out_size[1] + zero = tf.zeros([], dtype='int32') + max_y = tf.cast(tf.shape(im)[1] - 1, 'int32') + max_x = tf.cast(tf.shape(im)[2] - 1, 'int32') + + # scale indices from [-1, 1] to [0, width/height - 1] + # Fix range to not go out of bound + x = (x + 1.0) * (width_f - 1.0) / 2.0 + y = (y + 1.0) * (height_f - 1.0) / 2.0 + + # do sampling + x0 = tf.cast(tf.floor(x), 'int32') + x1 = x0 + 1 + y0 = tf.cast(tf.floor(y), 'int32') + y1 = y0 + 1 + + x0 = tf.clip_by_value(x0, zero, max_x) + x1 = tf.clip_by_value(x1, zero, max_x) + y0 = tf.clip_by_value(y0, zero, max_y) + y1 = tf.clip_by_value(y1, zero, max_y) + dim2 = width + dim1 = width * height + base = _repeat(tf.range(num_batch) * dim1, out_height * out_width) + base_y0 = base + y0 * dim2 + base_y1 = base + y1 * dim2 + idx_a = base_y0 + x0 + idx_b = base_y1 + x0 + idx_c = base_y0 + x1 + idx_d = base_y1 + x1 + + # use indices to lookup pixels in the flat image and restore + # channels dim + im_flat = tf.reshape(im, tf.stack([-1, channels])) + im_flat = tf.cast(im_flat, 'float32') + Ia = tf.gather(im_flat, idx_a) + Ib = tf.gather(im_flat, idx_b) + Ic = tf.gather(im_flat, idx_c) + Id = tf.gather(im_flat, idx_d) + + # and finally calculate interpolated values + x0_f = tf.cast(x0, 'float32') + x1_f = tf.cast(x1, 'float32') + y0_f = tf.cast(y0, 'float32') + y1_f = tf.cast(y1, 'float32') + wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1) + wb = tf.expand_dims(((x1_f - x) * (y - y0_f)), 1) + wc = tf.expand_dims(((x - x0_f) * (y1_f - y)), 1) + wd = tf.expand_dims(((x - x0_f) * (y - y0_f)), 1) + 
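+            # Standard bilinear interpolation: each weight is the area of the
+            # rectangle spanned by the sample point and the opposite corner.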
output = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id]) + + return output + + def _meshgrid(height, width): + with tf.variable_scope('_meshgrid'): + # This should be equivalent to: + # x_t, y_t = np.meshgrid(np.linspace(-1, 1, width), + # np.linspace(-1, 1, height)) + # ones = np.ones(np.prod(x_t.shape)) + # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) + x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])), + tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0])) + y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), + tf.ones(shape=tf.stack([1, width]))) + + x_t_flat = tf.reshape(x_t, (1, -1)) + y_t_flat = tf.reshape(y_t, (1, -1)) + + ones = tf.ones_like(x_t_flat) + grid = tf.concat(axis=0, values=[x_t_flat, y_t_flat, ones]) + return grid + + def _transform(theta, input_dim, out_size): + with tf.variable_scope('_transform'): + num_batch = tf.shape(input_dim)[0] + height = tf.shape(input_dim)[1] + width = tf.shape(input_dim)[2] + num_channels = tf.shape(input_dim)[3] + theta = tf.reshape(theta, (-1, 2, 3)) + theta = tf.cast(theta, 'float32') + + # grid of (x_t, y_t, 1), eq (1) in ref [1] + height_f = tf.cast(height, 'float32') + width_f = tf.cast(width, 'float32') + out_height = out_size[0] + out_width = out_size[1] + grid = _meshgrid(out_height, out_width) + grid = tf.expand_dims(grid, 0) + grid = tf.reshape(grid, [-1]) + grid = tf.tile(grid, tf.stack([num_batch])) + grid = tf.reshape(grid, tf.stack([num_batch, 3, -1])) + + # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s) + T_g = tf.matmul(theta, grid) + x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1]) + y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1]) + x_s_flat = tf.reshape(x_s, [-1]) + y_s_flat = tf.reshape(y_s, [-1]) + + input_transformed = _interpolate( + input_dim, x_s_flat, y_s_flat, + out_size) + + output = tf.reshape( + input_transformed, tf.stack([num_batch, out_height, out_width, num_channels])) + return output + + with tf.variable_scope(name): + output = _transform(theta, U, out_size) + + # let output go through a reshape operation to ensure shapes + U_shape = [_s if _s is not None else -1 for _s in + U.get_shape().as_list()] + + output_shape = [U_shape[0], out_size[0], out_size[1], U_shape[3]] + + return tf.reshape(output, output_shape) + + +def batch_transformer(U, theta, out_size, name='BatchSpatialTransformer'): + """Batch Spatial Transformer Layer + + Parameters + ---------- + + U : float + tensor of inputs [num_batch,height,width,num_channels] + theta : float + a set of transformations for each input [num_batch,num_transforms,6] + out_size : int + the size of the output [out_height,out_width] + + Returns: float + Tensor of size [num_batch*num_transforms,out_height,out_width,num_channels] + """ + with tf.variable_scope(name): + num_batch, num_transforms = map(int, theta.get_shape().as_list()[:2]) + indices = [[i] * num_transforms for i in xrange(num_batch)] + input_repeated = tf.gather(U, tf.reshape(indices, [-1])) + return transformer(input_repeated, theta, out_size) diff --git a/networks/__init__.py b/networks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/networks/lift.py b/networks/lift.py new file mode 100644 index 0000000..82484bd --- /dev/null +++ b/networks/lift.py @@ -0,0 +1,899 @@ +# lift.py --- +# +# Filename: lift.py +# Description: WRITEME +# Author: Kwang Moo Yi, Lin Chen +# Maintainer: Kwang Moo Yi +# Created: ????? 
+# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# + +# Code: + +import importlib + +import numpy as np +import tensorflow as tf + +from losses import (loss_classification, loss_desc_non_pair, loss_desc_pair, + loss_overlap) +from modules.spatial_transformer import transformer as transformer +from six.moves import xrange +from utils import (eval_descs, eval_kps, get_patch_size, get_patch_size_no_aug, + get_tensor_shape, image_summary_nhwc, make_theta, + show_all_variables) +from utils.legacy import build_legacy + + +class Network(object): + """The LIFT Network + + LATER + - Do proper mean/std normalization? + + """ + + def __init__(self, sess, config, dataset): + # Save pointer to the tensorflow session + self.sess = sess + # Save pointer to config + self.config = config + # Save pointer to the data module + self.dataset = dataset + # # Summaries to compute for this network + # self.summary = [] + + # Normalizer for the input data (they are raw images) + # Currently normalized to be between -1 and 1 + self.mean = {} + self.std = {} + for _module in ["kp", "ori", "desc"]: + self.mean[_module] = 128.0 + self.std[_module] = 128.0 + + if self.config.use_old_mean_std: + self.mean["kp"] = 116.4368117568544249706974369473755359649658203125 + self.std["kp"] = 88.083076379771597430590190924704074859619140625 + self.mean["ori"] = 116.4368117568544249706974369473755359649658203125 + self.std["ori"] = 88.083076379771597430590190924704074859619140625 + self.mean["desc"] = 110.75389862060546875 + self.std["desc"] = 61.53688812255859375 + + # Account for the keypoint scale change while augmenting rotations + self.scale_aug = float(get_patch_size(self.config)) / \ + float(get_patch_size_no_aug(self.config)) + + # Allocate placeholders + with tf.variable_scope("placeholders"): + self._build_placeholders() + # Build the network + with tf.variable_scope("network"): + self._build_network() + # Build loss + with tf.variable_scope("loss"): + self._build_loss() + # Build the optimization op + with tf.variable_scope("optimization"): + self._build_optim() + + # Build the legacy component. This is only used for accessing old + # framework weights. You can safely ignore this part + build_legacy(self) + + # Show all variables in the network + show_all_variables() + + # Add all variables into histogram summary + for _module in ["kp", "ori", "desc"]: + for _param in self.params[_module]: + tf.summary.histogram(_param.name, _param) + + # Collect all summary (Lazy...) + self.summary = tf.summary.merge_all() + + def forward(self, subtask, cur_data): + """Forward pass of the network. + + Runs the network and returns the losses for the subtask. + + """ + + # parse cur_data and create a feed_dict + feed_dict = self._get_feed_dict(subtask, cur_data) + # set to train mode + feed_dict[self.is_training] = True + + # import IPython + # IPython.embed() + + # specify which loss we are going to retrieve + fetch = { + "loss": self.loss[subtask], + } + + # run on the tensorflow session + res = self.sess.run(fetch, feed_dict=feed_dict) + + # return the losses + return res["loss"] + + def backward(self, subtask, cur_data, provide_summary=False): + """Backward pass of the network. + + Runs the network and updates the parameters, as well as returning the + summary if asked to do so. 
+ + """ + + # parse cur_data and create a feed_dict + feed_dict = self._get_feed_dict(subtask, cur_data) + # set to train mode + feed_dict[self.is_training] = True + + # specify which optim we run depending on the subtask + fetch = { + "optim": self.optim[subtask], + } + if provide_summary: + fetch["summary"] = self.summary + + # run on the tensorflow session + try: + res = self.sess.run(fetch, feed_dict=feed_dict) + except: + print("backward pass had an error, this iteration did nothing.") + return None + + # return summary + if provide_summary: + return res["summary"] + else: + return None + + def validate(self, subtask, cur_data): + """Validation process. + + Runs the network for the given data and returns validation results. The + return value should be a single scalar, which lower is better. + + """ + + # Get feed_dict for this batch + feed_dict = self._get_feed_dict(subtask, cur_data) + # set to train mode + feed_dict[self.is_training] = False + + # Obtain xyz, scores, descs, whatever you need + fetch = {} + if subtask == "kp": + # Obtain xyz's and scores + for _i in xrange(1, 5): + # Detector output + fetch["xyz{}".format(_i)] \ + = self.outputs["kp"]["P{}".format(_i)]["xyz"] + # Warp ground truth if necessary + aug_rot = self.inputs["aug_rot"] \ + if self.config.augment_rotations else None + fetch["pos{}".format(_i)] = self.transform_xyz( + self.inputs["xyz"], + aug_rot, + self.config.batch_size, + self.scale_aug, + transpose=True, + names=["P{}".format(_i)]) + # fetch["pos{}".format(_i)] \ + # = self.inputs["xyz"]["P{}".format(_i)] + fetch["score{}".format(_i)] \ + = self.outputs["kp"]["P{}".format(_i)]["score"] + res = self.sess.run(fetch, feed_dict=feed_dict) + + # Compute the AUC + # neg_weight = self.config.neg_per_pos + prauc = eval_kps( + [res["xyz{}".format(_i + 1)] for _i in xrange(4)], + [res["pos{}".format(_i + 1)]['P{}'.format(_i + 1)] + for _i in xrange(4)], + [res["score{}".format(_i + 1)] for _i in xrange(4)], + self.r_base, + ) + # prauc = eval_kps( + # [res["xyz{}".format(_i + 1)] for _i in xrange(4)], + # [res["pos{}".format(_i + 1)] * self.scale_aug + # for _i in xrange(4)], + # [res["score{}".format(_i + 1)] for _i in xrange(4)], + # self.r_base, + # ) + + return 1.0 - prauc + + else: + # Obtain descriptors for each patch + for _i in xrange(1, 4): + fetch["d{}".format(_i)] \ + = self.outputs["desc"]["P{}".format(_i)]["desc"] + res = self.sess.run(fetch, feed_dict=feed_dict) + + # Compute the AUC + neg_weight = self.config.neg_per_pos + prauc = eval_descs( + res["d1"], res["d2"], res["d3"], neg_weight) + + return 1.0 - prauc + + def test(self, subtask, cur_data): + """The test function to retrieve results for a single patch. 
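+
+        Depending on the subtask this returns the keypoint score map ("kp"),
+        the estimated orientations in radians ("ori"), or the descriptors
+        ("desc") for the data in `cur_data`.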
+ + LATER: Comment + + """ + + if subtask == "kp": + feed_dict = { + self.inputs["img"]["img"]: cur_data, + self.is_training: False, + } + fetch = { + "scoremap": self.outputs["kp"]["img"]["scoremap-uncut"] + } + res = self.sess.run(fetch, feed_dict=feed_dict) + return res["scoremap"] + elif subtask == "ori": + feed_dict = { + self.inputs["patch"]["P1"]: cur_data["patch"], + self.inputs["xyz"]["P1"]: cur_data["xyz"], + self.is_training: False, + } + fetch = { + "cs": self.outputs["ori"]["P1"]["cs"] + } + res = self.sess.run(fetch, feed_dict=feed_dict) + return np.arctan2(res["cs"][:, 1], res["cs"][:, 0]) + elif subtask == "desc": + feed_dict = { + self.inputs["patch"]["P1"]: cur_data["patch"], + self.inputs["xyz"]["P1"]: cur_data["xyz"], + self.inputs["angle"]["P1"]: cur_data["angle"], + self.is_training: False, + } + fetch = { + "desc": self.outputs["desc"]["P1"]["desc"] + } + res = self.sess.run(fetch, feed_dict=feed_dict) + return res["desc"] + + def _build_placeholders(self): + """Builds Tensorflow Placeholders""" + + # The inputs placeholder dictionary + self.inputs = {} + # multiple types + # LATER: label might not be necessary + types = ["patch", "xyz", "angle"] + if self.config.use_augmented_set: + types += ["aug_rot"] + for _type in types: + self.inputs[_type] = {} + + # We *ARE* going to specify the input size, since the spatial + # transformer implementation *REQUIRES* us to do so. Note that this + # has to be dealt with in the validate loop. + + # batch_size = self.config.batch_size + + # Use variable batch size + batch_size = None + + # We also read nchannel from the configuration. Make sure that the data + # module is behaving accordingly + nchannel = self.config.nchannel + + # Get the input patch size from config + patch_size = float(get_patch_size(self.config)) + + # Compute the r_base (i.e. overlap radius when computing the keypoint + # overlaps. + self.r_base = (float(self.config.desc_input_size) / + float(get_patch_size_no_aug(self.config))) + + # P1, P2, P3, P4 in the paper. P1, P2, P3 are keypoints, P1, P2 + # correspond, P1, and P3 don't correspond, P4 is a non-keypoint patch. + for _name in ["P1", "P2", "P3", "P4"]: + self.inputs["patch"][_name] = tf.placeholder( + tf.float32, shape=[ + batch_size, patch_size, patch_size, nchannel + ], name=_name, + ) + self.inputs["xyz"][_name] = tf.placeholder( + tf.float32, shape=[ + batch_size, 3, + ], name=_name, + ) + self.inputs["angle"][_name] = tf.placeholder( + tf.float32, shape=[ + batch_size, 1, + ], name=_name, + ) + if self.config.use_augmented_set: + self.inputs["aug_rot"][_name] = { + "cs": tf.placeholder( + tf.float32, shape=[ + batch_size, 2, + ], name=_name, + ), + "angle": tf.placeholder( + tf.float32, shape=[ + batch_size, 1, + ], name=_name, + )} + # Add to summary to view them + image_summary_nhwc( + "input/" + _name, + self.inputs["patch"][_name], + ) + + # For Image based test + self.inputs["img"] = {"img": tf.placeholder( + tf.float32, shape=[ + None, None, None, nchannel + ], name="img", + )} + + # For runmode in dropout and batch_norm + self.is_training = tf.placeholder( + tf.bool, shape=(), + name="is_training", + ) + + def _build_network(self): + """Define all the architecture here. 
Use the modules if necessary.""" + + # Import modules according to the configurations + self.modules = {} + for _key in ["kp", "ori", "desc"]: + self.modules[_key] = importlib.import_module( + "modules.{}".format( + getattr(self.config, "module_" + _key))) + + # prepare dictionary for the output and parameters of each module + self.outputs = {} + self.params = {} + self.allparams = {} + for _key in self.modules: + self.outputs[_key] = {} + self.params[_key] = [] + self.allparams[_key] = [] + # create a joint params list + # NOTE: params is a list, not a dict! + self.params["joint"] = [] + self.allparams["joint"] = [] + # create outputs placeholder for crop and rot + self.outputs["resize"] = {} + self.outputs["crop"] = {} + self.outputs["rot"] = {} + + # Actual Network definition + with tf.variable_scope("lift"): + # Graph construction depends on the subtask + subtask = self.config.subtask + + # ---------------------------------------- + # Initial resize for the keypoint module + # Includes rotation when augmentations are used + # + if self.config.use_augmented_set: + rot = self.inputs["aug_rot"] + else: + rot = None + self._build_st( + module="resize", + xyz=None, + cs=rot, + names=["P1", "P2", "P3", "P4"], + out_size=self.config.kp_input_size, + reduce_ratio=float(get_patch_size_no_aug(self.config)) / + float(get_patch_size(self.config)), + ) + + # ---------------------------------------- + # Keypoint Detector + # + # The keypoint detector takes each patch input and outputs (1) + # "score": the score of the patch, (2) "xy": keypoint position in + # side the patch. The score output is the soft-maximum (not a + # softmax) of the scores. The position output from the network + # should be in the form friendly to the spatial + # transformer. Outputs are always dictionaries. + # Rotate ground truth coordinates when augmenting rotations. + aug_rot = self.inputs["aug_rot"] \ + if self.config.augment_rotations else None + xyz_gt_scaled = self.transform_xyz( + self.inputs["xyz"], + aug_rot, + self.config.batch_size, + self.scale_aug, + transpose=True, + names=["P1", "P2", "P3", "P4"]) + self._build_module( + module="kp", + inputs=self.outputs["resize"], + bypass=xyz_gt_scaled, + names=["P1", "P2", "P3", "P4"], + skip=subtask == "ori" or subtask == "desc", + ) + + # For image based test + # self._build_module( + # module="kp", + # inputs=self.inputs["img"], + # bypass=self.inputs["img"], # This is a dummy + # names=["img"], + # skip=subtask != "kp", + # reuse=True, + # test_only=True, + # ) + + # ---------------------------------------- + # The Crop Spatial Transformer + # Output: use the same support region as for the descriptor + # + xyz_kp_scaled = self.transform_kp( + self.outputs["kp"], + aug_rot, + self.config.batch_size, + 1 / self.scale_aug, + transpose=False, + names=["P1", "P2", "P3"]) + self._build_st( + module="crop", + xyz=xyz_kp_scaled, + cs=aug_rot, + names=["P1", "P2", "P3"], + out_size=self.config.ori_input_size, + reduce_ratio=float(self.config.desc_input_size) / + float(get_patch_size(self.config)), + ) + + # ---------------------------------------- + # Orientation Estimator + # + # The orientation estimator takes the crop outputs as input and + # outputs orientations for the spatial transformer to + # use. Actually, since we output cos and sin, we can simply use the + # *UNNORMALIZED* version of the two, normalize them, and directly + # use it for our affine transform. In short it returns "cs": the + # cos and the sin, but unnormalized. Outputs are always + # dictionaries. 
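+            # The "cs" output is what make_theta() consumes below when the
+            # rotation spatial transformer is built.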
+ # Bypass: just the GT angle + if self.config.augment_rotations: + rot = {} + for name in ["P1", "P2", "P3"]: + rot[name] = self.inputs["angle"][name] - \ + self.inputs["aug_rot"][name]["angle"] + else: + rot = self.inputs["angle"] + self._build_module( + module="ori", + inputs=self.outputs["crop"], + bypass=rot, + names=["P1", "P2", "P3"], + skip=subtask == "kp" or subtask == "desc", + ) + + # ---------------------------------------- + # The Rot Spatial Transformer. + # - No rotation augmentation: + # Operates over the original patch with the ground truth angle when + # bypassing. Otherwise, we combine the augmented angle and the + # output of the orientation module. + # We do not consider rotation augmentations for the descriptor. + if self.config.augment_rotations: + rot = self.chain_cs( + self.inputs["aug_rot"], + self.outputs["ori"], + names=["P1", "P2", "P3"]) + # rot = self.outputs["ori"] + # xyz_desc_scaled = self.transform_kp( + # self.outputs["kp"], + # rot, + # self.config.batch_size, + # 1 / self.scale_aug, + # transpose=False, + # names=["P1", "P2", "P3"]) + elif self.config.use_augmented_set: + rot = self.outputs["ori"] + # xyz_desc_scaled = self.transform_kp( + # self.outputs["kp"], + # rot, + # self.config.batch_size, + # 1 / self.scale_aug, + # transpose=False, + # names=["P1", "P2", "P3"]) + else: + rot = None + # xyz_desc_scaled = self.inputs["xyz"] + self._build_st( + module="rot", + xyz=xyz_kp_scaled, + cs=rot, + names=["P1", "P2", "P3"], + out_size=self.config.desc_input_size, + reduce_ratio=float(self.config.desc_input_size) / + float(get_patch_size(self.config)), + ) + + # ---------------------------------------- + # Feature Descriptor + # + # The descriptor simply computes the descriptors, given the patch. + self._build_module( + module="desc", + inputs=self.outputs["rot"], + bypass=self.outputs["rot"], + names=["P1", "P2", "P3"], + skip=False, + ) + + def _build_module(self, module, inputs, bypass, names, skip, reuse=None, + test_only=False): + """Subroutine for building each module""" + + is_first = True + # _module = "kp" + for name in names: + # reuse if not the first time + if not is_first: + cur_reuse = True + else: + cur_reuse = reuse + with tf.variable_scope(module, reuse=cur_reuse) as sc: + cur_inputs = ( + (inputs[name] - self.mean[module]) / + self.std[module] + ) + if test_only: + is_training = False + else: + is_training = self.is_training + self.outputs[module][name] = self.modules[module].process( + inputs=cur_inputs, + bypass=bypass[name], + name=name, + skip=skip, + config=self.config, + is_training=is_training, + ) + # Store variables if it is the first time + if is_first: + self.params[module] = tf.get_collection( + tf.GraphKeys.TRAINABLE_VARIABLES, scope=sc.name) + self.allparams[module] = tf.get_collection( + tf.GraphKeys.GLOBAL_VARIABLES, scope=sc.name) + # Also append to the global list + self.params["joint"] += self.params[module] + self.allparams["joint"] += self.allparams[module] + # Mark that it is initialized + is_first = False + + def _build_st(self, module, xyz, cs, names, out_size, + reduce_ratio=None, do_rotate=False, do_reverse=False): + """Subroutine for building spatial transformer""" + + for name in names: + with tf.variable_scope(module): + cur_inputs = self.inputs["patch"][name] + # Get xy and cs + if xyz is not None: + _xyz = xyz[name]["xyz"] + else: + batch_size = tf.shape(cur_inputs)[0] + _xyz = tf.zeros((batch_size, 3)) + if cs is not None: + _cs = cs[name]["cs"] + else: + _cs = None + # transform coordinates + # if 
do_rotate: + # _xyz[:2] = self.transform_xyz(_xyz, + # _cs, + # config.batch_size, + # reverse=do_reverse) + input_size = get_tensor_shape(cur_inputs)[2] + if reduce_ratio is None: + reduce_ratio = float(out_size) / float(input_size) + # apply the spatial transformer + self.outputs[module][name] = transformer( + U=cur_inputs, + # Get the output from the keypoint layer + theta=make_theta(xyz=_xyz, cs=_cs, rr=reduce_ratio), + out_size=(out_size, out_size), + ) + + def chain_cs(self, cs1, cs2, names): + cs = {} + for name in names: + c1 = cs1[name]["cs"][:, 0] + s1 = cs1[name]["cs"][:, 1] + c2 = cs2[name]["cs"][:, 0] + s2 = cs2[name]["cs"][:, 1] + z = tf.zeros_like(c1) + o = tf.ones_like(c1) + mat1 = tf.transpose(tf.stack([[c1, -s1, z], + [s1, c1, z], + [z, z, o]]), + (2, 0, 1)) + mat2 = tf.transpose(tf.stack([[c2, -s2, z], + [s2, c2, z], + [z, z, o]]), + (2, 0, 1)) + joint = tf.matmul(mat1, mat2) + cs[name] = {"cs": tf.stack( + [joint[:, 0, 0], joint[:, 1, 0]], + axis=1)} + return cs + + def transform_xyz(self, xyz, cs, batch_size, scale, transpose, names): + """Rotate a set of coordinates and apply scaling if necessary.""" + + xyz_rot = {} + for name in names: + if cs is None: + xyz_rot[name] = xyz[name] * scale + else: + c = cs[name]["cs"][:, 0] + s = cs[name]["cs"][:, 1] + z = tf.zeros_like(c) + o = tf.ones_like(c) + mat = tf.transpose(tf.stack([[c, -s, z], + [s, c, z], + [z, z, o]]), + (2, 0, 1)) + xyz_rot[name] = tf.squeeze( + tf.matmul(mat, + tf.expand_dims(xyz[name], 2), + transpose_a=transpose), + axis=2) * scale + return xyz_rot + + def transform_kp(self, kp, cs, batch_size, scale, transpose, names): + """Rotate/scale keypoint coordinates while preserving the score.""" + + # A bit inelegant but I'm tired + kp_rot = {} + for name in names: + if cs is None: + kp_rot[name] = {"xyz": kp[name]["xyz"] * scale, + "score": kp[name]["score"]} + else: + c = cs[name]["cs"][:, 0] + s = cs[name]["cs"][:, 1] + z = tf.zeros_like(c) + o = tf.ones_like(c) + mat = tf.transpose(tf.stack([[c, -s, z], + [s, c, z], + [z, z, o]]), + (2, 0, 1)) + kp_rot[name] = { + "xyz": tf.squeeze( + tf.matmul(mat, + tf.expand_dims(kp[name]["xyz"], 2), + transpose_a=transpose), + axis=2) * scale, + "score": kp[name]["score"]} + return kp_rot + + def _build_loss(self): + """Build losses related to each subtask.""" + + self.loss = {} + + # Indivisual loss components + with tf.variable_scope("kp-overlap"): + aug_rot = self.inputs["aug_rot"] if self.config.augment_rotations \ + else None + gt = self.transform_xyz(self.inputs["xyz"], + aug_rot, + self.config.batch_size, + self.scale_aug, + transpose=True, + names=["P1", "P2"]) + gt1 = gt["P1"] + gt2 = gt["P2"] + # gt1 = self.inputs["xyz"]["P1"] * self.scale_aug + # gt2 = self.inputs["xyz"]["P2"] * self.scale_aug + _loss_overlap = loss_overlap( + kp_pos1=self.outputs["kp"]["P1"]["xyz"], + gt_pos1=gt1, + kp_pos2=self.outputs["kp"]["P2"]["xyz"], + gt_pos2=gt2, + r_base=self.r_base, + ) + with tf.variable_scope("kp-classification"): + _loss_classification = loss_classification( + s1=self.outputs["kp"]["P1"]["score"], + s2=self.outputs["kp"]["P2"]["score"], + s3=self.outputs["kp"]["P3"]["score"], + s4=self.outputs["kp"]["P4"]["score"], + ) + with tf.variable_scope("desc-pair"): + _loss_desc_pair = loss_desc_pair( + d1=self.outputs["desc"]["P1"]["desc"], + d2=self.outputs["desc"]["P2"]["desc"], + ) + with tf.variable_scope("desc-non-pair"): + if self.config.use_triplet_loss: + _loss_desc_non_pair = loss_desc_non_pair( + d1=self.outputs["desc"]["P1"]["desc"], + 
d2=self.outputs["desc"]["P2"]["desc"], + d3=self.outputs["desc"]["P3"]["desc"], + margin=self.config.alpha_margin, + ) + else: + _loss_desc_non_pair = loss_desc_non_pair( + d1=self.outputs["desc"]["P1"]["desc"], + d3=self.outputs["desc"]["P3"]["desc"], + margin=self.config.alpha_margin, + ) + + # Loss for each task + self.loss["kp"] = ( + self.config.alpha_overlap * _loss_overlap + + self.config.alpha_classification * _loss_classification + ) + self.loss["ori"] = _loss_desc_pair + self.loss["desc"] = _loss_desc_pair + _loss_desc_non_pair + self.loss["joint"] = ( + self.config.alpha_kp * _loss_classification + + self.config.alpha_desc * _loss_desc_pair + + self.config.alpha_desc * _loss_desc_non_pair + ) + + # Loss for weight decay + # + # LATER: we actually did not use it + + # Add summary for the subtask loss + tf.summary.scalar( + "losses/loss-" + self.config.subtask, + tf.reduce_mean(self.loss[self.config.subtask])) + + def _build_optim(self): + """Build the optimization op + + Note that we only construct it for the task at hand. This is to avoid + having to deal with bypass layers + + """ + self.optim = {} + + # Optimizer depending on the option + optimizer = self.config.optimizer + learning_rate = self.config.learning_rate + if optimizer == "sgd": + optim = tf.train.GradientDescentOptimizer(learning_rate) + elif optimizer == "adam": + optim = tf.train.AdamOptimizer(learning_rate) + elif optimizer == "rmsprop": + optim = tf.train.RMSPropOptimizer(learning_rate) + else: + raise Exception("[!] Unkown optimizer: {}".format(optimizer)) + + # All gradient computation should be done *after* the batchnorm update + update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + with tf.control_dependencies(update_ops): + # Compute gradient using the optimizer + subtask = self.config.subtask + loss = self.loss[subtask] + params = self.params[subtask] + # The subtask loss to optimize + if subtask != "joint": + # In case of other losses, simply do so + grads_and_vars = optim.compute_gradients(loss, var_list=params) + elif subtask == "joint": + # In case of the joint optimization, be sure to treat + # orientation in a different way. + + # for keypoint + gv_kp = optim.compute_gradients( + self.loss["joint"], var_list=self.params["kp"]) + # for orientation + gv_ori = optim.compute_gradients( + self.loss["ori"], var_list=self.params["ori"]) + # for descriptor + gv_desc = optim.compute_gradients( + self.loss["joint"], var_list=self.params["desc"]) + + # Concatenate the list + # grads_and_vars = gv_kp + gv_ori + gv_desc + grads_and_vars = [] + if 'kp' in self.config.finetune.split('+'): + grads_and_vars += gv_kp + if 'ori' in self.config.finetune.split('+'): + grads_and_vars += gv_ori + if 'desc' in self.config.finetune.split('+'): + grads_and_vars += gv_desc + if len(grads_and_vars) == 0: + raise RuntimeError("Nothing to finetune? Check --finetune") + + else: + raise ValueError("Wrong subtask {}".format(subtask)) + + # Clip gradients if necessary + if self.config.max_grad_norm > 0.0: + new_grads_and_vars = [] + # check whether gradients contain large value (then clip), NaN + # and InF + for idx, (grad, var) in enumerate(grads_and_vars): + if grad is not None and var in params: + grad = tf.clip_by_norm(grad, self.config.max_grad_norm) + new_grads_and_vars.append((grad, var)) + grads_and_vars = new_grads_and_vars + + # Check numerics and report if something is going on. 
This will + # make the backward pass stop and skip the batch + if self.config.check_numerics is True: + new_grads_and_vars = [] + # check whether gradients contain large value (then clip), NaN + # and InF + for idx, (grad, var) in enumerate(grads_and_vars): + if grad is not None and var in params: + grad = tf.check_numerics( + grad, "Numerical error in gradient for {}".format( + var.name)) + new_grads_and_vars.append((grad, var)) + grads_and_vars = new_grads_and_vars + + # Summarize all gradients + for grad, var in grads_and_vars: + tf.summary.histogram(var.name + '/gradient', grad) + + # Make the optim op + self.optim[subtask] = optim.apply_gradients(grads_and_vars) + + def _get_feed_dict(self, subtask, cur_data): + """Returns feed_dict""" + + # + # To simplify things, we feed everything for now. + # + # LATER: make feed_dict less redundant. + + feed_dict = {} + + types = ["patch", "xyz", "angle"] + if self.config.augment_rotations \ + or self.config.use_augmented_set: + types += ["aug_rot"] + + for _type in types: + for _name in ["P1", "P2", "P3", "P4"]: + if _type == 'aug_rot': + key_cs = self.inputs[_type][_name]["cs"] + feed_dict[key_cs] = cur_data[_type][_name]["cs"] + key_angle = self.inputs[_type][_name]["angle"] + feed_dict[key_angle] = cur_data[_type][ + _name]["angle"][..., None] + else: + key = self.inputs[_type][_name] + feed_dict[key] = cur_data[_type][_name] + + return feed_dict + +# +# lift.py ends here diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..bbe1271 --- /dev/null +++ b/readme.md @@ -0,0 +1,112 @@ +# TF-LIFT: Tensorflow Implmentation for Learned Invariant Feature Transform # + +## Basic Usage ## + +### Setting the environment ### + +Python Version : 3 +OpenCV Version : 3 + +You'll need to install the dependencies, something like the following: + +``` +pip install numpy h5py tensorflow tensorflow-gpu + +``` +We will later provide a requirements.txt for you to use with `pip`. + +Also, you need to setup your work directories. Edit the `config.py` for a +convenient default argument setting. See help for more information on what the +configurations do. + +### Training ### + +`main.py` is the entry point for all your needs. Simply define the task you +want to do, the where to save the results (logs) and the training subtask you +want to perform. + +For example, to train the descriptor +``` +python main.py --task=train --subtask=desc +``` + +Note: this will save the logs at `logs/main.py---task=train---subtask=desc`. If +you don't want this behavior, you can also add `--logdir=logs/test` in the +command line argument, for example. + +### Testing ### + +Testing is even more simple, just provide the input image location, the output +file name, keypoint file name (for ori and desc). For example, the following +command will run the entire pipeline for `image1.jpg`, using the model at `logs/test`. + +``` +python main.py --task=test --subtask=kp --logdir=logs/test --test_img_file=image1.jpg \ + --test_out_file=image1_kp.txt +python main.py --task=test --subtask=ori --logdir=logs/test --test_img_file=image1.jpg \ + --test_out_file=image1_ori.txt --test_kp_file=image1_kp.txt +python main.py --task=test --subtask=desc --logdir=logs/test --test_img_file=image1.jpg \ + --test_out_file=image1_desc.h5 --test_kp_file=image1_ori.txt +``` + +Note: when trying to load the model, it will always look for the `joint` +trained model first, and fall back to the subtask it is trying to test for. 
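+
+The descriptor output is an HDF5 file. Below is a minimal sketch for inspecting
+it; the exact dataset names depend on how the tester saves them, so the sketch
+simply lists whatever keys are present:
+
+```
+import h5py
+
+with h5py.File("image1_desc.h5", "r") as f:
+    for key in f:
+        print(key, f[key])
+```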
+
+## More notes on training ##
+
+### Saving the network ###
+
+When training, the network is automatically saved in the `logdir`. If you don't
+set this manually, it defaults to
+`logs/{concatenation-of-all-arguments}`. The things that are saved are:
+
+* Tensorflow checkpoint
+* Tensorflow graph metadata
+* `mean` and `std` used for input data normalization
+* best validation loss
+* best validation iteration
+
+All these are loaded back when we want to continue.
+
+### Loading the network ###
+
+On all runs, the framework automatically resumes from where it left off. In
+other words, it will **always** try to load network weights and resume. If the
+framework cannot find the expected weights, it will just tell you that it could
+not find weights in the expected locations, and **will try to go on its merry
+way**. Note that this is something that you want to keep in mind. For example,
+if you run the subtask `ori` with a typo in `logdir` pointing you to a
+directory without the pretrained descriptor weights, the framework will simply
+try to learn the orientation estimator **with random descriptors**. This is
+intended, as this might be something that you actually want to try.
+
+Network loading is performed in the following order, **overwriting** the
+previously loaded weights:
+
+1. Loads the pretrained weights, in the **old framework** format, from
+   directories defined in `pretrained_{subtask}` in the configuration. **This
+   feature is deprecated and should not be used.**
+
+2. Loads the pretrained weights, in the **new framework** format, from
+   directories defined in `pretrained_{subtask}` in the configuration.
+
+3. Loads the weights from the `logdir`, which is either automatically
+   determined by the command line arguments, or can be given manually.
+
+## Differences from the original version ##
+
+### Batch normalization ###
+
+In the original version, we did not apply batch normalization. In the new
+version, batch normalization is applied to all layers. This significantly
+speeds up the learning process and makes learning stable. This also eliminates
+the need for us to normalize the dataset when training; we can instead simply
+put the data in a reasonable range, say -1 to 1, and be done with it. Note
+that since we do this, we also perform batch normalization on the input.
+
+### L2-pooling and Spatial subtractive normalization ###
+
+We found that these layers can be replaced with plain ReLUs and spatial
+pooling without a significant difference, so they have been removed.
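+
+For reference, the L2-pooling that `layers.py` still provides boils down to
+the following standalone sketch (NHWC layout and "VALID" padding are assumed
+here):
+
+```
+import tensorflow as tf
+
+def l2_pool(x, ksize, stride):
+    # Euclidean norm over each pooling window: sqrt of the scaled average of
+    # the squared activations.
+    scaler = tf.cast(ksize * ksize, tf.float32)
+    return tf.sqrt(scaler * tf.nn.avg_pool(
+        tf.square(x),
+        ksize=[1, ksize, ksize, 1],
+        strides=[1, stride, stride, 1],
+        padding="VALID"))
+```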
+ diff --git a/test_desc.sh b/test_desc.sh new file mode 100755 index 0000000..d748e69 --- /dev/null +++ b/test_desc.sh @@ -0,0 +1,2 @@ +#!/bin/bash +CUDA_VISIBLE_DEVICES=3 OMP_NUM_THREADS=10 python main.py --test_img_file=./test/fountain.png --test_kp_file=./test/fountain_ori.txt --test_out_file=./test/fountain_desc.h5 --task=test --subtask=desc --logdir="logs/main.py" diff --git a/test_kp.sh b/test_kp.sh new file mode 100755 index 0000000..1b6b7d7 --- /dev/null +++ b/test_kp.sh @@ -0,0 +1,2 @@ +#!/bin/bash +CUDA_VISIBLE_DEVICES=3 OMP_NUM_THREADS=10 python main.py --test_img_file=./test/fountain.png --test_out_file=./test/fountain_kp.txt --task=test --subtask=kp --logdir="logs/main.py" diff --git a/test_kp_legacy.sh b/test_kp_legacy.sh new file mode 100755 index 0000000..19f8844 --- /dev/null +++ b/test_kp_legacy.sh @@ -0,0 +1,2 @@ +#!/bin/bash +CUDA_VISIBLE_DEVICES=3 OMP_NUM_THREADS=10 python main.py --test_img_file=./test/fountain.png --test_out_file=./test/fountain_kp.txt --task=test --subtask=kp --logdir="logs/legacy" --pretrained_kp="models/legacy" --use_batch_norm=False diff --git a/test_ori.sh b/test_ori.sh new file mode 100755 index 0000000..5b59307 --- /dev/null +++ b/test_ori.sh @@ -0,0 +1,2 @@ +#!/bin/bash +CUDA_VISIBLE_DEVICES=3 OMP_NUM_THREADS=10 python main.py --test_img_file=./test/fountain.png --test_kp_file=./test/fountain_kp.txt --test_out_file=./test/fountain_ori.txt --task=test --subtask=ori --logdir="logs/main.py" diff --git a/test_orig_lift.sh b/test_orig_lift.sh new file mode 100755 index 0000000..272ac5f --- /dev/null +++ b/test_orig_lift.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# ------------------------------------------------------------ +# Example script for running LIFT + +# Open MP Settings +export OMP_NUM_THREADS=1 + +# Cuda Settings +export CUDA_VISIBLE_DEVICES=0 + +# Theano Flags +export THEANO_FLAGS="device=gpu0,${THEANO_FLAGS}" + +# ------------------------------------------------------------ +# LIFT code settings + +# Number of keypoints +_LIFT_NUM_KEYPOINT=1000 + +# Whether to save debug image for keypoints +_LIFT_SAVE_PNG=1 + +# Whether the use Theano when keypoint testing. 
CuDNN is required when turned +# on +_LIFT_USE_THEANO=1 + +# The base path of the code +export _LIFT_BASE_PATH="/cvlabdata2/home/kyi/GitData/LIFT" + +_LIFT_PYTHON_CODE_PATH="${_LIFT_BASE_PATH}/python-code" + +# Test image and model settings +# _LIFT_TEST_IMG_NAME="img1" +# _LIFT_TEST_IMG_NAME="rect_001_max" +_LIFT_TEST_IMG_NAME="fountain" +_LIFT_TEST_IMG="${_LIFT_BASE_PATH}/data/testimg/${_LIFT_TEST_IMG_NAME}.png" +_LIFT_TEST_CONFIG="${_LIFT_BASE_PATH}/models/configs/picc-finetune-nopair.config" +_LIFT_MODEL_DIR="${_LIFT_BASE_PATH}/models/picc-best/" + +# Output Settings +_LIFT_RES_DIR="${_LIFT_BASE_PATH}/results" +_LIFT_KP_FILE_NAME="${_LIFT_RES_DIR}/${_LIFT_TEST_IMG_NAME}_kp.txt" +_LIFT_ORI_FILE_NAME="${_LIFT_RES_DIR}/${_LIFT_TEST_IMG_NAME}_ori.txt" +_LIFT_DESC_FILE_NAME="${_LIFT_RES_DIR}/${_LIFT_TEST_IMG_NAME}_desc.h5" + + +(cd $_LIFT_PYTHON_CODE_PATH; \ + python compute_detector.py \ + $_LIFT_TEST_CONFIG \ + $_LIFT_TEST_IMG \ + $_LIFT_KP_FILE_NAME \ + $_LIFT_SAVE_PNG \ + $_LIFT_USE_THEANO \ + 0 \ + $_LIFT_MODEL_DIR \ + $_LIFT_NUM_KEYPOINT \ + ) + +# (cd $_LIFT_PYTHON_CODE_PATH; \ +# python compute_orientation.py \ +# $_LIFT_TEST_CONFIG \ +# $_LIFT_TEST_IMG \ +# $_LIFT_KP_FILE_NAME \ +# $_LIFT_ORI_FILE_NAME \ +# 0 \ +# 0 \ +# $_LIFT_MODEL_DIR +# ) + +# (cd $_LIFT_PYTHON_CODE_PATH; \ +# python compute_descriptor.py \ +# $_LIFT_TEST_CONFIG \ +# $_LIFT_TEST_IMG \ +# $_LIFT_ORI_FILE_NAME \ +# $_LIFT_DESC_FILE_NAME \ +# 0 \ +# 0 \ +# $_LIFT_MODEL_DIR +# ) + diff --git a/tester.py b/tester.py new file mode 100644 index 0000000..b552c65 --- /dev/null +++ b/tester.py @@ -0,0 +1,405 @@ +# tester.py --- +# +# Filename: tester.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 13:34:04 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + + +import time + +import cv2 +import h5py +import numpy as np +import tensorflow as tf + +from datasets.test import Dataset +from networks.lift import Network +from six.moves import xrange +from utils import (IDX_ANGLE, XYZS2kpList, draw_XYZS_to_img, get_patch_size, + get_ratio_scale, get_XYZS_from_res_list, restore_network, + saveh5, saveKpListToTxt, update_affine) + + +class Tester(object): + """The Tester Class + + LATER: Clean up unecessary dictionaries + LATER: Make a superclass for Tester and Trainer + + """ + + def __init__(self, config, rng): + self.config = config + self.rng = rng + + # Open a tensorflow session. I like keeping things simple, so I don't + # use a supervisor. I'm just going to do everything manually. I also + # will just allow the gpu memory to grow + tfconfig = tf.ConfigProto() + tfconfig.gpu_options.allow_growth = True + self.sess = tf.Session(config=tfconfig) + + # Create the dataset instance + self.dataset = Dataset(self.config, rng) + # Create the model instance + self.network = Network(self.sess, self.config, self.dataset) + # Make individual saver instances for each module. + self.saver = {} + self.best_val_loss = {} + self.best_step = {} + # Create the saver instance for both joint and the current subtask + for _key in ["joint", self.config.subtask]: + self.saver[_key] = tf.train.Saver(self.network.allparams[_key]) + + # We have everything ready. We finalize and initialie the network here. 
+ self.sess.run(tf.global_variables_initializer()) + + def run(self): + + subtask = self.config.subtask + + # Load the network weights for the module of interest + print("-------------------------------------------------") + print(" Loading Trained Network ") + print("-------------------------------------------------") + # Try loading the joint version, and then fall back to the current task + # silently if failed. + try: + restore_res = restore_network(self, "joint") + except: + pass + if not restore_res: + restore_res = restore_network(self, subtask) + if not restore_res: + raise RuntimeError("Could not load network weights!") + + # Run the appropriate compute function + print("-------------------------------------------------") + print(" Testing ") + print("-------------------------------------------------") + + eval("self._compute_{}()".format(subtask)) + + def _compute_kp(self): + """Compute Keypoints. + + LATER: Clean up code + + """ + + total_time = 0.0 + + # Read image + image_color, image_gray, load_prep_time = self.dataset.load_image() + + # check size + image_height = image_gray.shape[0] + image_width = image_gray.shape[1] + + # Multiscale Testing + scl_intv = self.config.test_scl_intv + # min_scale_log2 = 1 # min scale = 2 + # max_scale_log2 = 4 # max scale = 16 + min_scale_log2 = self.config.test_min_scale_log2 + max_scale_log2 = self.config.test_max_scale_log2 + # Test starting with double scale if small image + min_hw = np.min(image_gray.shape[:2]) + if min_hw <= 1600: + print("INFO: Testing double scale") + min_scale_log2 -= 1 + # range of scales to check + num_division = (max_scale_log2 - min_scale_log2) * (scl_intv + 1) + 1 + scales_to_test = 2**np.linspace(min_scale_log2, max_scale_log2, + num_division) + + # convert scale to image resizes + resize_to_test = ((float(self.config.kp_input_size - 1) / 2.0) / + (get_ratio_scale(self.config) * scales_to_test)) + + # check if resize is valid + min_hw_after_resize = resize_to_test * np.min(image_gray.shape[:2]) + is_resize_valid = min_hw_after_resize > self.config.kp_filter_size + 1 + + # if there are invalid scales and resizes + if not np.prod(is_resize_valid): + # find first invalid + first_invalid = np.where(True - is_resize_valid)[0][0] + + # remove scales from testing + scales_to_test = scales_to_test[:first_invalid] + resize_to_test = resize_to_test[:first_invalid] + + print('resize to test is {}'.format(resize_to_test)) + print('scales to test is {}'.format(scales_to_test)) + + # Run for each scale + test_res_list = [] + for resize in resize_to_test: + + # resize according to how we extracted patches when training + new_height = np.cast['int'](np.round(image_height * resize)) + new_width = np.cast['int'](np.round(image_width * resize)) + start_time = time.clock() + image = cv2.resize(image_gray, (new_width, new_height)) + end_time = time.clock() + resize_time = (end_time - start_time) * 1000.0 + print("Time taken to resize image is {}ms".format( + resize_time + )) + total_time += resize_time + + # run test + # LATER: Compatibility with the previous implementations + start_time = time.clock() + + # Run the network to get the scoremap (the valid region only) + scoremap = None + if self.config.test_kp_use_tensorflow: + scoremap = self.network.test( + self.config.subtask, + image.reshape(1, new_height, new_width, 1) + ).squeeze() + else: + # OpenCV Version + raise NotImplementedError( + "TODO: Implement OpenCV Version") + + end_time = time.clock() + compute_time = (end_time - start_time) * 1000.0 + print("Time taken for 
image size {}" + " is {} milliseconds".format( + image.shape, compute_time)) + + total_time += compute_time + + # pad invalid regions and add to list + start_time = time.clock() + test_res_list.append( + np.pad(scoremap, int((self.config.kp_filter_size - 1) / 2), + mode='constant', + constant_values=-np.inf) + ) + end_time = time.clock() + pad_time = (end_time - start_time) * 1000.0 + print("Time taken for padding and stacking is {} ms".format( + pad_time + )) + total_time += pad_time + + # ------------------------------------------------------------------------ + # Non-max suppresion and draw. + + # The nonmax suppression implemented here is very very slow. Consider + # this as just a proof of concept implementation as of now. + + # Standard nearby : nonmax will check approximately the same area as + # descriptor support region. + nearby = int(np.round( + (0.5 * (self.config.kp_input_size - 1.0) * + float(self.config.desc_input_size) / + float(get_patch_size(self.config))) + )) + fNearbyRatio = self.config.test_nearby_ratio + # Multiply by quarter to compensate + fNearbyRatio *= 0.25 + nearby = int(np.round(nearby * fNearbyRatio)) + nearby = max(nearby, 1) + + nms_intv = self.config.test_nms_intv + edge_th = self.config.test_edge_th + + print("Performing NMS") + start_time = time.clock() + res_list = test_res_list + XYZS = get_XYZS_from_res_list( + res_list, resize_to_test, scales_to_test, nearby, edge_th, + scl_intv, nms_intv, do_interpolation=True, + ) + end_time = time.clock() + XYZS = XYZS[:self.config.test_num_keypoint] + + # For debugging + # TODO: Remove below + draw_XYZS_to_img(XYZS, image_color, self.config.test_out_file + '.jpg') + + nms_time = (end_time - start_time) * 1000.0 + print("NMS time is {} ms".format(nms_time)) + total_time += nms_time + print("Total time for detection is {} ms".format(total_time)) + # if bPrintTime: + # # Also print to a file by appending + # with open("../timing-code/timing.txt", "a") as timing_file: + # print("------ Keypoint Timing ------\n" + # "NMS time is {} ms\n" + # "Total time is {} ms\n".format( + # nms_time, total_time + # ), + # file=timing_file) + + # # resize score to original image size + # res_list = [cv2.resize(score, + # (image_width, image_height), + # interpolation=cv2.INTER_NEAREST) + # for score in test_res_list] + # # make as np array + # res_scores = np.asarray(res_list) + # with h5py.File('test/scores.h5', 'w') as score_file: + # score_file['score'] = res_scores + + # ------------------------------------------------------------------------ + # Save as keypoint file to be used by the oxford thing + print("Turning into kp_list") + kp_list = XYZS2kpList(XYZS) # note that this is already sorted + + # ------------------------------------------------------------------------ + # LATER: take care of the orientations somehow... + # # Also compute angles with the SIFT method, since the keypoint + # # component alone has no orientations. 
+ # print("Recomputing Orientations") + # new_kp_list, _ = recomputeOrientation(image_gray, kp_list, + # bSingleOrientation=True) + + print("Saving to txt") + saveKpListToTxt(kp_list, None, self.config.test_out_file) + + def _compute_ori(self): + """Compute Orientations """ + + total_time = 0.0 + + # Read image + start_time = time.clock() + cur_data = self.dataset.load_data() + end_time = time.clock() + load_time = (end_time - start_time) * 1000.0 + print("Time taken to load patches is {} ms".format( + load_time + )) + total_time += load_time + + # ------------------------------------------------------------------------- + # Test using the test function + start_time = time.clock() + oris = self._test_multibatch(cur_data) + end_time = time.clock() + compute_time = (end_time - start_time) * 1000.0 + print("Time taken to compute is {} ms".format( + compute_time + )) + total_time += compute_time + + # update keypoints and save as new + start_time = time.clock() + kps = cur_data["kps"] + for idxkp in xrange(len(kps)): + kps[idxkp][IDX_ANGLE] = oris[idxkp] * 180.0 / np.pi % 360.0 + kps[idxkp] = update_affine(kps[idxkp]) + end_time = time.clock() + update_time = (end_time - start_time) * 1000.0 + print("Time taken to update is {} ms".format( + update_time + )) + total_time += update_time + print("Total time for orientation is {} ms".format(total_time)) + + # save as new keypoints + saveKpListToTxt( + kps, self.config.test_kp_file, self.config.test_out_file) + + def _compute_desc(self): + """Compute Descriptors """ + + total_time = 0.0 + + # Read image + start_time = time.clock() + cur_data = self.dataset.load_data() + end_time = time.clock() + load_time = (end_time - start_time) * 1000.0 + print("Time taken to load patches is {} ms".format( + load_time + )) + total_time += load_time + + # ------------------------------------------------------------------------- + # Test using the test function + start_time = time.clock() + descs = self._test_multibatch(cur_data) + end_time = time.clock() + compute_time = (end_time - start_time) * 1000.0 + print("Time taken to compute is {} ms".format( + compute_time + )) + total_time += compute_time + print("Total time for descriptor is {} ms".format(total_time)) + + # save as h5 file + save_dict = {} + save_dict['keypoints'] = cur_data["kps"] + save_dict['descriptors'] = descs + + saveh5(save_dict, self.config.test_out_file) + + def _test_multibatch(self, cur_data): + """A sub test routine. + + We do this since the spatial transformer implementation in tensorflow + does not like undetermined batch sizes. 
+ + LATER: Bypass the spatial transformer...somehow + LATER: Fix the multibatch testing + + """ + + subtask = self.config.subtask + batch_size = self.config.batch_size + num_patch = len(cur_data["patch"]) + num_batch = int(np.ceil(float(num_patch) / float(batch_size))) + # Initialize the batch items + cur_batch = {} + for _key in cur_data: + cur_batch[_key] = np.zeros_like(cur_data[_key][:batch_size]) + + # Do muiltiple times + res = [] + for _idx_batch in xrange(num_batch): + # start of the batch + bs = _idx_batch * batch_size + # end of the batch + be = min(num_patch, (_idx_batch + 1) * batch_size) + # number of elements in batch + bn = be - bs + for _key in cur_data: + cur_batch[_key][:bn] = cur_data[_key][bs:be] + cur_res = self.network.test(subtask, cur_batch).squeeze()[:bn] + # Append + res.append(cur_res) + + return np.concatenate(res, axis=0) + +# +# tester.py ends here diff --git a/trainer.py b/trainer.py new file mode 100644 index 0000000..f5f6d37 --- /dev/null +++ b/trainer.py @@ -0,0 +1,250 @@ +# trainer.py --- +# +# Filename: trainer.py +# Description: +# Author: Kwang Moo Yi, Lin Chen +# Maintainer: Kwang Moo Yi +# Created: ??? +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# Tue 27 Jun 15:04:34 CEST 2017, Kwang Moo Yi +# +# - Removed comments. +# - Renamed Dataset import +# +# + +# Code: + + +import os + +import numpy as np +import tensorflow as tf +from tqdm import trange + +from datasets.lift import Dataset +from networks.lift import Network +from six.moves import xrange +from utils import get_hard_batch, restore_network, save_network + + +class Trainer(object): + """The Trainer Class + + LATER: Remove all unecessary "dictionarization" + + """ + + def __init__(self, config, rng): + self.config = config + self.rng = rng + + # Open a tensorflow session. I like keeping things simple, so I don't + # use a supervisor. I'm just going to do everything manually. I also + # will just allow the gpu memory to grow + tfconfig = tf.ConfigProto() + if self.config.usage > 0: + tfconfig.gpu_options.allow_growth = False + tfconfig.gpu_options.per_process_gpu_memory_fraction = \ + self.config.usage + else: + tfconfig.gpu_options.allow_growth = True + self.sess = tf.Session(config=tfconfig) + + # Create the dataset instance + self.dataset = Dataset(self.config, rng) + # import IPython + # IPython.embed() + # Create the model instance + self.network = Network(self.sess, self.config, self.dataset) + # Make individual saver instances and summary writers for each module + self.saver = {} + self.summary_writer = {} + self.best_val_loss = {} + self.best_step = {} + # Saver (only if there are params) + for _key in self.network.allparams: + if len(self.network.allparams[_key]) > 0: + with tf.variable_scope("saver-{}".format(_key)): + self.saver[_key] = tf.train.Saver( + self.network.allparams[_key]) + # Summary Writer + self.summary_writer[self.config.subtask] = tf.summary.FileWriter( + os.path.join(self.config.logdir, self.config.subtask), + graph=self.sess.graph) + # validation loss + self.best_val_loss[self.config.subtask] = np.inf + # step for each module + self.best_step[self.config.subtask] = 0 + + # We have everything ready. We finalize and initialie the network here. 
+ self.sess.run(tf.global_variables_initializer()) + + # Enable augmentations and/or force the use of the augmented set + self.use_aug_rot = 0 + if self.config.augment_rotations: + self.use_aug_rot = 1 + elif self.config.use_augmented_set: + self.use_aug_rot = -1 + + def run(self): + # For each module, check we have pre-trained modules and load them + print("-------------------------------------------------") + print(" Looking for previous results ") + print("-------------------------------------------------") + for _key in ["kp", "ori", "desc", "joint"]: + restore_network(self, _key) + + print("-------------------------------------------------") + print(" Training ") + print("-------------------------------------------------") + + subtask = self.config.subtask + batch_size = self.config.batch_size + for step in trange(int(self.best_step[subtask]), + int(self.config.max_step), + desc="Subtask = {}".format(subtask), + ncols=self.config.tqdm_width): + # ---------------------------------------- + # Forward pass: Note that we only compute the loss in the forward + # pass. We don't do summary writing or saving + fw_data = [] + fw_loss = [] + batches = self.hardmine_scheduler(self.config, step) + for num_cur in batches: + cur_data = self.dataset.next_batch( + task="train", + subtask=subtask, + batch_size=num_cur, + aug_rot=self.use_aug_rot) + cur_loss = self.network.forward(subtask, cur_data) + # Sanity check + if min(cur_loss) < 0: + raise RuntimeError('Negative loss while mining?') + # Data may contain empty (zero-value) samples: set loss to zero + if num_cur < batch_size: + cur_loss[num_cur - batch_size:] = 0 + fw_data.append(cur_data) + fw_loss.append(cur_loss) + # Fill a single batch with hardest + if len(batches) > 1: + cur_data = get_hard_batch(fw_loss, fw_data) + # ---------------------------------------- + # Backward pass: Note that the backward pass returns summary only + # when it is asked. Also, we manually keep note of step here, and + # not use the tensorflow version. This is to simplify the migration + # to another framework, if needed. + do_validation = step % self.config.validation_interval == 0 + cur_summary = self.network.backward( + subtask, cur_data, provide_summary=do_validation) + if do_validation and cur_summary is not None: + # Make sure we have the summary data + assert cur_summary is not None + # Write training summary + self.summary_writer[subtask].add_summary(cur_summary, step) + # Do multiple rounds of validation + cur_val_loss = np.zeros(self.config.validation_rounds) + for _val_round in xrange(self.config.validation_rounds): + # Fetch validation data + cur_data = self.dataset.next_batch( + task="valid", + subtask=subtask, + batch_size=batch_size, + aug_rot=self.use_aug_rot) + # Perform validation of the model using validation data + cur_val_loss[_val_round] = self.network.validate( + subtask, cur_data) + cur_val_loss = np.mean(cur_val_loss) + # Inject validation result to summary + summaries = [ + tf.Summary.Value( + tag="validation/err-{}".format(subtask), + simple_value=cur_val_loss, + ) + ] + self.summary_writer[subtask].add_summary( + tf.Summary(value=summaries), step) + # Flush the writer + self.summary_writer[subtask].flush() + + # TODO: Repeat without augmentation if necessary + # ... + + if cur_val_loss < self.best_val_loss[subtask]: + self.best_val_loss[subtask] = cur_val_loss + self.best_step[subtask] = step + save_network(self, subtask) + + def hardmine_scheduler(self, config, step, recursion=True): + """The hard mining scheduler. 
+ + Modes ("--mining-sched"): + "none": no mining. + "step": increase one batch at a time. + "smooth": increase one sample at a time, filling the rest of the + batch with zeros if necessary. + + Returns a list with the number of samples for every batch. + """ + + sched = config.mining_sched + if sched == 'none': + return [config.batch_size] + elif sched not in ['step', 'smooth']: + raise RuntimeError('Unknown scheduler') + + # Nothing to do if mining_step is not defined + if config.mining_step <= 0: + return [config.batch_size] + + max_batches = config.mining_ceil if config.mining_ceil > 0 else 32 + num_samples = int(round(config.batch_size * + (config.mining_base + step / config.mining_step))) + if num_samples > max_batches * config.batch_size: + # Limit has been reached + batches = [config.batch_size] * max_batches + else: + batches = [config.batch_size] * int(num_samples // config.batch_size) + # Do the remainder on the last batch + remainder = num_samples % config.batch_size + if remainder > 0: + # 'smooth': add remainder to the last batch + if sched == 'smooth': + batches[-1] += remainder + # 'step': add a full batch when the remainder goes above 50% + elif sched == 'step' and remainder >= config.batch_size / 2: + batches += [config.batch_size] + + # Feedback + if recursion and step > 0: + prev = self.hardmine_scheduler(config, step - 1, recursion=False) + if sum(prev) < sum(batches): + print(('\n[{}] Mining: increasing number of samples: ' + + '{} -> {} (batches: {} -> {}, last batch size: {})').format( + config.subtask, + sum(prev), + sum(batches), + len(prev), + len(batches), + batches[-1])) + + return batches + +# +# trainer.py ends here diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..0500b71 --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1,45 @@ +# __init__.py --- +# +# Filename: __init__.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 15:33:06 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +from utils.config import * +from utils.dump import * +from utils.kp_tools import * +from utils.layer import * +from utils.network import * +from utils.nms import * +from utils.test import * +from utils.tf import * +from utils.train import * + +# Legacy module is not loaded automatically +# from utils.legacy import * + +# +# __init__.py ends here diff --git a/utils/config.py b/utils/config.py new file mode 100644 index 0000000..4f0a836 --- /dev/null +++ b/utils/config.py @@ -0,0 +1,68 @@ +# config.py --- +# +# Filename: config.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 15:39:25 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. 
+ +# Code: + +import numpy as np + + +def get_ratio_scale(config): + """Calculate ratio_scale from other configs""" + + kp_input_size = config.kp_input_size + kp_base_scale = config.kp_base_scale + ratio_scale = (float(kp_input_size) / 2.0) / kp_base_scale + + return ratio_scale + + +def get_patch_size_no_aug(config): + """Determine large patch size without rotation augmentations""" + + desc_input_size = config.desc_input_size + desc_support_ratio = config.desc_support_ratio + + ratio_scale = get_ratio_scale(config) + patch_size = np.round( + float(desc_input_size) * ratio_scale / desc_support_ratio) + + return patch_size + + +def get_patch_size(config): + """Get the large patch size from other configs""" + + patch_size = get_patch_size_no_aug(config) + if config.use_augmented_set: + patch_size = np.ceil(np.sqrt(2) * patch_size) + + return patch_size + + +# +# config.py ends here diff --git a/utils/dump.py b/utils/dump.py new file mode 100644 index 0000000..bcc48e4 --- /dev/null +++ b/utils/dump.py @@ -0,0 +1,199 @@ +# dump.py --- +# +# Filename: dump.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 15:36:36 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import os + +import h5py +import tensorflow as tf + +from utils.legacy import load_legacy_network + +# Some constant strings +best_val_loss_filename = "best_val_loss.h5" +best_step_filename = "step.h5" + + +def saveh5(dict_to_dump, dump_file_full_name): + ''' Saves a dictionary as h5 file ''' + + with h5py.File(dump_file_full_name, 'w') as h5file: + if isinstance(dict_to_dump, list): + for i, d in enumerate(dict_to_dump): + newdict = {'dict' + str(i): d} + writeh5(newdict, h5file) + else: + writeh5(dict_to_dump, h5file) + + +def writeh5(dict_to_dump, h5node): + ''' Recursive function to write dictionary to h5 nodes ''' + + for _key in dict_to_dump.keys(): + if isinstance(dict_to_dump[_key], dict): + h5node.create_group(_key) + cur_grp = h5node[_key] + writeh5(dict_to_dump[_key], cur_grp) + else: + h5node[_key] = dict_to_dump[_key] + + +def loadh5(dump_file_full_name): + ''' Loads a h5 file as dictionary ''' + + with h5py.File(dump_file_full_name, 'r') as h5file: + dict_from_file = readh5(h5file) + + return dict_from_file + + +def readh5(h5node): + ''' Recursive function to read h5 nodes as dictionary ''' + + dict_from_file = {} + for _key in h5node.keys(): + if isinstance(h5node[_key], h5py._hl.group.Group): + dict_from_file[_key] = readh5(h5node[_key]) + else: + dict_from_file[_key] = h5node[_key].value + + return dict_from_file + + +def save_network(supervisor, subtask, verbose=True): + """Save the current training status""" + + # Skip if there's no saver of this subtask + if subtask not in supervisor.saver: + return + + cur_logdir = os.path.join(supervisor.config.logdir, subtask) + # Create save directory if it does not exist + if verbose: + print("") + print("[{}] Checking if save directory exists in {}" + "".format(subtask, cur_logdir)) + if not os.path.exists(cur_logdir): + os.makedirs(cur_logdir) + # Save the model + supervisor.saver[subtask].save( + supervisor.sess, + os.path.join(cur_logdir, "network") + ) + if verbose: + print("[{}] Saved model at {}" + "".format(subtask, cur_logdir)) + # Save mean std + saveh5(supervisor.network.mean, os.path.join(cur_logdir, "mean.h5")) + saveh5(supervisor.network.std, os.path.join(cur_logdir, 
"std.h5")) + if verbose: + print("[{}] Saved input normalization at {}" + "".format(subtask, cur_logdir)) + # Save the validation loss + saveh5({subtask: supervisor.best_val_loss[subtask]}, + os.path.join(cur_logdir, best_val_loss_filename)) + if verbose: + print("[{}] Saved best validation at {}" + "".format(subtask, cur_logdir)) + # Save the step + saveh5({subtask: supervisor.best_step[subtask]}, + os.path.join(cur_logdir, best_step_filename)) + if verbose: + print("[{}] Saved best step at {}" + "".format(subtask, cur_logdir)) + + # We also save the network metadata here + supervisor.saver[subtask].export_meta_graph( + os.path.join(cur_logdir, "network.meta")) + + +def restore_network(supervisor, subtask): + """Restore training status""" + + # Skip if there's no saver of this subtask + if subtask not in supervisor.saver: + return False + + is_loaded = False + + # Check if pretrain weight file is specified + predir = getattr(supervisor.config, "pretrained_{}".format(subtask)) + # Try loading the old weights + is_loaded += load_legacy_network(supervisor, subtask, predir) + # Try loading the tensorflow weights + is_loaded += load_network(supervisor, subtask, predir) + + # Load network using tensorflow saver + logdir = os.path.join(supervisor.config.logdir, subtask) + is_loaded += load_network(supervisor, subtask, logdir) + + return is_loaded + + +def load_network(supervisor, subtask, load_dir): + """Load function for our new framework""" + + print("[{}] Checking if previous Tensorflow run exists in {}" + "".format(subtask, load_dir)) + latest_checkpoint = tf.train.latest_checkpoint(load_dir) + if latest_checkpoint is not None: + # Load parameters + supervisor.saver[subtask].restore( + supervisor.sess, + latest_checkpoint + ) + print("[{}] Loaded previously trained weights".format(subtask)) + # Save mean std (falls back to default if non-existent) + if os.path.exists(os.path.join(load_dir, "mean.h5")): + supervisor.network.mean = loadh5(os.path.join(load_dir, "mean.h5")) + supervisor.network.std = loadh5(os.path.join(load_dir, "std.h5")) + print("[{}] Loaded input normalizers".format(subtask)) + # Load best validation result + supervisor.best_val_loss[subtask] = loadh5( + os.path.join(load_dir, best_val_loss_filename) + )[subtask] + print("[{}] Loaded best validation result = {}".format( + subtask, supervisor.best_val_loss[subtask])) + # Load best validation result + supervisor.best_step[subtask] = loadh5( + os.path.join(load_dir, best_step_filename) + )[subtask] + print("[{}] Loaded best step = {}".format( + subtask, supervisor.best_step[subtask])) + + return True + + else: + print("[{}] No previous Tensorflow result".format(subtask)) + + return False + + +# +# dump.py ends here diff --git a/utils/kp_tools.py b/utils/kp_tools.py new file mode 100644 index 0000000..0f92163 --- /dev/null +++ b/utils/kp_tools.py @@ -0,0 +1,682 @@ +# kp_tools.py --- +# +# Filename: kp_tools.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Sat Feb 20 10:11:28 2016 (+0100) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. 
+ +# Code: + + +from __future__ import print_function + +import cv2 +import numpy as np +import scipy.ndimage +from scipy.linalg import lu_factor, lu_solve + +from six.moves import xrange + +# Keypoint List Structure Index Info +IDX_X, IDX_Y, IDX_SIZE, IDX_ANGLE, IDX_RESPONSE, IDX_OCTAVE = ( + 0, 1, 2, 3, 4, 5) # , IDX_CLASSID not used +IDX_a, IDX_b, IDX_c = (6, 7, 8) +# NOTE the row-major colon-major adaptation here +IDX_A0, IDX_A2, IDX_A1, IDX_A3 = (9, 10, 11, 12) +# # IDX_CLASSID for KAZE +# IDX_CLASSID = 13 + +KP_LIST_LEN = 13 + +# Note that the element in the IDX_SIZE field will be scale, not the opencv +# keypoint's size. As open cv uses diameters, we need to multiply this with two +# to get the opencv size. + +# ------------------------------------------------------------------------ +# Functions for Swapping between Opencv Kp and Our Kp List +# ------------------------------------------------------------------------ + + +def kp_list_2_opencv_kp_list(kp_list): + + opencv_kp_list = [] + for kp in kp_list: + opencv_kp = cv2.KeyPoint(x=kp[IDX_X], + y=kp[IDX_Y], + _size=kp[IDX_SIZE] * 2.0, + _angle=kp[IDX_ANGLE], + _response=kp[IDX_RESPONSE], + _octave=np.int32(kp[IDX_OCTAVE]), + # _class_id=np.int32(kp[IDX_CLASSID]) + ) + opencv_kp_list += [opencv_kp] + + return opencv_kp_list + + +def opencv_kp_list_2_kp_list(opencv_kp_list): + + # IMPORTANT: make sure this part corresponds to the one in + # loadKpListFromTxt + + kp_list = [] + for opencv_kp in opencv_kp_list: + kp = np.zeros((KP_LIST_LEN, )) + kp[IDX_X] = opencv_kp.pt[0] + kp[IDX_Y] = opencv_kp.pt[1] + kp[IDX_SIZE] = opencv_kp.size * 0.5 + kp[IDX_ANGLE] = opencv_kp.angle + kp[IDX_RESPONSE] = opencv_kp.response + kp[IDX_OCTAVE] = opencv_kp.octave + # Compute a,b,c for vgg affine + kp[IDX_a] = 1. / (kp[IDX_SIZE]**2) + kp[IDX_b] = 0. + kp[IDX_c] = 1. / (kp[IDX_SIZE]**2) + # Compute A0, A1, A2, A3 and update + kp = update_affine(kp) + # kp[IDX_CLASSID] = opencv_kp.class_id + + kp_list += [kp] + + return kp_list + + +def rescale_circ_kp(kp, fRescale): + + kp[IDX_SIZE] *= fRescale + + return update_affine(update_abc_from_scale(kp)) + + +def update_abc_from_scale(kp): + + kp[IDX_a] = 1. / (kp[IDX_SIZE]**2) + kp[IDX_b] = 0. + kp[IDX_c] = 1. / (kp[IDX_SIZE]**2) + + return kp + + +def update_affine(kp): + """Returns an updated version of the keypoint. + + Note + ---- + This function should be applied only to individual keypoints, not a list. + + """ + + # Compute A0, A1, A2, A3 + S = np.asarray([[kp[IDX_a], kp[IDX_b]], [kp[IDX_b], kp[IDX_c]]]) + invS = np.linalg.inv(S) + a = np.sqrt(invS[0, 0]) + b = invS[0, 1] / max(a, 1e-18) + A = np.asarray([[a, 0], [b, np.sqrt(max(invS[1, 1] - b**2, 0))]]) + + # We need to rotate first! 
+ cos_val = np.cos(np.deg2rad(kp[IDX_ANGLE])) + sin_val = np.sin(np.deg2rad(kp[IDX_ANGLE])) + R = np.asarray([[cos_val, -sin_val], [sin_val, cos_val]]) + + A = np.dot(A, R) + + kp[IDX_A0] = A[0, 0] + kp[IDX_A1] = A[0, 1] + kp[IDX_A2] = A[1, 0] + kp[IDX_A3] = A[1, 1] + + return kp + + +# ------------------------------------------------------------------------ +# Keypoint I/O operations +# ------------------------------------------------------------------------ + +def writeFeature2OxFile(file_name, kp_list, desc_list): + + # kp_list : len(kp_list) == number of keypoints, each element is a + # opencv Keypoint object + # desc_list : kp_list.shape == [number of keypoints, dimension], + # descriptors for each keypoint + + # pt, scale, orientation, desc): + # write the features in the vlfeat format + + assert len(kp_list) == desc_list.shape[0] + + fp = open(file_name, 'w') # open the file object + + # write the headings + # dimension of the descriptor (SIFT is 128) + fp.write('%d\n' % (desc_list.shape[1])) + # number of keypoints to be evaluated + fp.write('%d\n' % (desc_list.shape[0])) + + # write the keypoint information + for idxKp in xrange(len(kp_list)): + u = kp_list[idxKp][IDX_X] + v = kp_list[idxKp][IDX_Y] + a = kp_list[idxKp][IDX_a] + b = kp_list[idxKp][IDX_b] + c = kp_list[idxKp][IDX_c] + desc = desc_list[idxKp, :] + + # turn each kp into vlfeat format + writestr = '%f %f %f %f %f' % (u, v, a, b, + c) + " " + " ".join(desc.astype(str)) + + fp.write(writestr + '\n') + + fp.close() # close the file object + + +def loadKpListFromTxt(kp_file_name): + + # Open keypoint file for read + kp_file = open(kp_file_name, 'rb') + + # skip the first two lines + kp_line = kp_file.readline() + kp_line = kp_file.readline() + + kp_list = [] + num_elem = -1 + while True: + # read a line from file + kp_line = kp_file.readline() + # check EOF + if not kp_line: + break + # split read information + kp_info = kp_line.split() + parsed_kp_info = [] + for idx in xrange(len(kp_info)): + parsed_kp_info += [float(kp_info[idx])] + parsed_kp_info = np.asarray(parsed_kp_info) + + if num_elem == -1: + num_elem = len(parsed_kp_info) + else: + assert num_elem == len(parsed_kp_info) + + # IMPORTANT: make sure this part corresponds to the one in + # opencv_kp_list_2_kp_list + + # check if we have all the kp list info + if len(parsed_kp_info) == 6: # if we only have opencv info + # Compute a,b,c for vgg affine + a = 1. / (parsed_kp_info[IDX_SIZE]**2) + b = 0. + c = 1. / (parsed_kp_info[IDX_SIZE]**2) + parsed_kp_info = np.concatenate((parsed_kp_info, [a, b, c])) + + if len(parsed_kp_info) == 9: # if we don't have the Affine warp + parsed_kp_info = np.concatenate((parsed_kp_info, np.zeros((4, )))) + parsed_kp_info = update_affine(parsed_kp_info) + + # if len(parsed_kp_info) == 13: + # # add dummy class id + # parsed_kp_info = np.concatenate((parsed_kp_info, [0])) + + # make sure we have everything! 
+ assert len(parsed_kp_info) == KP_LIST_LEN + + kp_list += [parsed_kp_info] + + # Close keypoint file + kp_file.close() + + return kp_list + + +def saveKpListToTxt(kp_list, orig_kp_file_name, kp_file_name): + + # first line KP_LIST_LEN to indicate we have the full + kp_line = str(KP_LIST_LEN) + '\n' + + # Open keypoint file for write + kp_file = open(kp_file_name, 'w') + + # write the first line + kp_file.write(kp_line) + + # write the number of kp in second line + kp_file.write('{}\n'.format(len(kp_list))) + + for kp in kp_list: + + # Make sure we have all info for kp + assert len(kp) == KP_LIST_LEN + + # Form the string to write + write_string = "" + for kp_elem, _i in zip(kp, range(len(kp))): + # if _i == IDX_OCTAVE or _i == IDX_CLASSID: + if _i == IDX_OCTAVE: # in case of the octave + write_string += str(np.int32(kp_elem)) + " " + else: + write_string += str(kp_elem) + " " + write_string += "\n" + + # Write the string + kp_file.write(write_string) + + # Close keypoint file + kp_file.close() + + +def XYZS2kpList(XYZS): + + kp_list = [None] * XYZS.shape[0] + for idxKp in xrange(XYZS.shape[0]): + + kp = np.zeros((KP_LIST_LEN, )) + kp[IDX_X] = XYZS[idxKp, 0] + kp[IDX_Y] = XYZS[idxKp, 1] + kp[IDX_SIZE] = XYZS[idxKp, 2] + kp[IDX_ANGLE] = 0 + kp[IDX_RESPONSE] = XYZS[idxKp, 3] + + # computing the octave should be dealt with caution. We compute in the + # SIFT way. The SIFT code of openCV computes scale in the following + # way. + # >>> scale = 1.6 * 2**((layer+xi) / 3) * 2**octave + # where octave is packed by octave = layer << 8 + octv + layer_n_octv = np.log2(kp[IDX_SIZE] / 1.6) + layer_n_octv = max(0, layer_n_octv) # TODO: FOR NOW LET'S JUST DO THIS + octv = int(np.floor(layer_n_octv)) + layer_n_xi = (layer_n_octv - np.floor(layer_n_octv)) * 3.0 + layer = int(np.floor(layer_n_xi)) + xi = layer_n_xi - np.floor(layer_n_xi) + # make sure we have the layer correctly by checking that it is smaller + # than 3 + assert layer < 3 + # pack octave in the same way as openCV + # kpt.octave = octv + (layer << 8) + (cvRound((xi + 0.5)*255) << 16); + # also remember + # octave = octave < 128 ? octave : (-128 | octave); + # which means + # kpt.octave = (kpt.octave & ~255) | ((kpt.octave + firstOctave) & 255) + # so if octave is negative & 255 will give otcv >= 128 later... + octv = octv & 255 + octave = octv + (layer << 8) + (int(np.round((xi + 0.5) * 255.)) << 16) + kp[IDX_OCTAVE] = octave + + # Compute a,b,c for vgg affine + kp[IDX_a] = 1. / (kp[IDX_SIZE]**2) + kp[IDX_b] = 0. + kp[IDX_c] = 1. 
/ (kp[IDX_SIZE]**2) + # Compute A0, A1, A2, A3 and update + kp = update_affine(kp) + # kp[IDX_CLASSID] = 0 + + kp_list[idxKp] = kp + + return kp_list + + +# ------------------------------------------------------------------------ +# NonMaxSuppresion related +# ------------------------------------------------------------------------ + + +def get_XYZS_from_res_list(res_list, resize_to_test, scales_to_test, nearby=1, + edge_th=0, scl_intv=2, nms_intv=1, + do_interpolation=False, fScaleEdgeness=0.0): + # NMS + nms_res = nonMaxSuppression(res_list, nearby=nearby, + scl_intv=scl_intv, nms_intv=nms_intv) + + XYZS = get_subpixel_XYZS(res_list, nms_res, resize_to_test, + scales_to_test, edge_th, do_interpolation, + fScaleEdgeness) + # sort by score + XYZS = XYZS[np.argsort(XYZS[:, 3])[::-1]] + + return XYZS + + +def get_subpixel_XYZS(score_list, nms_list, resize_to_test, + scales_to_test, edge_th, do_interpolation, + fScaleEdgeness): + + log_scales = np.log2(scales_to_test) + log_scale_step = ((np.max(log_scales) - np.min(log_scales)) / + (len(scales_to_test) - 1.0)) + + X = [()] * len(nms_list) + Y = [()] * len(nms_list) + Z = [()] * len(nms_list) + S = [()] * len(nms_list) + for idxScale in xrange(len(nms_list)): + nms = nms_list[idxScale] + + pts = np.where(nms) + if len(pts[0]) == 0: + continue + + assert idxScale > 0 and idxScale < len(nms_list) - 1 + + # compute ratio for coordinate conversion + fRatioUp = ( + (np.asarray(score_list[idxScale + 1].shape, dtype='float') - 1.0) / + (np.asarray(score_list[idxScale].shape, dtype='float') - 1.0) + ).reshape([2, -1]) + fRatioDown = ( + (np.asarray(score_list[idxScale - 1].shape, dtype='float') - 1.0) / + (np.asarray(score_list[idxScale].shape, dtype='float') - 1.0) + ).reshape([2, -1]) + + # the conversion function + def at(dx, dy, ds): + if not isinstance(dx, np.ndarray): + dx = np.ones(len(pts[0]),) * dx + if not isinstance(dy, np.ndarray): + dy = np.ones(len(pts[0]),) * dy + if not isinstance(ds, np.ndarray): + ds = np.ones(len(pts[0]),) * ds + new_pts = (pts[0] + dy, pts[1] + dx) + ds = np.round(ds).astype(int) + fRatio = ((ds == 0).reshape([1, -1]) * 1.0 + + (ds == -1).reshape([1, -1]) * fRatioDown + + (ds == 1).reshape([1, -1]) * fRatioUp) + assert np.max(ds) <= 1 and np.min(ds) >= -1 + new_pts = tuple([np.round(v * r).astype(int) + for v, r in zip(new_pts, fRatio)]) + scores_to_return = np.asarray([ + score_list[idxScale + _ds][_y, _x] + for _ds, _x, _y in zip( + ds, new_pts[1], new_pts[0] + ) + ]) + return scores_to_return + + # compute the gradient + Dx = 0.5 * (at(+1, 0, 0) - at(-1, 0, 0)) + Dy = 0.5 * (at(0, +1, 0) - at(0, -1, 0)) + Ds = 0.5 * (at(0, 0, +1) - at(0, 0, -1)) + + # compute the Hessian + Dxx = (at(+1, 0, 0) + at(-1, 0, 0) - 2.0 * at(0, 0, 0)) + Dyy = (at(0, +1, 0) + at(0, -1, 0) - 2.0 * at(0, 0, 0)) + Dss = (at(0, 0, +1) + at(0, 0, -1) - 2.0 * at(0, 0, 0)) + + Dxy = 0.25 * (at(+1, +1, 0) + at(-1, -1, 0) - + at(-1, +1, 0) - at(+1, -1, 0)) + Dxs = 0.25 * (at(+1, 0, +1) + at(-1, 0, -1) - + at(-1, 0, +1) - at(+1, 0, -1)) + Dys = 0.25 * (at(0, +1, +1) + at(0, -1, -1) - + at(0, -1, +1) - at(0, +1, -1)) + + # filter out all keypoints which we have inf + is_good = (np.isfinite(Dx) * np.isfinite(Dy) * np.isfinite(Ds) * + np.isfinite(Dxx) * np.isfinite(Dyy) * np.isfinite(Dss) * + np.isfinite(Dxy) * np.isfinite(Dxs) * np.isfinite(Dys)) + Dx = Dx[is_good] + Dy = Dy[is_good] + Ds = Ds[is_good] + Dxx = Dxx[is_good] + Dyy = Dyy[is_good] + Dss = Dss[is_good] + Dxy = Dxy[is_good] + Dxs = Dxs[is_good] + Dys = Dys[is_good] + pts = 
tuple([v[is_good] for v in pts]) + # check if empty + if len(pts[0]) == 0: + continue + + # filter out all keypoints which are on edges + if edge_th > 0: + + # # re-compute the Hessian + # Dxx = (at(b[:, 0] + 1, b[:, 1], b[:, 2]) + + # at(b[:, 0] - 1, b[:, 1], b[:, 2]) - + # 2.0 * at(b[:, 0], b[:, 1], b[:, 2])) + # Dyy = (at(b[:, 0], b[:, 1] + 1, b[:, 2]) + + # at(b[:, 0], b[:, 1] - 1, b[:, 2]) - + # 2.0 * at(b[:, 0], b[:, 1], b[:, 2])) + + # Dxy = 0.25 * (at(b[:, 0] + 1, b[:, 1] + 1, b[:, 2]) + + # at(b[:, 0] - 1, b[:, 1] - 1, b[:, 2]) - + # at(b[:, 0] - 1, b[:, 1] + 1, b[:, 2]) - + # at(b[:, 0] + 1, b[:, 1] - 1, b[:, 2])) + + # H = np.asarray([[Dxx, Dxy, Dxs], + # [Dxy, Dyy, Dys], + # [Dxs, Dys, Dss]]).transpose([2, 0, 1]) + + edge_score = (Dxx + Dyy) * (Dxx + Dyy) / (Dxx * Dyy - Dxy * Dxy) + is_good = ((edge_score >= 0) * + (edge_score < (edge_th + 1.0)**2 / edge_th)) + + if fScaleEdgeness > 0: + is_good = is_good * ( + abs(Dss) > fScaleEdgeness + ) + + Dx = Dx[is_good] + Dy = Dy[is_good] + Ds = Ds[is_good] + Dxx = Dxx[is_good] + Dyy = Dyy[is_good] + Dss = Dss[is_good] + Dxy = Dxy[is_good] + Dxs = Dxs[is_good] + Dys = Dys[is_good] + pts = tuple([v[is_good] for v in pts]) + # check if empty + if len(pts[0]) == 0: + continue + + b = np.zeros((len(pts[0]), 3)) + if do_interpolation: + # from VLFEAT + + # solve linear system + A = np.asarray([[Dxx, Dxy, Dxs], + [Dxy, Dyy, Dys], + [Dxs, Dys, Dss]]).transpose([2, 0, 1]) + + b = np.asarray([-Dx, -Dy, -Ds]).transpose([1, 0]) + + b_solved = np.zeros_like(b) + for idxPt in xrange(len(A)): + b_solved[idxPt] = lu_solve(lu_factor(A[idxPt]), b[idxPt]) + + b = b_solved + + # throw away the ones with bad subpixel localizatino + is_good = ((abs(b[:, 0]) < 1.5) * (abs(b[:, 1]) < 1.5) * + (abs(b[:, 2]) < 1.5)) + b = b[is_good] + pts = tuple([v[is_good] for v in pts]) + # check if empty + if len(pts[0]) == 0: + continue + + x = pts[1] + b[:, 0] + y = pts[0] + b[:, 1] + log_ds = b[:, 2] + + S[idxScale] = at(b[:, 0], b[:, 1], b[:, 2]) + X[idxScale] = x / resize_to_test[idxScale] + Y[idxScale] = y / resize_to_test[idxScale] + Z[idxScale] = scales_to_test[idxScale] * 2.0**(log_ds * log_scale_step) + + X = np.concatenate(X) + Y = np.concatenate(Y) + Z = np.concatenate(Z) + S = np.concatenate(S) + + XYZS = np.concatenate([X.reshape([-1, 1]), + Y.reshape([-1, 1]), + Z.reshape([-1, 1]), + S.reshape([-1, 1])], + axis=1) + + return XYZS + + +def nonMaxSuppression(score_img_or_list, nearby=1, scl_intv=2, nms_intv=1): + """ Performs Non Maximum Suppression. + + Parameters + ---------- + score_img_or_list: nparray or list + WRITEME + + nearby: int + Size of the neighborhood. + + scl_intv: int + How many levels we have between half scale. + + nms_intv: int + How many levels we use for scale space nms. 
+ + """ + + filter_size = (nearby * 2 + 1,) * 2 + + if isinstance(score_img_or_list, list): + bis2d = False + else: + bis2d = True + + if bis2d: + score = score_img_or_list + # max score in region + max_score = scipy.ndimage.filters.maximum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + # second score in region + second_score = scipy.ndimage.filters.rank_filter( + score, -2, filter_size, mode='constant', cval=-np.inf + ) + # min score in region to check infs + min_score = scipy.ndimage.filters.minimum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + nonmax_mask_or_list = ((score == max_score) * + (max_score > second_score) * + np.isfinite(min_score)) + + else: + + max2d_list = [ + scipy.ndimage.filters.maximum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + for score in score_img_or_list + ] + + second2d_list = [ + scipy.ndimage.filters.rank_filter( + score, -2, filter_size, mode='constant', cval=-np.inf + ) + for score in score_img_or_list + ] + + min2d_list = [ + scipy.ndimage.filters.minimum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + for score in score_img_or_list + ] + + nonmax2d_list = [(score == max_score) * (max_score > second_score) * + np.isfinite(min_score) + for score, max_score, second_score, min_score in + zip(score_img_or_list, + max2d_list, + second2d_list, + min2d_list) + ] + + nonmax_mask_or_list = [None] * len(nonmax2d_list) + for idxScale in xrange(len(nonmax2d_list)): + + nonmax2d = nonmax2d_list[idxScale] + max2d = max2d_list[idxScale] + + # prep output + nonmax_mask = np.zeros_like(nonmax2d) + + # get coordinates of the local max positions of nonmax2d + coord2d_max = np.where(nonmax2d) + # print("2d nonmax = {} kp".format(len(coord2d_max[0]))) + + # range of other scales to look at + # scl_diffs = np.arange(-scl_intv, scl_intv + 1) + # scl_diffs = np.arange(-1, 1 + 1) + scl_diffs = np.arange(-nms_intv, nms_intv + 1) + scl_diffs = scl_diffs[scl_diffs != 0] + + # skip if we don't have the complete set + if (idxScale + np.min(scl_diffs) < 0 or + idxScale + np.max(scl_diffs) > len(nonmax2d_list) - 1): + continue + + # Test on multiple scales to see if it is scalespace local max + for scl_diff in scl_diffs: + + scl_to_compare = idxScale + scl_diff + + # look at the other scales max + max2d_other = max2d_list[scl_to_compare] + # compute ratio for coordinate conversion + fRatio \ + = (np.asarray(max2d_other.shape, dtype='float') - 1.0) \ + / (np.asarray(nonmax2d.shape, dtype='float') - 1.0) + # get indices for lower layer + coord_other = tuple([np.round(v * r).astype(int) + for v, r in zip(coord2d_max, fRatio)]) + # find good points which should survive + idxGood = np.where((max2d[coord2d_max] > + max2d_other[coord_other]) * + np.isfinite(max2d_other[coord_other]) + ) + + # copy only the ones that are good + coord2d_max = tuple([v[idxGood] for v in coord2d_max]) + + # mark surviving points + nonmax_mask[coord2d_max] = 1.0 + + # special case when we are asked with single item in list + if len(nonmax2d_list) == 1: + # get coordinates of the local max positions of nonmax2d + coord2d_max = np.where(nonmax2d) + # mark surviving points + nonmax_mask[coord2d_max] = 1.0 + + # add to list + nonmax_mask_or_list[idxScale] = nonmax_mask + + return nonmax_mask_or_list + + +# +# kp_tools.py ends here diff --git a/utils/layer.py b/utils/layer.py new file mode 100644 index 0000000..b864308 --- /dev/null +++ b/utils/layer.py @@ -0,0 +1,85 @@ +# layer.py --- +# +# Filename: layer.py +# Description: +# Author: Kwang 
Moo Yi +# Maintainer: +# Created: Thu Jul 6 15:37:55 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import numpy as np +import tensorflow as tf + + +def get_W_b_conv2d(ksize, fanin, fanout): + W = tf.get_variable( + name="weights", + shape=[ksize, ksize, fanin, fanout], + # initializer=tf.random_normal_initializer(2e-2), + initializer=tf.truncated_normal_initializer( + stddev=2.0 / (ksize * ksize * fanin)), + ) + b = tf.get_variable( + name="biases", + shape=[fanout], + initializer=tf.constant_initializer(0.0), + ) + + return W, b + + +def get_W_b_fc(fanin, fanout): + W = tf.get_variable( + name="weights", + shape=[fanin, fanout], + # initializer=tf.random_normal_initializer(2e-2), + initializer=tf.truncated_normal_initializer(stddev=2.0 / fanin), + ) + b = tf.get_variable( + name="biases", + shape=[fanout], + initializer=tf.constant_initializer(0.0), + ) + return W, b + + +def softmax(val, axis, softmax_strength): + ''' Soft max function used for cost function ''' + + softmax_strength = np.cast["float32"](softmax_strength) + + if softmax_strength < 0: + res_after_max = tf.reduce_max(val, axis=axis) + else: + res_after_max = np.cast["float32"](1.0) / softmax_strength \ + * tf.log(tf.reduce_mean(tf.exp( + softmax_strength * ( + val - tf.reduce_max(val, axis=axis, keep_dims=True) + )), axis=axis)) + tf.reduce_max(val, axis=axis) + + return res_after_max + + +# +# layer.py ends here diff --git a/utils/legacy.py b/utils/legacy.py new file mode 100644 index 0000000..17f4651 --- /dev/null +++ b/utils/legacy.py @@ -0,0 +1,183 @@ +# legacy.py --- +# +# Filename: legacy.py +# Description: Functions related to using the old framework +# Author: Kwang Moo Yi +# Maintainer: +# Created: Tue Jul 11 14:52:25 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import os + +import tensorflow as tf + + +def build_legacy(network): + """Builds the tensorflow ops related to loading legacy weights. + + Note that implementation for this function is **intentionally** left out + from the original class as it is only a hack to allow old weights to be + loaded into tensorflow. 
+ + """ + + # Lazy import to prevent import issues + from utils.tf import get_tensor_shape + + # Check the current subtask + subtask = network.config.subtask + + # Mapping from the old framework variable name to the new tensorflow + # variable names + name_map = {} + if subtask == "kp" or subtask == "joint": + # Load the keypoint only when necessary + name_map["kp-c0"] = "network/lift/kp/conv-ghh-1" + if subtask == "ori" or subtask == "joint": + # Load the orientation only when necessary + name_map["ori-c0"] = "network/lift/ori/conv-act-pool-1" + name_map["ori-c1"] = "network/lift/ori/conv-act-pool-2" + name_map["ori-c2"] = "network/lift/ori/conv-act-pool-3" + name_map["ori-f3"] = "network/lift/ori/fc-ghh-drop-4" + name_map["ori-f4"] = "network/lift/ori/fc-ghh-5" + if subtask != "kp": + # Load the descriptor only when necessary + name_map["desc-1-conv"] = "network/lift/desc/conv-act-pool-norm-1" + name_map["desc-5-conv"] = "network/lift/desc/conv-act-pool-norm-2" + name_map["desc-9-conv"] = "network/lift/desc/conv-act-pool-3" + + # # Save name_map + # network.legacy_name_map = name_map + + # Build placeholders from the original variable shapes and create assign + # ops related to it + network.legacy_weights_in = {} + network.legacy_assign_op = {} + # _old: variable scope name for the old framework + # _new: variable scope name for the new framework + for _old in name_map: + _new = name_map[_old] + for _param_name in ["/weights", "/biases"]: + with tf.variable_scope("", reuse=True): + cur_param = tf.get_variable(_new + _param_name) + network.legacy_weights_in[_new + _param_name] = tf.placeholder( + tf.float32, + get_tensor_shape(cur_param), + ) + network.legacy_assign_op[_new + _param_name] = tf.assign( + cur_param, + network.legacy_weights_in[_new + _param_name], + ) + + # Create function attribute that actually runs the loader with the session + if not hasattr(network, "legacy_load_func"): + network.legacy_load_func = {} + + def legacy_load_func(sess, model): + # Create a feed_dict + feed_dict = create_feed_dict( + model, network.legacy_weights_in, name_map) + # Run all assign ops within the session + sess.run(network.legacy_assign_op, feed_dict=feed_dict) + + network.legacy_load_func[subtask] = legacy_load_func + + +def create_feed_dict(model, placeholders, name_map): + """Creates a feed dict to use for the assignment op. 
+ + model: dictionary containing the old weights + + placeholders: placeholders to feed to + + name_map: mapping for the old name to the new variable name + + """ + + feed_dict = {} + + # For each variables we want to assign + for _old in name_map: + + # Get the new name + _new = name_map[_old] + + # For weights + cur_weights = model[_old][_old + ".W"] + # If it is 4D+, shrink dimension, as the last dimension was there for + # legacy dev purposes + cur_weights = cur_weights.reshape(cur_weights.shape[:4]) + # Convert theano weights to tensorflow + if len(cur_weights.shape) == 4: + cur_weights = cur_weights.transpose((2, 3, 1, 0)) + + # For biases + cur_biases = model[_old][_old + ".b"] + + # Add to feed_dict after + feed_dict[placeholders[_new + "/weights"]] = cur_weights + feed_dict[placeholders[_new + "/biases"]] = cur_biases + + return feed_dict + + +def load_legacy_network(supervisor, subtask, load_dir): + """Load function for our old framework""" + + # Lazy loading to prevent import issues + from utils import loadh5 + + print("[{}] Checking if old pre-trained weights exists in {}" + "".format(subtask, load_dir)) + model_file = os.path.join(load_dir, "model.h5") + norm_file = os.path.join(load_dir, "norm.h5") + base_file = os.path.join(load_dir, "base.h5") + + if os.path.exists(model_file) and os.path.exists(norm_file) and \ + os.path.exists(base_file): + model = loadh5(model_file) + norm = loadh5(norm_file) + base = loadh5(base_file) + # Load the input normalization parameters + supervisor.network.mean["kp"] = float(norm["mean_x"]) + supervisor.network.mean["ori"] = float(norm["mean_x"]) + supervisor.network.mean["desc"] = float(base["patch-mean"]) + supervisor.network.std["kp"] = float(norm["std_x"]) + supervisor.network.std["ori"] = float(norm["std_x"]) + supervisor.network.std["desc"] = float(base["patch-std"]) + # Load weights for the component + supervisor.network.legacy_load_func[subtask](supervisor.sess, model) + print("[{}] Loaded previously trained weights".format(subtask)) + + return True + + else: + print("[{}] No pretrained weights from the old framework" + "".format(subtask)) + + return False + + +# +# legacy.py ends here diff --git a/utils/network.py b/utils/network.py new file mode 100644 index 0000000..c6e528d --- /dev/null +++ b/utils/network.py @@ -0,0 +1,64 @@ +# network.py --- +# +# Filename: network.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 15:38:49 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import tensorflow as tf + + +def make_theta(xyz, cs=None, scale=None, rr=0.5): + """Make the theta to be used for the spatial transformer + + If cs is None, simply just do the translation only. 
+ + """ + + # get dx, dy, dz + dx = xyz[:, 0] + dy = xyz[:, 1] + dz = xyz[:, 1] + # compute the resize from the largest scale image + reduce_ratio = rr + dr = (reduce_ratio) * (2.0)**dz + + if cs is None: + c = tf.ones_like(dx) + s = tf.zeros_like(dx) + else: + c = cs[:, 0] + s = cs[:, 1] + + theta = tf.stack( + [dr * c, -dr * s, dx, + dr * s, dr * c, dy], axis=1) + + return theta + + +# +# network.py ends here diff --git a/utils/nms.py b/utils/nms.py new file mode 100644 index 0000000..2779f83 --- /dev/null +++ b/utils/nms.py @@ -0,0 +1,375 @@ +# nms.py --- +# +# Filename: nms.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 16:26:38 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import numpy as np +import scipy +from scipy.linalg import lu_factor, lu_solve + +from six.moves import xrange + + +def get_XYZS_from_res_list(res_list, resize_to_test, scales_to_test, nearby=1, + edge_th=0, scl_intv=2, nms_intv=1, + do_interpolation=False, fScaleEdgeness=0.0): + # NMS + nms_res = nonMaxSuppression(res_list, nearby=nearby, + scl_intv=scl_intv, nms_intv=nms_intv) + + XYZS = get_subpixel_XYZS(res_list, nms_res, resize_to_test, + scales_to_test, edge_th, do_interpolation, + fScaleEdgeness) + # sort by score + XYZS = XYZS[np.argsort(XYZS[:, 3])[::-1]] + + return XYZS + + +def get_subpixel_XYZS(score_list, nms_list, resize_to_test, + scales_to_test, edge_th, do_interpolation, + fScaleEdgeness): + + log_scales = np.log2(scales_to_test) + log_scale_step = ((np.max(log_scales) - np.min(log_scales)) / + (len(scales_to_test) - 1.0)) + + X = [()] * len(nms_list) + Y = [()] * len(nms_list) + Z = [()] * len(nms_list) + S = [()] * len(nms_list) + for idxScale in xrange(len(nms_list)): + nms = nms_list[idxScale] + + pts = np.where(nms) + if len(pts[0]) == 0: + continue + + assert idxScale > 0 and idxScale < len(nms_list) - 1 + + # compute ratio for coordinate conversion + fRatioUp = ( + (np.asarray(score_list[idxScale + 1].shape, dtype='float') - 1.0) / + (np.asarray(score_list[idxScale].shape, dtype='float') - 1.0) + ).reshape([2, -1]) + fRatioDown = ( + (np.asarray(score_list[idxScale - 1].shape, dtype='float') - 1.0) / + (np.asarray(score_list[idxScale].shape, dtype='float') - 1.0) + ).reshape([2, -1]) + + # the conversion function + def at(dx, dy, ds): + if not isinstance(dx, np.ndarray): + dx = np.ones(len(pts[0]),) * dx + if not isinstance(dy, np.ndarray): + dy = np.ones(len(pts[0]),) * dy + if not isinstance(ds, np.ndarray): + ds = np.ones(len(pts[0]),) * ds + new_pts = (pts[0] + dy, pts[1] + dx) + ds = np.round(ds).astype(int) + fRatio = ((ds == 0).reshape([1, -1]) * 1.0 + + (ds == -1).reshape([1, -1]) * fRatioDown + + (ds == 1).reshape([1, -1]) * fRatioUp) + assert np.max(ds) <= 1 and np.min(ds) >= -1 + new_pts = tuple([np.round(v * r).astype(int) + for v, r in zip(new_pts, fRatio)]) + scores_to_return = np.asarray([ + score_list[idxScale + _ds][_y, _x] + for _ds, _x, _y in zip( + ds, new_pts[1], new_pts[0] + ) + ]) + return scores_to_return + + # compute the gradient + Dx = 0.5 * (at(+1, 0, 0) - at(-1, 0, 0)) + Dy = 0.5 * (at(0, +1, 0) - at(0, -1, 0)) + Ds = 0.5 * (at(0, 0, +1) - at(0, 0, -1)) + + # compute the Hessian + Dxx = (at(+1, 0, 0) + at(-1, 0, 0) - 2.0 * at(0, 0, 0)) + Dyy = (at(0, +1, 0) + at(0, -1, 0) - 2.0 * at(0, 0, 0)) + Dss = (at(0, 0, +1) + at(0, 0, -1) - 2.0 * at(0, 0, 0)) + + Dxy = 
0.25 * (at(+1, +1, 0) + at(-1, -1, 0) - + at(-1, +1, 0) - at(+1, -1, 0)) + Dxs = 0.25 * (at(+1, 0, +1) + at(-1, 0, -1) - + at(-1, 0, +1) - at(+1, 0, -1)) + Dys = 0.25 * (at(0, +1, +1) + at(0, -1, -1) - + at(0, -1, +1) - at(0, +1, -1)) + + # filter out all keypoints which we have inf + is_good = (np.isfinite(Dx) * np.isfinite(Dy) * np.isfinite(Ds) * + np.isfinite(Dxx) * np.isfinite(Dyy) * np.isfinite(Dss) * + np.isfinite(Dxy) * np.isfinite(Dxs) * np.isfinite(Dys)) + Dx = Dx[is_good] + Dy = Dy[is_good] + Ds = Ds[is_good] + Dxx = Dxx[is_good] + Dyy = Dyy[is_good] + Dss = Dss[is_good] + Dxy = Dxy[is_good] + Dxs = Dxs[is_good] + Dys = Dys[is_good] + pts = tuple([v[is_good] for v in pts]) + # check if empty + if len(pts[0]) == 0: + continue + + # filter out all keypoints which are on edges + if edge_th > 0: + + # # re-compute the Hessian + # Dxx = (at(b[:, 0] + 1, b[:, 1], b[:, 2]) + + # at(b[:, 0] - 1, b[:, 1], b[:, 2]) - + # 2.0 * at(b[:, 0], b[:, 1], b[:, 2])) + # Dyy = (at(b[:, 0], b[:, 1] + 1, b[:, 2]) + + # at(b[:, 0], b[:, 1] - 1, b[:, 2]) - + # 2.0 * at(b[:, 0], b[:, 1], b[:, 2])) + + # Dxy = 0.25 * (at(b[:, 0] + 1, b[:, 1] + 1, b[:, 2]) + + # at(b[:, 0] - 1, b[:, 1] - 1, b[:, 2]) - + # at(b[:, 0] - 1, b[:, 1] + 1, b[:, 2]) - + # at(b[:, 0] + 1, b[:, 1] - 1, b[:, 2])) + + # H = np.asarray([[Dxx, Dxy, Dxs], + # [Dxy, Dyy, Dys], + # [Dxs, Dys, Dss]]).transpose([2, 0, 1]) + + edge_score = (Dxx + Dyy) * (Dxx + Dyy) / (Dxx * Dyy - Dxy * Dxy) + is_good = ((edge_score >= 0) * + (edge_score < (edge_th + 1.0)**2 / edge_th)) + + if fScaleEdgeness > 0: + is_good = is_good * ( + abs(Dss) > fScaleEdgeness + ) + + Dx = Dx[is_good] + Dy = Dy[is_good] + Ds = Ds[is_good] + Dxx = Dxx[is_good] + Dyy = Dyy[is_good] + Dss = Dss[is_good] + Dxy = Dxy[is_good] + Dxs = Dxs[is_good] + Dys = Dys[is_good] + pts = tuple([v[is_good] for v in pts]) + # check if empty + if len(pts[0]) == 0: + continue + + b = np.zeros((len(pts[0]), 3)) + if do_interpolation: + # from VLFEAT + + # solve linear system + A = np.asarray([[Dxx, Dxy, Dxs], + [Dxy, Dyy, Dys], + [Dxs, Dys, Dss]]).transpose([2, 0, 1]) + + b = np.asarray([-Dx, -Dy, -Ds]).transpose([1, 0]) + + b_solved = np.zeros_like(b) + for idxPt in xrange(len(A)): + b_solved[idxPt] = lu_solve(lu_factor(A[idxPt]), b[idxPt]) + + b = b_solved + + # throw away the ones with bad subpixel localizatino + is_good = ((abs(b[:, 0]) < 1.5) * (abs(b[:, 1]) < 1.5) * + (abs(b[:, 2]) < 1.5)) + b = b[is_good] + pts = tuple([v[is_good] for v in pts]) + # check if empty + if len(pts[0]) == 0: + continue + + x = pts[1] + b[:, 0] + y = pts[0] + b[:, 1] + log_ds = b[:, 2] + + S[idxScale] = at(b[:, 0], b[:, 1], b[:, 2]) + X[idxScale] = x / resize_to_test[idxScale] + Y[idxScale] = y / resize_to_test[idxScale] + Z[idxScale] = scales_to_test[idxScale] * 2.0**(log_ds * log_scale_step) + + X = np.concatenate(X) + Y = np.concatenate(Y) + Z = np.concatenate(Z) + S = np.concatenate(S) + + XYZS = np.concatenate([X.reshape([-1, 1]), + Y.reshape([-1, 1]), + Z.reshape([-1, 1]), + S.reshape([-1, 1])], + axis=1) + + return XYZS + + +def nonMaxSuppression(score_img_or_list, nearby=1, scl_intv=2, nms_intv=1): + """ Performs Non Maximum Suppression. + + Parameters + ---------- + score_img_or_list: nparray or list + WRITEME + + nearby: int + Size of the neighborhood. + + scl_intv: int + How many levels we have between half scale. + + nms_intv: int + How many levels we use for scale space nms. 
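+
+    Notes
+    -----
+    A rough usage sketch (the variable names here are illustrative only):
+    given a list of per-scale score maps, get_XYZS_from_res_list() above
+    calls this function first and then refines the surviving maxima with
+    get_subpixel_XYZS(), e.g.
+
+    >>> masks = nonMaxSuppression(score_maps, nearby=1, scl_intv=2,
+    ...                           nms_intv=1)
+    >>> # masks[i] marks the scale-space local maxima of score_maps[i]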
+ + """ + + filter_size = (nearby * 2 + 1,) * 2 + + if isinstance(score_img_or_list, list): + bis2d = False + else: + bis2d = True + + if bis2d: + score = score_img_or_list + # max score in region + max_score = scipy.ndimage.filters.maximum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + # second score in region + second_score = scipy.ndimage.filters.rank_filter( + score, -2, filter_size, mode='constant', cval=-np.inf + ) + # min score in region to check infs + min_score = scipy.ndimage.filters.minimum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + nonmax_mask_or_list = ((score == max_score) * + (max_score > second_score) * + np.isfinite(min_score)) + + else: + + max2d_list = [ + scipy.ndimage.filters.maximum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + for score in score_img_or_list + ] + + second2d_list = [ + scipy.ndimage.filters.rank_filter( + score, -2, filter_size, mode='constant', cval=-np.inf + ) + for score in score_img_or_list + ] + + min2d_list = [ + scipy.ndimage.filters.minimum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + for score in score_img_or_list + ] + + nonmax2d_list = [(score == max_score) * (max_score > second_score) * + np.isfinite(min_score) + for score, max_score, second_score, min_score in + zip(score_img_or_list, + max2d_list, + second2d_list, + min2d_list) + ] + + nonmax_mask_or_list = [None] * len(nonmax2d_list) + for idxScale in xrange(len(nonmax2d_list)): + + nonmax2d = nonmax2d_list[idxScale] + max2d = max2d_list[idxScale] + + # prep output + nonmax_mask = np.zeros_like(nonmax2d) + + # get coordinates of the local max positions of nonmax2d + coord2d_max = np.where(nonmax2d) + # print("2d nonmax = {} kp".format(len(coord2d_max[0]))) + + # range of other scales to look at + # scl_diffs = np.arange(-scl_intv, scl_intv + 1) + # scl_diffs = np.arange(-1, 1 + 1) + scl_diffs = np.arange(-nms_intv, nms_intv + 1) + scl_diffs = scl_diffs[scl_diffs != 0] + + # skip if we don't have the complete set + if (idxScale + np.min(scl_diffs) < 0 or + idxScale + np.max(scl_diffs) > len(nonmax2d_list) - 1): + continue + + # Test on multiple scales to see if it is scalespace local max + for scl_diff in scl_diffs: + + scl_to_compare = idxScale + scl_diff + + # look at the other scales max + max2d_other = max2d_list[scl_to_compare] + # compute ratio for coordinate conversion + fRatio \ + = (np.asarray(max2d_other.shape, dtype='float') - 1.0) \ + / (np.asarray(nonmax2d.shape, dtype='float') - 1.0) + # get indices for lower layer + coord_other = tuple([np.round(v * r).astype(int) + for v, r in zip(coord2d_max, fRatio)]) + # find good points which should survive + idxGood = np.where((max2d[coord2d_max] > + max2d_other[coord_other]) * + np.isfinite(max2d_other[coord_other]) + ) + + # copy only the ones that are good + coord2d_max = tuple([v[idxGood] for v in coord2d_max]) + + # mark surviving points + nonmax_mask[coord2d_max] = 1.0 + + # special case when we are asked with single item in list + if len(nonmax2d_list) == 1: + # get coordinates of the local max positions of nonmax2d + coord2d_max = np.where(nonmax2d) + # mark surviving points + nonmax_mask[coord2d_max] = 1.0 + + # add to list + nonmax_mask_or_list[idxScale] = nonmax_mask + + return nonmax_mask_or_list + +# +# nms.py ends here diff --git a/utils/test.py b/utils/test.py new file mode 100644 index 0000000..168067e --- /dev/null +++ b/utils/test.py @@ -0,0 +1,386 @@ +# test.py --- +# +# Filename: test.py +# Description: +# Author: Kwang Moo Yi +# 
Maintainer: +# Created: Thu Jul 6 16:24:22 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import cv2 +import numpy as np +import tensorflow as tf + +from six.moves import xrange + + +def compute_auc(x, y): + """Compute AUCs given curve (x, y) + + Parameters + ---------- + x : np.ndarray, float, 1d + The values in x axis + + y : np.ndarray, float, 1d + The values in y axis + """ + # AUC for precision-recall + dx = x[1:] - x[:-1] # bin width + y_avg = (y[1:] + y[:-1]) / 2.0 # avg y in bin + # Below expression but without loops + id_dx_valid = np.cast[float](dx > 0) + auc = np.sum(y_avg * dx * id_dx_valid) + # auc = 0 + # for _dx, _y_avg in zip(dx, y_avg): + # if _dx > 0: + # auc += _dx * _y_avg + + return auc + + +def eval_descs(d1, d2, d3, neg_weight): + + # Compute descriptor distances + pair_dists = np.sqrt(np.sum((d1 - d2)**2, axis=1)).flatten() + nonpair_dists = np.sqrt(np.sum((d1 - d3)**2, axis=1)).flatten() + + # Make dists and labels + dists = np.concatenate([pair_dists, nonpair_dists], axis=0) + labels = np.concatenate([ + np.ones_like(pair_dists), np.zeros_like(nonpair_dists)], axis=0) + + # create curves by hand + num_th = 1000 + + # LATER: Check if neg_per_pos is working + + # ------------------------------------------------- + # Do some smart trick to only sort and compare once + # ------------------------------------------------- + # sort labels according to data indices + sorted_labels = labels[np.argsort(dists)] + # get thresholds using histogram function + num_in_bin, thr = np.histogram(dists, num_th - 1) + num_in_bin = np.concatenate([np.zeros((1,)), num_in_bin]) + # alocate + prec = np.zeros((num_th,)) + rec = np.zeros((num_th,)) + tnr = np.zeros((num_th,)) + # for each bin + idx_s = 0 + retrieved_pos = 0 + retrieved_neg = 0 + total_pos = np.sum(sorted_labels == 1).astype(float) + total_neg = np.sum(sorted_labels == 0).astype(float) * neg_weight + for i in xrange(num_th): + # The number of elements we need to look at + idx_e = int(idx_s + num_in_bin[i]) + # look at the labels to see how many are right/wrong and accumulate + retrieved_pos += np.sum(sorted_labels[idx_s:idx_e] == 1).astype(float) + retrieved_neg += np.sum( + sorted_labels[idx_s:idx_e] == 0).astype(float) * neg_weight + # move start point + idx_s = idx_e + + retrieved = retrieved_pos + retrieved_neg + if retrieved > 0: + prec[i] = retrieved_pos / retrieved + else: + # precision is one, is this correct? + prec[i] = 1.0 + rec[i] = retrieved_pos / total_pos + tnr[i] = 1 - retrieved_neg / total_neg + + tpr = rec + + return compute_auc(rec, prec) + + +def eval_kps(xyzs, poss, scores, r_base): + """Evaluate keypoints based on their locations and scores. + + + Parameters + ---------- + + xyzs: the coordinates of the keypoints for P1, P2, P3, and P4 + + poss: the ground-truth location of the SfM points for each patch + + scores: scores for each patch + + neg_weight: Weight of the negative sample + + r_base: float + Base multiplier to apply to z to get rectangle support regions' half + width. Typical computation would be: + + >>> r_base = (float(config.desc_input_size) / + float(get_patch_size(config))) / 6.0 + + """ + + # Let's convert everything to numpy arrays just to make our lives easier + xyzs = np.asarray(xyzs) + poss = np.asarray(poss) + scores = np.asarray(scores) + + # Check for each keypoint if it is repeated. 
Note that we only compute the + # overlap for *corresponding* pairs. + # + # th = 0.4 + # is_repeated = (get_kp_overlap(xyzs[0], poss[0], + # xyzs[1], poss[1], r_base) >= 0.4) + kp_overlap = get_kp_overlap(xyzs[0], poss[0], xyzs[1], poss[1], + r_base, mode='rectangle').flatten() + + # set number of thresholds we want to test + num_th = 1000 + max_score = np.max(scores) + min_score = np.min(scores) + eps = 1e-10 + ths = np.linspace(max_score, min_score - 2 * eps, num_th) + eps + + # alocate + prec = np.zeros((num_th,)) + rec = np.zeros((num_th,)) + tnr = np.zeros((num_th,)) + # Number of all positive pairs + total_pos_pair = float(len(xyzs[0])) + # Number of all non-SfM keypoints + total_non_sfm_kp = float(len(xyzs[3])) + for i in xrange(len(ths)): + # set threshold + th = ths[i] + # find all that are selected as positives + retv = [None] * 4 + for idx_branch in xrange(4): + retv[idx_branch] = np.where(scores[idx_branch] >= th)[0] + # number of retrieved keypoins. No balancing is needed since we don't + # have repetitions any more + retrieved_kp = sum([float(len(retv[_i])) for _i in xrange(4)]) + # Number of retrieved SfM keypoints. Again, no balancing + retrieved_sfm_kp = sum([float(len(retv[_i])) for _i in xrange(3)]) + # number of retrieved non SfM keypoints + retrieved_non_sfm_kp = float(len(retv[3])) + # find positive pairs that are recovered (ones that are repeated) + # check if both of the pairs are retrieved + is_pair_retrieved = scores[1][retv[0]] >= th + # get cumulative overlap + retrieved_pos_pair = float( + np.sum(is_pair_retrieved * kp_overlap[retv[0]])) + + if retrieved_kp > 0: + prec[i] = retrieved_sfm_kp / retrieved_kp + else: + # precision is one, is this correct? + prec[i] = 1.0 + rec[i] = retrieved_pos_pair / total_pos_pair + tnr[i] = 1 - retrieved_non_sfm_kp / total_non_sfm_kp + + # if i == 500: + # import IPython + # IPython.embed() + + tpr = rec + + #, computeAUC(tnr[::-1], tpr[::-1]), prec, rec + return compute_auc(rec, prec) + + +def rebase_xyz(xyz, base): + """Recomputes 'xyz' with center being 'base'. + + Parameters + ---------- + xyz: theano.tensor.matrix + 'xyz' to be modified to have center at 'base'. + + base: theano.tensor.matrix + The new center position. + + Notes + ----- + This function is required as xy is affected by which z you are having. + """ + + # Move to the same z. Notice how we set the scaler here. For + # example, if base == 1 then it means that we are moving to a + # lower scale, so we need to multiply x and y by two. + new_z = xyz[:, 2] - base[:, 2] + scaler = 2.0**base[:, 2] + new_x = xyz[:, 0] * scaler + new_y = xyz[:, 1] * scaler + + # Move x and y. The new scaler here will be based on where x and y + # are after moving, i.e. new_z. For example, if new_z == 1, then + # we need to move x and y in half the value of the base x and y. + scaler = 2.0**(-new_z) + new_x -= base[:, 0] * scaler + new_y -= base[:, 1] * scaler + + if isinstance(new_x, np.ndarray): + new_xyz = np.concatenate([ + v.reshape([-1, 1]) for v in [new_x, new_y, new_z] + ], axis=1) + else: + new_xyz = tf.concat([ + tf.reshape(v, [-1, 1]) for v in [new_x, new_y, new_z] + ], axis=1) + + return new_xyz + + +def get_rect_inter(x1, y1, r1, x2, y2, r2): + """Computes Intersection of Rectangles + + Parameters + ---------- + x1: ndarray or tensor + x coordinates of the first rectangle. + + y1: ndarray or tensor + y coordinates of the first rectangle. + + r1: ndarray or tensor + Half width of the first rectangle (just to be consistent with + circle impl). 
+
+    x2: ndarray or tensor
+        x coordinates of the second rectangle.
+
+    y2: ndarray or tensor
+        y coordinates of the second rectangle.
+
+    r2: ndarray or tensor
+        Half width of the second rectangle (just to be consistent with
+        circle impl).
+    """
+
+    # use theano or numpy depending on the variable
+    if isinstance(x1, np.ndarray):
+        # Checking if ndarray will prevent us from loading theano
+        # unnecessarily.
+        cm = np
+    else:
+        # import tensorflow (Note that the code works for theano if you just
+        # uncomment below)
+        # import theano.tensor as T
+        # cm = T
+        import tensorflow as tf
+        cm = tf
+
+    # intersection computation
+    inter_w = cm.minimum(x1 + r1, x2 + r2) - cm.maximum(x1 - r1, x2 - r2)
+    inter_w = cm.maximum(0.0, inter_w)
+    inter_h = cm.minimum(y1 + r1, y2 + r2) - cm.maximum(y1 - r1, y2 - r2)
+    inter_h = cm.maximum(0.0, inter_h)
+
+    return inter_w * inter_h
+
+
+def get_kp_overlap(xyz1, pos1, xyz2, pos2, r_base, mode='circle'):
+    """Determines if a keypoint is repeated
+
+    Parameters
+    ----------
+    xyz1: ndarray, float, 3d
+        Coordinates and scale of the first keypoint.
+
+    pos1: ndarray, float, 3d
+        Coordinates and scale of the ground truth position of the first
+        keypoint.
+
+    xyz2: ndarray, float, 3d
+        Coordinates and scale of the second keypoint.
+
+    pos2: ndarray, float, 3d
+        Coordinates and scale of the ground truth position of the second
+        keypoint.
+
+    r_base: float
+        Base multiplier to apply to z to get rectangle support regions' half
+        width. Typical computation would be:
+
+        >>> r_base = (float(myNet.config.nDescInputSize) /
+                      float(myNet.config.nPatchSize)) / 6.0
+
+        We use 1/6 of the support region to compute the overlap.
+
+    Note
+    ----
+    Unlike the cost function, here we use the intersection of circles to be
+    maximally compatible with the other benchmarks. However, this can be
+    easily changed. Also, r_base is computed to use the original keypoint
+    scale, whereas the cost function uses the descriptor support region.
+
+    """
+
+    # Rebase the two xyzs so that they can be compared.
+    xyz1 = rebase_xyz(xyz1, pos1)
+    xyz2 = rebase_xyz(xyz2, pos2)
+
+    # Retrieve the keypoint support region based on r_base.
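+    # A small worked example of the rectangle-overlap measure computed below
+    # (the numbers are illustrative, not taken from any dataset): the support
+    # half-widths scale as r_base * 2**z, and for two identical keypoints
+    # with half-width r we get inter = (2r)^2 and
+    # union = (2r)^2 + (2r)^2 - inter = (2r)^2, so the returned overlap is
+    # 1.0; for disjoint rectangles inter = 0 and the overlap is 0.
+    # eval_kps() above calls this function with mode='rectangle'.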
+ x1 = xyz1[:, 0] + y1 = xyz1[:, 1] + r1 = r_base * 2.0**xyz1[:, 2] + + x2 = xyz2[:, 0] + y2 = xyz2[:, 1] + r2 = r_base * 2.0**xyz2[:, 2] + + if mode == 'circle': + raise NotImplementedError( + "LATER: after the release, when somebody wants to do it") + # inter = get_circle_inter(x1, y1, r1, x2, y2, r2) + # union = r1**2.0 + r2**2.0 - inter + elif mode == 'rectangle': + inter = get_rect_inter(x1, y1, r1, x2, y2, r2) + union = (2.0 * r1)**2.0 + (2.0 * r2)**2.0 - inter + else: + raise ValueError('Unsupported mode ' + mode) + + return (inter / union).astype("float32").reshape([-1, 1]) + + +def draw_XYZS_to_img(XYZS, image_color, out_file_name): + """ Drawing functino for displaying """ + + # draw onto the original image + if cv2.__version__[0] == '3': + linetype = cv2.LINE_AA + else: + linetype = cv2.CV_AA + + [cv2.circle(image_color, tuple(np.round(pos).astype(int)), + np.round(rad * 6.0).astype(int), (0, 255, 0), 2, + lineType=linetype) + for pos, rad in zip(XYZS[:, :2], XYZS[:, 2])] + + cv2.imwrite(out_file_name, image_color) + + +# +# test.py ends here diff --git a/utils/tf.py b/utils/tf.py new file mode 100644 index 0000000..d52ddfe --- /dev/null +++ b/utils/tf.py @@ -0,0 +1,62 @@ +# tf.py --- +# +# Filename: tf.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 15:35:36 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import tensorflow as tf +import tensorflow.contrib.slim as slim + + +def show_all_variables(): + # Adapted from original code at + # https://github.com/carpedm20/simulated-unsupervised-tensorflow + model_vars = tf.trainable_variables() + slim.model_analyzer.analyze_vars(model_vars, print_info=True) + + +def image_summary_nhwc(name, img, max_outputs=1): + """Image summary function for NHWC format""" + + return tf.summary.image(name, img, max_outputs) + + +def image_summary_nchw(name, img, max_outputs=1): + """Image summary function for NCHW format""" + + return tf.summary.image( + name, tf.transpose(img, (0, 2, 3, 1)), max_outputs) + + +def get_tensor_shape(tensor): + + return [_s if _s is not None else -1 for + _s in tensor.get_shape().as_list()] + + +# +# tf.py ends here diff --git a/utils/train.py b/utils/train.py new file mode 100644 index 0000000..682f435 --- /dev/null +++ b/utils/train.py @@ -0,0 +1,90 @@ +# train.py --- +# +# Filename: train.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 16:23:31 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import numpy as np + + +def get_hard_batch(loss_list, data_list): + """Returns a batch with the hardest samples. + + Given a list of losses and data, merges them together to form a batch with + is the hardest samples. 
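+
+    As an informal sketch of the expected nesting (inferred from the NOTE
+    and the code below, so the variable names are illustrative): each element
+    of data_list is a nested dict holding one mini-batch, e.g.
+    {"patch": {"P1": ndarray, ...}, ...}, and each element of loss_list is
+    the per-sample loss array for that same mini-batch. The returned dict
+    keeps the same nesting but contains only the batch_size hardest samples:
+
+    >>> hard_batch = get_hard_batch(loss_list, data_list)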
+
+    NOTE
+    ----
+
+    A data item in data_list is accessible as data_list[i]["patch"]["P1"][j]
+
+    A loss item in loss_list is accessible as loss_list[i][j]
+
+    """
+
+    # Concatenate all data
+    all_data = {}
+    for _type in data_list[0]:
+        all_data[_type] = {}
+        for _name in data_list[0][_type]:
+            # "aug_rot" is a dictionary with two keys
+            if isinstance(data_list[0][_type][_name], dict):
+                all_data[_type][_name] = {}
+                for _key in data_list[0][_type][_name]:
+                    all_data[_type][_name][_key] = np.concatenate(
+                        (lambda _t=_type, _n=_name, _k=_key, _d=data_list: [
+                            data[_t][_n][_k] for data in _d])(), axis=0)
+            else:
+                all_data[_type][_name] = np.concatenate(
+                    (lambda _t=_type, _n=_name, _d=data_list: [
+                        data[_t][_n] for data in _d])(), axis=0)
+
+    # Extract batch size
+    batch_size = len(loss_list[0])
+
+    # Concatenate all losses
+    all_loss = np.concatenate(loss_list, axis=0)
+
+    # Sort by loss and get indices for the hardest ones
+    ind = np.argsort(all_loss)[::-1]
+    ind = ind[:batch_size]
+
+    # Select the hardest examples
+    for _type in all_data:
+        for _name in all_data[_type]:
+            if isinstance(all_data[_type][_name], dict):
+                all_data[_type][_name] = {
+                    _key: all_data[_type][_name][_key][ind]
+                    for _key in all_data[_type][_name]}
+            else:
+                all_data[_type][_name] = all_data[_type][_name][ind]
+
+    return all_data
+
+
+#
+# train.py ends here