From e9727d167fd8289715be06e956cc42f06eda9fa6 Mon Sep 17 00:00:00 2001 From: Eduard Trulls Date: Mon, 16 Oct 2017 16:07:03 +0200 Subject: [PATCH] First release. --- .gitignore | 9 + collate.py | 34 ++ config.py | 253 +++++++++ datasets/__init__.py | 0 datasets/eccv2016/__init__.py | 0 datasets/eccv2016/custom_types.py | 617 ++++++++++++++++++++ datasets/eccv2016/eccv.py | 733 ++++++++++++++++++++++++ datasets/eccv2016/helper.py | 831 +++++++++++++++++++++++++++ datasets/eccv2016/math_tools.py | 359 ++++++++++++ datasets/eccv2016/wrapper.py | 127 +++++ datasets/lift.py | 334 +++++++++++ datasets/test.py | 150 +++++ layers.py | 316 +++++++++++ losses.py | 129 +++++ main.py | 81 +++ modules/__init__.py | 0 modules/bypass.py | 69 +++ modules/lift_desc.py | 127 +++++ modules/lift_kp.py | 181 ++++++ modules/lift_ori.py | 151 +++++ modules/spatial_transformer.py | 222 ++++++++ networks/__init__.py | 0 networks/lift.py | 899 ++++++++++++++++++++++++++++++ readme.md | 112 ++++ test_desc.sh | 2 + test_kp.sh | 2 + test_kp_legacy.sh | 2 + test_ori.sh | 2 + test_orig_lift.sh | 81 +++ tester.py | 405 ++++++++++++++ trainer.py | 250 +++++++++ utils/__init__.py | 45 ++ utils/config.py | 68 +++ utils/dump.py | 199 +++++++ utils/kp_tools.py | 682 +++++++++++++++++++++++ utils/layer.py | 85 +++ utils/legacy.py | 183 ++++++ utils/network.py | 64 +++ utils/nms.py | 375 +++++++++++++ utils/test.py | 386 +++++++++++++ utils/tf.py | 62 +++ utils/train.py | 90 +++ 42 files changed, 8717 insertions(+) create mode 100644 .gitignore create mode 100644 collate.py create mode 100644 config.py create mode 100644 datasets/__init__.py create mode 100644 datasets/eccv2016/__init__.py create mode 100644 datasets/eccv2016/custom_types.py create mode 100644 datasets/eccv2016/eccv.py create mode 100644 datasets/eccv2016/helper.py create mode 100644 datasets/eccv2016/math_tools.py create mode 100644 datasets/eccv2016/wrapper.py create mode 100644 datasets/lift.py create mode 100644 datasets/test.py create mode 100644 layers.py create mode 100644 losses.py create mode 100644 main.py create mode 100644 modules/__init__.py create mode 100644 modules/bypass.py create mode 100644 modules/lift_desc.py create mode 100644 modules/lift_kp.py create mode 100644 modules/lift_ori.py create mode 100644 modules/spatial_transformer.py create mode 100644 networks/__init__.py create mode 100644 networks/lift.py create mode 100644 readme.md create mode 100755 test_desc.sh create mode 100755 test_kp.sh create mode 100755 test_kp_legacy.sh create mode 100755 test_ori.sh create mode 100755 test_orig_lift.sh create mode 100644 tester.py create mode 100644 trainer.py create mode 100644 utils/__init__.py create mode 100644 utils/config.py create mode 100644 utils/dump.py create mode 100644 utils/kp_tools.py create mode 100644 utils/layer.py create mode 100644 utils/legacy.py create mode 100644 utils/network.py create mode 100644 utils/nms.py create mode 100644 utils/test.py create mode 100644 utils/tf.py create mode 100644 utils/train.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0c30bd5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +.pyc +logs +*.pyc +*__pycache__* +*.dll +*.tfrecords +*.h5 +test/ +gpupy diff --git a/collate.py b/collate.py new file mode 100644 index 0000000..e416033 --- /dev/null +++ b/collate.py @@ -0,0 +1,34 @@ +from os import listdir +from os.path import isfile, isdir, join +from argparse import ArgumentParser +import tensorflow as tf +from utils.dump import loadh5 + + +def 
look_for_checkpoints(folder, task): + dirs = [f + for f in listdir(folder) + if isdir(join(folder, f))] + + for d in dirs: + # print(join(folder, d)) + if d == task or (d in ['kp', 'ori', 'desc', 'joint'] and task == 'all'): + cp = tf.train.latest_checkpoint(join(folder, d)) + if cp is not None: + # Load best validation result + r = loadh5(join(folder, d, 'best_val_loss.h5'))[d] + s = loadh5(join(folder, d, 'step.h5'))[d] + print('{0:s} -> {1:.05f} [{2:d}]'.format(join(folder, d), r, s)) + else: + look_for_checkpoints(join(folder, d), task) + + +parser = ArgumentParser() +parser.add_argument('--logdir', type=str, required=True) +parser.add_argument('--task', type=str, default='all', help='kp, ori, desc, joint, all') +params = parser.parse_args() + +print() +print('*** Logdir: "{}" (task: {}) ***'.format(params.logdir, params.task)) +look_for_checkpoints(params.logdir, params.task) +print() diff --git a/config.py b/config.py new file mode 100644 index 0000000..ffe324b --- /dev/null +++ b/config.py @@ -0,0 +1,253 @@ +# config.py --- +# +# Filename: config.py +# Description: Configuration module. +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Wed Jun 28 13:08:23 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# Adapted from original code at +# https://github.com/carpedm20/simulated-unsupervised-tensorflow +# +# + +# Change Log: +# +# +# + +# Code: + + +import argparse +import getpass +import json +import os +from utils.config import get_patch_size + + +def str2bool(v): + return v.lower() in ("true", "1") + + +arg_lists = [] +parser = argparse.ArgumentParser() +username = getpass.getuser() + + +def add_argument_group(name): + arg = parser.add_argument_group(name) + arg_lists.append(arg) + return arg + + +# ---------------------------------------- +# Network +net_arg = add_argument_group("Network") +net_arg.add_argument("--kp_input_size", type=int, default=48, help="") +net_arg.add_argument("--kp_filter_size", type=int, default=25, help="") +net_arg.add_argument("--kp_base_scale", type=float, default=2.0, help="") +net_arg.add_argument("--kp_com_strength", type=float, default=10.0, help="") +# net_arg.add_argument("--ori_input_size", type=int, default=28, help="") +net_arg.add_argument("--ori_input_size", type=int, default=64, help="") +net_arg.add_argument("--desc_input_size", type=int, default=64, help="") +net_arg.add_argument("--desc_support_ratio", type=float, default=6.0, help="") + +# Module selection +net_arg.add_argument("--module_kp", type=str, default="lift_kp", help="") +net_arg.add_argument("--module_ori", type=str, default="lift_ori", help="") +net_arg.add_argument("--module_desc", type=str, default="lift_desc", help="") + +# Batch-norm on-off +net_arg.add_argument("--use_input_batch_norm", type=str2bool, default=False, help="") +net_arg.add_argument("--use_batch_norm", type=str2bool, default=True, help="") + +# Data compatibility option +net_arg.add_argument("--old_data_compat", type=str2bool, default=False, help="Use hard-mined, non-augmented set") + +# Orientation detector options +net_arg.add_argument("--use_augmented_set", type=str2bool, default=False, help="Use/extract the dataset for augmented rotations") +net_arg.add_argument("--augment_rotations", type=str2bool, default=False, help="") +net_arg.add_argument("--use_dropout_ori", type=bool, default=False, help="") +net_arg.add_argument("--ori_activation", type=str, default="ghh", choices=["ghh", "tanh"], help="") + +# Descriptor 
options +net_arg.add_argument("--desc_activ", type=str, default="relu", help="Descriptor activation") +net_arg.add_argument("--desc_pool", type=str, default="avg_pool", help="Descriptor pooling") +net_arg.add_argument("--use_subtractive_norm", type=str2bool, default=False, help="Descriptor subtractive normalization") +net_arg.add_argument("--use_triplet_loss", type=str2bool, default=True, help="Triplet loss") + +# Use old mean/std +net_arg.add_argument("--use_old_mean_std", type=str2bool, default=False, help="") + +# ---------------------------------------- +# Loss Function +loss_arg = add_argument_group("Loss") +loss_arg.add_argument("--alpha_overlap", type=float, default=1e0, help="") +loss_arg.add_argument("--alpha_classification", type=float, default=1e-8, + help="") +loss_arg.add_argument("--alpha_margin", type=float, default=4.0, help="") +# for joint training +loss_arg.add_argument("--alpha_kp", type=float, default=1e0, help="") +loss_arg.add_argument("--alpha_desc", type=float, default=1e0, help="") +loss_arg.add_argument("--kp_scoremap_softmax_strength", + type=float, default=10.0, help="") + +# ---------------------------------------- +# Data +data_arg = add_argument_group("Data") +data_arg.add_argument("--nchannel", type=int, default=1, help="") +data_arg.add_argument("--data_type", type=str, default="eccv2016", help="") +data_arg.add_argument("--data_name", type=str, default="piccadilly", help="") +data_arg.add_argument( + "--data_dir", type=str, help=( + "The directory containing the dataset. " + "Will look for {data_dir}/{data_name}" + ), default="/cvlabdata2/home/{}/Datasets/".format(username), +) +data_arg.add_argument( + "--temp_dir", type=str, help=( + "The temporary directory where data related cache will be stored." + ), default="/cvlabdata2/home/{}/Temp/".format(username), +) +data_arg.add_argument( + "--scratch_dir", type=str, help=( + "The temporary directory that will be used as cache." + "We have this since the large data is typically stored in a " + "network share" + ), default="/scratch/{}/Temp/".format(username), +) +data_arg.add_argument( + "--pair_dir", type=str, help=( + "Creating pairs are time consuming. " + "We store the pair in this directory. " + "This behavior might be removed in the future. " + ), default="./pairs", +) +data_arg.add_argument("--regen_pairs", type=str2bool, default=False, help="") + +# ---------------------------------------- +# Task +task_arg = add_argument_group("Task") +task_arg.add_argument("--task", type=str, default="train", + choices=["train", "test"], + help="") +task_arg.add_argument("--subtask", type=str, default="desc", + choices=["kp", "ori", "desc", "joint"], + help="") +task_arg.add_argument("--logdir", type=str, default="", help="") +task_arg.add_argument("--finetune", type=str, default="kp", help="e.g. 
'kp+ori+desc'") + +# ---------------------------------------- +# Training +train_arg = add_argument_group("Train") +train_arg.add_argument("--random_seed", type=int, default=1234, help="") +train_arg.add_argument("--batch_size", type=int, default=128, help="") +train_arg.add_argument("--pair_interval", type=int, default=1, help="") +train_arg.add_argument("--pair_use_cache", type=str2bool, + default=True, help="") +train_arg.add_argument("--max_step", type=int, default=1e7, help="") +train_arg.add_argument("--optimizer", type=str, default="adam", + choices=["adam", "rmsprop", "sgd"], + help="") +train_arg.add_argument("--learning_rate", type=float, default=1e-3, help="") +train_arg.add_argument("--max_grad_norm", type=float, default=-1.0, help="") +train_arg.add_argument("--check_numerics", type=str2bool, + default=True, help="") +train_arg.add_argument("--tqdm_width", type=int, default=79, help="") +train_arg.add_argument("--mining_sched", type=str, default="none", + help="Scheduler: 'none', 'step', 'smooth'") +train_arg.add_argument("--mining_base", type=int, default=1, + help="Starting number of batches") +train_arg.add_argument("--mining_step", type=int, default=0, + help="Add one batch every these many (0 to disable)") +train_arg.add_argument("--mining_ceil", type=int, default=0, + help="Max number of batches (0 to disable)") + +# Pretrain information to force in if needed +train_arg.add_argument("--pretrained_kp", type=str, default="", help="") +train_arg.add_argument("--pretrained_ori", type=str, default="", help="") +train_arg.add_argument("--pretrained_desc", type=str, default="", help="") +train_arg.add_argument("--pretrained_joint", type=str, default="", help="") + +# ---------------------------------------- +# Validation +valid_arg = add_argument_group("Validation") +valid_arg.add_argument("--validation_interval", type=int, default=1e3, help="") +valid_arg.add_argument("--validation_rounds", type=int, default=100, help="") +valid_arg.add_argument("--neg_per_pos", type=float, default=100.0, help="") +valid_arg.add_argument("--valid_method", type=str, default="desc", help="") + +# ---------------------------------------- +# Test +test_arg = add_argument_group("Test") +test_arg.add_argument("--test_img_file", type=str, default="", help="") +test_arg.add_argument("--test_kp_file", type=str, default="", help="") +test_arg.add_argument("--test_out_file", type=str, default="", help="") +test_arg.add_argument("--test_num_keypoint", type=int, default=1000, help="") +test_arg.add_argument("--test_scl_intv", type=int, default=4, help="") +test_arg.add_argument("--test_min_scale_log2", type=int, default=1, help="") +test_arg.add_argument("--test_max_scale_log2", type=int, default=4, help="") +test_arg.add_argument("--test_kp_use_tensorflow", + type=str2bool, default=True, help="") +test_arg.add_argument("--test_nearby_ratio", type=float, default=1.0, help="") +test_arg.add_argument("--test_nms_intv", type=int, default=2, help="") +test_arg.add_argument("--test_edge_th", type=float, default=10.0, help="") + +train_arg = add_argument_group("Misc") +loss_arg.add_argument("--usage", type=float, default=0.96, help="Force GPU memory usage") + + +def get_config(argv): + config, unparsed = parser.parse_known_args() + + # Sanity checks + if config.augment_rotations and not config.use_augmented_set: + config.use_augmented_set = True + print('-- Forcing use_augmented_set = True') + + if config.augment_rotations and config.subtask is "desc": + raise RuntimeError("Rotation augmentation is incompatible 
" + "with descriptor training.") + + if config.old_data_compat and ( + config.use_augmented_set or config.augment_rotations): + raise RuntimeError("Options incompatible with legacy data generation.") + + if config.subtask == 'joint': + what = config.finetune.split('+') + if ("kp" not in what) and ("ori" not in what) and \ + ("desc" not in what): + raise RuntimeError("Nothing to finetune? Check --finetune") + + + # Create the prefix automatically from run command + if config.logdir == "": + config.logdir = "-".join(argv) + config.logdir = os.path.join("logs", + config.logdir.replace("main.py", "")) + + return config, unparsed + + +def save_config(model_dir, config): + param_path = os.path.join(model_dir, config.subtask, "params.json") + + print("[*] MODEL dir: %s" % model_dir) + print("[*] PARAM path: %s" % param_path) + + with open(param_path, "w") as fp: + json.dump(config.__dict__, fp, indent=4, sort_keys=True) + +# +# config.py ends here diff --git a/datasets/__init__.py b/datasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/datasets/eccv2016/__init__.py b/datasets/eccv2016/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/datasets/eccv2016/custom_types.py b/datasets/eccv2016/custom_types.py new file mode 100644 index 0000000..c80d3a0 --- /dev/null +++ b/datasets/eccv2016/custom_types.py @@ -0,0 +1,617 @@ +# custom_types.py --- +# +# Filename: custom_types.py +# Description: Python Module for custom types +# Author: Kwang +# Maintainer: +# Created: Fri Jan 16 12:01:52 2015 (+0100) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# +# Copyright (C), EPFL Computer Vision Lab. +# +# + +# Code: + +# ------------------------------------------- +# Imports +from __future__ import print_function + +import hashlib +import os +import shutil +import string +from copy import deepcopy +from datetime import timedelta +from inspect import currentframe, getframeinfo + +import numpy as np +from flufl.lock import Lock +from parse import parse # ??? is parse a standard package ??? 
+ + +# ------------------------------------------- +# Path structure +class pathConfig: + dataset = None + temp = None + result = None + debug = None + + train_data = None + train_mask = None + + # ------------------------------------------------------------------------- + # Prefix stuff that will affect keypoint generation and pairing + def prefix_dataset(self, param, do_sort=True): + prefixstr = "" + + group_list = ['dataset'] + + exclude_list = {} + exclude_list['dataset'] = ['trainSetList', 'validSetList'] + + for group_name in group_list: + key_list = deepcopy( + list(getattr(param, group_name).__dict__.keys())) + if do_sort: + key_list.sort() + for key_name in key_list: + if key_name not in exclude_list[group_name]: + prefixstr += str(getattr( + getattr(param, group_name), + key_name)) + + prefixstr = hashlib.md5(prefixstr.encode()).hexdigest() + prefixstr += "/" + + return prefixstr + + # -------------------------------------------------------------------------- + # Prefix stuff that will affect patch extraction + def prefix_patch(self, param, do_sort=True): + prefixstr = "" + + group_list = ['patch'] + + exclude_list = {} + exclude_list['patch'] = [] + + for group_name in group_list: + key_list = deepcopy( + list(getattr(param, group_name).__dict__.keys())) + if do_sort: + key_list.sort() + for key_name in key_list: + if key_name not in exclude_list[group_name]: + prefixstr += str(getattr(getattr(param, + group_name), key_name)) + + prefixstr = hashlib.md5(prefixstr.encode()).hexdigest() + prefixstr += "/" + + return prefixstr + + # ------------------------------------------------------------------------- + # Prefix stuff that will affect learning outcome + def prefix_learning(self, param, do_sort=True): + prefixstr = "" + + group_list = ['model', 'learning'] + + for group_name in group_list: + key_list = deepcopy( + list(getattr(param, group_name).__dict__.keys())) + if do_sort: + key_list.sort() + for key_name in key_list: + prefixstr += str(getattr(getattr(param, group_name), key_name)) + + prefixstr = hashlib.md5(prefixstr.encode()).hexdigest() + prefixstr += "/" + + return prefixstr + + def setupTrain(self, param, dataset_in): + + # Lock to prevent race condition + if not os.path.exists(".locks"): + os.makedirs(".locks") + lock_file = ".locks/setup.lock" + if os.name == "posix": + + lock = Lock(lock_file) + lock.lifetime = timedelta(days=2) + frameinfo = getframeinfo(currentframe()) + print(">> {}/{}: waiting to obtain lock <<".format( + frameinfo.filename, frameinfo.lineno)) + lock.lock() + print(">> obtained lock for posix system <<") + elif os.name == "nt": + import filelock + lock = filelock.FileLock(lock_file) + lock.acquire() + if lock.is_locked: + print(">> obtained lock for windows system <<") + else: + print("Unknown operating system, lock unavailable") + + + + # --------------------------------------------------------------------- + # Base path + # for dataset + # self.dataset = os.getenv('PROJ_DATA_DIR', '') + # if self.dataset == '': + # self.dataset = os.path.expanduser("~/Datasets") + # self.dataset += "/" + str(dataset_in) + self.dataset = os.path.join( + param.data_dir, str(dataset_in) + ).rstrip("/") + "/" + # for temp + # self.temp = os.getenv('PROJ_TEMP_DIR', '') + # if self.temp == '': + # self.temp = os.path.expanduser("~/Temp") + # self.temp += "/" + str(dataset_in) + self.temp = os.path.join( + param.temp_dir, str(dataset_in) + ).rstrip("/") + "/" + # for volatile temp + # self.volatile_temp = os.getenv('PROJ_VOLTEMP_DIR', '') + # if self.volatile_temp 
== '': + # self.volatile_temp = "/scratch/" + os.getenv('USER') + "/Temp" + # self.volatile_temp += "/" + str(dataset_in) + self.volatile_temp = os.path.join( + param.scratch_dir, str(dataset_in) + ).rstrip("/") + "/" + # self.negdata = os.path.expanduser("~/Datasets/NegData/") # LEGACY + + # # create dump directory if it does not exist + # if not os.path.exists(self.temp): + # os.makedirs(self.temp) + + # --------------------------------------------------------------------- + # Path for data loading + + # path for synthetic data generation + self.train_data = self.dataset + "train/" + self.prefix_dataset(param) + self.train_mask = (self.dataset + "train/" + + self.prefix_dataset(param) + "masks/") + if not os.path.exists(self.train_data): + # Check if the un-sorted prefix exists + unsorted_hash_path = (self.dataset + "train/" + + self.prefix_dataset(param, do_sort=False)) + if os.path.exists(unsorted_hash_path): + os.symlink(unsorted_hash_path.rstrip("/"), + self.train_data.rstrip("/")) + # shutil.copytree(unsorted_hash_path, self.train_data) + # shutil.rmtree(unsorted_hash_path) + + # dump folder for dataset selection (sampling and etc) + self.train_dump = self.temp + "train/" + self.prefix_dataset(param) + if not os.path.exists(self.train_dump): + # Check if the un-sorted prefix exists + unsorted_hash_path = (self.temp + "train/" + + self.prefix_dataset(param, do_sort=False)) + if os.path.exists(unsorted_hash_path): + os.symlink(unsorted_hash_path.rstrip("/"), + self.train_dump.rstrip("/")) + # shutil.copytree(unsorted_hash_path, self.train_dump) + # shutil.rmtree(unsorted_hash_path) + + # dump folder for patch extraction (if necessary) + self.patch_dump = (self.temp + "train/" + self.prefix_dataset(param) + + self.prefix_patch(param)) + if not os.path.exists(self.patch_dump): + # Check if the un-sorted prefix exists + unsorted_hash_path = (self.temp + "train/" + + self.prefix_dataset(param, do_sort=False) + + self.prefix_patch(param, do_sort=False)) + if os.path.exists(unsorted_hash_path): + os.symlink(unsorted_hash_path.rstrip("/"), + self.patch_dump.rstrip("/")) + # shutil.copytree(unsorted_hash_path, self.patch_dump) + # shutil.rmtree(unsorted_hash_path) + + # volatile dump folder for patch extraction (if necessary) + self.volatile_patch_dump = (self.volatile_temp + "train/" + + self.prefix_dataset(param) + + self.prefix_patch(param)) + # if not os.path.exists(self.volatile_patch_dump): + # # Check if the un-sorted prefix exists + # unsorted_hash_path = (self.volatile_temp + "train/" + + # self.prefix_dataset(param, do_sort=False) + + # self.prefix_patch(param, do_sort=False)) + # os.symlink(unsorted_hash_path.rstrip("/"), + # self.volatile_patch_dump.rstrip("/")) + # # shutil.copytree(unsorted_hash_path, self.volatile_patch_dump) + # # shutil.rmtree(unsorted_hash_path) + + # debug info folder + self.debug = self.dataset + "debug/" + self.prefix_dataset(param) + if not os.path.exists(self.debug): + # Check if the un-sorted prefix exists + unsorted_hash_path = (self.dataset + "debug/" + + self.prefix_dataset(param, do_sort=False)) + if os.path.exists(unsorted_hash_path): + shutil.copytree(unsorted_hash_path, self.debug) + shutil.rmtree(unsorted_hash_path) + + # # --------------------------------------------------------------------- + # # Path for the model learning + # resdir = os.getenv('PROJ_RES_DIR', '') + # if resdir == '': + # resdir = os.path.expanduser("~/Results") + # self.result = (resdir + "/" + + # self.getResPrefix(param) + + # self.prefix_dataset(param) + + # 
self.prefix_patch(param) + + # self.prefix_learning(param)) + # if not os.path.exists(self.result): + # # Check if the un-sorted prefix exists + # unsorted_hash_path = (resdir + "/" + + # self.getResPrefix(param, do_sort=False) + + # self.prefix_dataset(param, do_sort=False) + + # self.prefix_patch(param, do_sort=False) + + # self.prefix_learning(param, do_sort=False)) + + # if os.path.exists(unsorted_hash_path): + # shutil.copytree(unsorted_hash_path, self.result) + # shutil.rmtree(unsorted_hash_path) + + # # create result directory if it does not exist + # if not os.path.exists(self.result): + # os.makedirs(self.result) + + if os.name == "posix": + lock.unlock() + elif os.name == "nt": + lock.release() + else: + pass + + def getResPrefix(self, param, do_sort=True): + trainSetList = deepcopy(list(param.dataset.trainSetList)) + if hasattr(param.dataset, "validSetList"): + trainSetList += deepcopy(list(param.dataset.validSetList)) + + if do_sort: + trainSetList.sort() + res_prefix = param.dataset.dataType + '/' + \ + hashlib.md5( + "".join(trainSetList).encode()).hexdigest() + + # this is probably deprecated + if 'prefixStr' in param.__dict__.keys(): + print('I am not deprecated!!!') + res_prefix = param.prefixStr + "/" + res_prefix + + return res_prefix + '/' + + +# ------------------------------------------- +# Parameter structure +class paramGroup: + + def __init__(self): + # do nothing + return + + +class paramStruct: + + def __init__(self): + + # --------------------------------------------------------------------- + # NOTICE: everything set to None is to make it crash without config + + # --------------------------------------------------------------------- + # Legacy + self.legacy = paramGroup() + # WHETHER TO GET A NEW CANNONICAL ANGLE WHETHER WE LEARN WITH SIFT + # PRE_WARPING + self.legacy.bRefineAngle = False + self.legacy.bLearn4Refienment = False + self.legacy.bUseCached = True # Boolean for using dumped data + # THe throw away threshold in degrees (not used if negative) + self.legacy.fThrowAwayTh = -1 + self.legacy.fConsiderRatio = 3.0 + # If we want to learn the std to filter out bad keypoints + self.legacy.bLearnStd = False + + self.legacy.numBin = 72 + # to mark parts to ignore when applying the shape constraint + self.legacy.fRBFKernelTh = 0.01 + # width of the kernel in bins (kernelWidth == 0.368, 2*kernelWidth == + # 0.018 in shape) + self.legacy.kernelWidth = 2.0 + # CNN_LEARNING_RATE = 0.01 + # GHH_LEARNING_RATE = 0.003 + self.legacy.hyperparamClassif = 1.0 + self.legacy.hyperparamShape = 0.01 * \ + (1.0 / (self.legacy.kernelWidth * 2 * 2 + 1)) + self.legacy.hyperparamLaplacian = 0.001 + + # --------------------------------------------------------------------- + # Parameters for SIFT descriptor + # nNumOrientation = SIFT_ORI_HIST_BINS # Number of orientations + # to be used (might be deprecated!) 
+ self.keypoint = paramGroup() + self.keypoint.sKpType = "SIFT" # Descriptor type + self.keypoint.sDescType = "SIFT" # Descriptor type + self.keypoint.desc_num_orientation = 72 + self.keypoint.fOverlapThresh = 40.0 + + self.keypoint.fMinKpSize = 0.0 # min allowed size of a kp + # Consider keypoints within these pixels as duplicates + self.keypoint.fDupRange = 5.0 + self.keypoint.bNewCleanMethod = False # for backward compatibility + # maximum number of keypoints (-1 for unlimited) + self.keypoint.dMaxKeypointNum = -1 + + # --------------------------------------------------------------------- + # # Paramters for patch extraction + self.patch = paramGroup() + # self.patch.nPatchSize = None # Width and Height of the patch + # self.patch.sDataType = None # Image Data type for Patches + # self.patch.bNormalizePatch = True # NOrmalize single patch? + # self.patch.bLogPolarPatch = False # Use log-polar for patches? + # self.patch.bPolarPatch = False # Use log-polar for patches? + # self.patch.bApplyWeights = None # Whether to apply weights to input + # # fRatioScale will be multiplied to the SIFT scale (negative uses + # # fixed scale) + # self.patch.fRatioScale = None + # # use this size if smaller than this size for each kp + # self.patch.fMinSize = None + # self.patch.bPyramidLearn = False # learning using pyramid patch + # extraction + + # --------------------------------------------------------------------- + # Parameters for synthetic images + self.synth = paramGroup() + # np.pi/4.0 # From -pi/4 to pi/4 + self.synth.dPerturbType = 0 + self.synth.nbAngleInPlane = 5 + self.synth.nbAngleOutPlane = 5 + self.synth.aInPlaneRot \ + = 1.0 / (self.synth.nbAngleInPlane - 1) \ + * np.arange(self.synth.nbAngleInPlane, dtype=np.float) \ + * np.pi - np.pi / 2.0 # From -pi/2 to pi/2 + self.synth.aOutPlaneRot \ + = 1.0 / (self.synth.nbAngleOutPlane - 1) \ + * np.arange(self.synth.nbAngleOutPlane, dtype=np.float) \ + * np.pi / 2.0 - np.pi / 4.0 # From -pi/4 to pi/4 + self.synth.aScaleChange = [0.5, 0.75, 1.0, 1.5, 2.0] + + self.synth.bFrontalLearn = False # learning with frontal pair only + self.synth.sFrontalName = 'img1.png' + + # print 'In plane rotation are: ' + # for a in aInPlaneRot: + # print a, ' ' + # print '\n' + # print 'Out of plane rotation are: ' + # for a in aOutPlaneRot: + # print a, ' ' + # print '\n' + + # --------------------------------------------------------------------- + # Parameters for testing + self.PCA = paramGroup() + self.PCA.PCAdim = None + + # --------------------------------------------------------------------- + # Model parameters + self.model = paramGroup() + # Which type am I running in terms of python executable + self.model.modelType = None + # self.model.num_siamese = None # number of siamese clones + # self.model.fRatio = None # the ratio of the WNN + self.model.bNormalizeInput = None # whether to normalize the input + + # --------------------------------------------------------------------- + # Optimization parameters + self.learning = paramGroup() + # name of the solver (e.g. 
adam, adadelta) + self.learning.optimizer = None + self.learning.n_epochs = None # maximum number of epochs to run + # self.learning.min_epoch = 5 # minimum number of epochs to run + self.learning.batch_size = None # batch size for SGD + # self.learning.alpha_L2 = None # L2 regularisor weight + # # SGD with momentum + # self.learning.momentum = None # momentum component + # self.learning.learning_rate = None # learning rate + # # Adadelta + # self.learning.decay = None # ADADELTA param + # self.learning.epsilon = None # ADADELTA param + # ADAM + self.learning.lr = None + self.learning.beta1 = None + self.learning.beta2 = None + self.learning.epsilon = None + # Epoch LR decay param + # learning rate is halved every this epoch + self.learning.lr_half_interval = 2.0 + # reduction of learning rate after pretrain + self.learning.pretrain_reduction = 1e-2 + + # --------------------------------------------------------------------- + # Dataset parameters + self.dataset = paramGroup() + self.dataset.dataType = "ECCV" + + # --------------------------------------------------------------------- + # Validation parameters (Not used in prefix generation) + self.validation = paramGroup() + + def loadParam(self, file_name, verbose=True): + + config_file = open(file_name, 'rb') + if verbose: + print("Parameters") + + # ------------------------------------------ + # Read the configuration file line by line + while True: + line2parse = config_file.readline().decode("utf-8") + if verbose: + print(line2parse, end='') + + # Quit parsing if we reach the end + if not line2parse: + break + + # Parse + parse_res = parse( + '{parse_type}: {group}.{field_name} = {read_value};{trash}', + line2parse) + + # Skip if it is something we cannot parse + if parse_res is not None: + if parse_res['parse_type'] == 'ss': + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], parse_res['read_value'].split(',')) + elif parse_res['parse_type'] == 's': + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], parse_res['read_value']) + elif parse_res['parse_type'] == 'd': + eval_res = eval(parse_res['read_value']) + if isinstance(eval_res, np.ndarray): + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], eval_res.astype(int)) + elif isinstance(eval_res, list): + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], [int(v) for v in eval_res]) + else: + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], int(eval_res)) + elif parse_res['parse_type'] == 'f': + eval_res = eval(parse_res['read_value']) + if isinstance(eval_res, np.ndarray): + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], eval_res.astype(float)) + elif isinstance(eval_res, list): + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], [float(v) for v in eval_res]) + else: + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], float(eval_res)) + elif parse_res['parse_type'] == 'sf': + setattr(getattr(self, parse_res['group']), + parse_res['field_name'], + [float(eval(s)) for s in + parse_res['read_value'].split(',')]) + elif parse_res['parse_type'] == 'b': + setattr(getattr(self, parse_res['group']), parse_res[ + 'field_name'], bool(int(parse_res['read_value']))) + else: + if verbose: + print(' L-> skipped') + + # Check if we want the small range for perturbations and update it + if self.synth.dPerturbType == 1: # using smaller synthetics + self.synth.nbAngleInPlane = 5 + self.synth.nbAngleOutPlane = 1 + self.synth.aInPlaneRot 
\ + = 1.0 / (self.synth.nbAngleInPlane - 1) \ + * np.arange(self.synth.nbAngleInPlane, dtype=np.float) \ + * np.pi / 2.0 - np.pi / 4.0 # From -pi/4 to pi/4 + self.synth.aOutPlaneRot = [0.0] + self.synth.aScaleChange = [0.75, 1.0, 1.0 / 0.75] + elif self.synth.dPerturbType == 2: # using only the actual + self.synth.nbAngleInPlane = 1 + self.synth.nbAngleOutPlane = 1 + self.synth.aInPlaneRot = [0.0] + self.synth.aOutPlaneRot = [0.0] + self.synth.aScaleChange = [1.0] + elif self.synth.dPerturbType == 3: # using only the frontal + self.synth.nbAngleInPlane = 5 + self.synth.nbAngleOutPlane = 1 + self.synth.aInPlaneRot \ + = 1.0 / (self.synth.nbAngleInPlane - 1) \ + * np.arange(self.synth.nbAngleInPlane, dtype=np.float) \ + * np.pi / 2.0 - np.pi / 4.0 # From -pi/4 to pi/4 + self.synth.aOutPlaneRot = [0.0] + self.synth.aScaleChange = [0.75, 1.0, 1.0 / 0.75] + + +# ------------------------------------------ +# Data structure +# dataStruct = np.dtype([ +# ('gt_angle', np.float64), +# ('up_angle', np.float64), +# ('rotVec', np.float64,3) +# ]) + +class dataStruct: + fSIFTAngle = 0 # SIFT angle + fGTAngle = 0 # Ground truth angle obtained by warping the reference + + mRawPatch = [] # patch data near the keypoint region + # patch data of a warped (according to SIFT estimation) region + mWarpedPatch = [] + vHistogram = [] # Orientation Histogram of the selected patch + vWarpedHistogram = [] # Warped Orientation Histogram of the selected patch + + mRefRawPatch = [] # patch data near the keypoint region + # patch data of a warped (according to SIFT estimation) region + mRefWarpedPatch = [] + + # patch data of a warped (according to GT estimation) region + mGTWarpedPatch = [] + + vRefHistogram = [] # Orientation Histogram of the selected patch + vRefWarpedHistogram = [] # Warped Orient. Histogram of the selected patch + + # descriptor computed for all angles (the name is misleading here!) + mOrientedSIFTdesc = [] + vSIFTdesc = [] # descriptor computed with SIFT angle + vGTdesc = [] # descriptor computed with GT angle + + dLabel = 0 # Label of the data + fLabelStd = 0 # Std of the keypoints having this label in the training set + pt = [] # coordinates of the keypoint + + sFileName = [] # Original File name which this data was extracted + + # ------------------------------------------------------------------------ + # Fro Brown data + refID = None + kpID = None + interest_x = None + interest_y = None + interest_orientation = None + interest_scale = None + # mOrigPatch = None + + +class data2DrawStruct: + # orig_keypoint = cv2.KeyPoint() # Opencv Keypoint + # updt_keypoint = cv2.KeyPoint() # Opencv Keypoints with updated + # angle (recomputed) + gt_angle = 0 + up_angle = 0 + # rotation Vector used to synthesize the image and keypoint + rotVec = np.zeros((3)) + + +# +# custom_types.py ends here diff --git a/datasets/eccv2016/eccv.py b/datasets/eccv2016/eccv.py new file mode 100644 index 0000000..a997a7e --- /dev/null +++ b/datasets/eccv2016/eccv.py @@ -0,0 +1,733 @@ +# eccv.py --- +# +# Filename: eccv.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Fri Feb 26 12:39:54 2016 (+0100) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# Thu 29 Jun 14:52:41 CEST 2017, Kwang Moo Yi +# +# - Adaptation to be used inside the modules +# +# Mon 26 Jun 13:52:37 CEST 2017, Kwang Moo Yi +# +# - Runs differently based on user name. 
Will use cvlab environment if possible +# - NFS safe lock is now re-enabled to prevent race condition. +# + +# Code: + + +# ----------------------------------------------------------------------------- +# Imports +from __future__ import print_function + +import hashlib +import multiprocessing as mp +import os +import shutil +import sys +import time +from datetime import timedelta +from inspect import currentframe, getframeinfo + +import cv2 +import h5py +import numpy as np +import six +from flufl.lock import Lock + +# from Utils.dataset_tools.helper import (load_patches, +# random_mine_non_kp_with_2d_distance, +# random_mine_non_kp_with_3d_blocking) +from datasets.eccv2016.helper import (load_patches, + random_mine_non_kp_with_2d_distance, + random_mine_non_kp_with_3d_blocking) +from six.moves import xrange +# from Utils.custom_types import pathConfig +from datasets.eccv2016.custom_types import pathConfig +from utils import loadh5, saveh5 + +# ratio of cpu cores this script is allowed to use +ratio_CPU = 0.5 + + +# ----------------------------------------------------------------------------- +# Dataset class +def createDump(args): + + idx_jpg, jpg_file, train_data_dir, dump_data_dir, tmp_patch_dir, \ + scale_hist, scale_hist_c, out_dim, param, queue = args + + # queue for monitoring + if queue is not None: + queue.put(idx_jpg) + + final_dump_file_name = tmp_patch_dir + jpg_file.replace(".jpg", ".h5") + if not os.path.exists(final_dump_file_name): + # load image + bUseColorImage = getattr(param.patch, "bUseColorImage", False) + if not bUseColorImage: + bUseDebugImage = getattr(param.patch, "bUseDebugImage", False) + if not bUseDebugImage: + img = cv2.cvtColor(cv2.imread(train_data_dir + jpg_file), + cv2.COLOR_BGR2GRAY) + else: + # debug image contains keypoints survive the SfM + # pipeline (blue) and the rest (in red) + img = cv2.cvtColor(cv2.imread(train_data_dir + jpg_file), + cv2.COLOR_BGR2GRAY) + debug_jpg = train_data_dir + jpg_file.replace( + ".jpg", "-kp-minsc-" + str(param.dataset.fMinKpSize) + + ".jpg") + imgd = cv2.cvtColor(cv2.imread(debug_jpg), cv2.COLOR_BGR2GRAY) + img = cv2.resize(imgd, img.shape[:2][::-1]) + + in_dim = 1 + else: + img = cv2.imread(train_data_dir + jpg_file) + in_dim = 3 + assert(img.shape[-1] == in_dim) + + # ---------------------------------------- + # load kp data + # Note that data is in order of [x,y,scale,angle,pointid,setid]] + + # For positive: select randomly from "valid_keypoints" + pos_kp_file_name = train_data_dir + jpg_file.replace( + ".jpg", "-kp-minsc-" + str(param.dataset.fMinKpSize) + ".h5") + with h5py.File(pos_kp_file_name, "r") as h5file: + sfm_kp = np.asarray(h5file["valid_keypoints"], dtype="float") + non_sfm_kp = np.asarray(h5file["other_keypoints"], dtype="float") + # add two dummy fields to non_sfm since they don"t have id + # and group. 
THis means pointid,setid are set to 1 for non_sfm_kp + non_sfm_kp = np.concatenate( + [non_sfm_kp, -np.ones((non_sfm_kp.shape[0], 2))], + axis=1 + ) + + pos_kp = np.concatenate((sfm_kp, np.ones((sfm_kp.shape[0], 1), + dtype="float")), axis=1) + # Select subset for positives (assuming we have same coordinates for + # all image) + dump_file_name = dump_data_dir + jpg_file.replace(".jpg", + "_idxPosSel.h5") + + # if dump file not exist, create it; otherwise load it + if not os.path.exists(dump_file_name): + # idxPosShuffle = np.argsort(pos_kp[3,:])[::-1] # sort backwards + idxPosShuffle = np.random.permutation( + len(pos_kp)) # random shuffle + pos_2_keep = len(idxPosShuffle) + if param.dataset.nPosPerImg > 0: + pos_2_keep = min(pos_2_keep, param.dataset.nPosPerImg) + idxPosSel = idxPosShuffle[:pos_2_keep] # shuffle the points + to_save = {"saveval": idxPosSel} + saveh5(to_save, dump_file_name) + else: + to_load = loadh5(dump_file_name) + idxPosSel = to_load["saveval"] + pos_kp = pos_kp[idxPosSel] + + # negative sampling: + # + # 1) only use Sfm points, too close keypoints will be rejected as + # negative pair (because of high potential overlapping) + # + # 2) Use all SIFT points, when the overlapping of two feature context + # windows is larger than a threshold, it will be rejected as a negative + # pair (because it shares too much common regions..). In this step, we + # concatenate sfm_kp and non_sfm_kp to form keypoint class. + neg_mine_method = getattr(param.patch, "sNegMineMethod", + "use_only_SfM_points") + if neg_mine_method == "use_only_SfM_points": + # where is + neg_kp = random_mine_non_kp_with_2d_distance( + img, sfm_kp, scale_hist, scale_hist_c, param) + elif neg_mine_method == "use_all_SIFT_points": + max_iter = getattr(param.patch, "nMaxRandomNegMineIter", 100) + sift_kp = np.concatenate([sfm_kp, non_sfm_kp], axis=0) + neg_kp = random_mine_non_kp_with_3d_blocking( + img, sift_kp, scale_hist, scale_hist_c, param, + max_iter=max_iter) + else: + raise ValueError("Mining method {} is not supported!" 
+ "".format(neg_mine_method)) + # add another dim indicating good or bad + neg_kp = np.concatenate((neg_kp, np.zeros((len(neg_kp), 1), + dtype="float")), axis=1) + # concatenate negative and positives + kp = np.concatenate((pos_kp, neg_kp), axis=0) + + # Retrive target values, 1 for pos and 0 for neg + y = kp[:, 6] + + # Retrieve angles + angle = kp[:, 3] + + # Assign ID to keypoints (for sfm points, ID = 3d point ind; for non + # sfm kp, ID = -1) + ID = kp[:, 4] + + # load patches with id (drop out of boundary) + bPerturb = getattr(param.patch, "bPerturb", False) + fPerturbInfo = getattr(param.patch, "fPerturbInfo", np.zeros((3,))) + nAugmentedRotations = getattr(param.patch, "nAugmentedRotations", 1) + fAugmentRange = getattr(param.patch, "fAugmentRange", 0) + fAugmentCenterRandStrength = getattr( + param.patch, "fAugmentCenterRandStrength", 0 + ) + sAugmentCenterRandMethod = getattr( + param.patch, "sAugmentCenterRandMethod", "uniform" + ) + + cur_data_set = load_patches( + img, kp, y, ID, angle, param.patch.fRatioScale, + param.patch.fMaxScale, param.patch.nPatchSize, + param.model.nDescInputSize, in_dim, bPerturb, fPerturbInfo, + bReturnCoords=True, + nAugmentedRotations=nAugmentedRotations, + fAugmentRange=fAugmentRange, + fAugmentCenterRandStrength=fAugmentCenterRandStrength, + sAugmentCenterRandMethod=sAugmentCenterRandMethod, + nPatchSizeAug=param.patch.nPatchSizeAug, + ) + # save dump as dictionary using saveh5 + # from here Kernel died, because of not finding "MKL_intel_thread.dll" + + # pdb.set_trace() + # here the data are saved in tmp_dump file, keys are numbers [0,1..] + tmpdict = dict( + (str(_idx), np.asarray(_data)) for _idx, _data in + zip(np.arange(len(cur_data_set)), cur_data_set) + ) + saveh5(tmpdict, final_dump_file_name) + + return idx_jpg + + +class data_obj(object): + """ Dataset Object class. 
+ + Implementation of the dataset object + """ + + def __init__(self, param, mode="train"): + + # Set parameters + self.out_dim = 1 # a single regressor output + + # Load data + # self.x = None # data (patches) to be used for learning [N, + # # channel, w, h] + # self.y = None # label/target to be learned + # self.ID = None # id of the data for manifold regularization + self.load_data(param, mode) + + # Set parameters + self.num_channel = self.x.shape[1] # single channel image + # patch width == patch height (28x28) + self.patch_height = self.x.shape[2] + self.patch_width = self.x.shape[3] + + self + + def load_data(self, param, mode): + + print(" --------------------------------------------------- ") + print(" ECCV Data Module ") + print(" --------------------------------------------------- ") + + # for each dataset item in the list + id_base = 0 + for idxSet in xrange(len(param.dataset.trainSetList)): + + pathconf = pathConfig() + pathconf.setupTrain(param, param.dataset.trainSetList[idxSet]) + + # Save the hash, for the pairs folder + # TODO make this work with multiple datasets + self.hash = '/'.join(pathconf.patch_dump.split('/')[-3:]) + + # load the data from one dataset (multi dataset can be used) + cur_data = self.load_data_for_set(pathconf, param, mode) + fixed_id = cur_data[2] + (cur_data[2] >= 0) * id_base + if idxSet == 0: + self.x = cur_data[0] + self.y = cur_data[1] + self.ID = fixed_id + self.pos = cur_data[3] + self.angle = cur_data[4] + self.coords = cur_data[5] + else: + self.x = np.concatenate([self.x, cur_data[0]]) + self.y = np.concatenate([self.y, cur_data[1]]) + self.ID = np.concatenate([self.ID, fixed_id]) + self.pos = np.concatenate([self.pos, cur_data[3]]) + self.angle = np.concatenate([self.angle, cur_data[4]]) + self.coords = np.concatenate([self.coords, cur_data[5]]) + + id_base = np.max(self.ID) + 1 + + def load_data_for_set(self, pathconf, param, mode): + + # ---------------------------------------------------------------------- + # Train, Validation, and Test + # mlab = matlab.engine.start_matlab() + + # Read from pathconf + # Original implementation + train_data_dir = pathconf.dataset + dump_data_dir = pathconf.train_dump + dump_patch_dir = pathconf.patch_dump + # local (or volatile) copy of the dump data + tmp_patch_dir = pathconf.volatile_patch_dump + + print("train_data_dir = {}".format(train_data_dir)) + print("dump_data_dir = {}".format(dump_data_dir)) + print("dump_patch_dir = {}".format(dump_patch_dir)) + print("tmp_patch_dir = {}".format(tmp_patch_dir)) + + if not os.path.exists(dump_data_dir): + os.makedirs(dump_data_dir) + if not os.path.exists(dump_patch_dir): + os.makedirs(dump_patch_dir) + if not os.path.exists(tmp_patch_dir): + os.makedirs(tmp_patch_dir) + + # Check if we have the big h5 file ready + big_file_name = dump_patch_dir + mode + "-data-chunked.h5" + # if os.getenv("MLTEST_DEBUG", default=""): + # import pdb + # pdb.set_trace() + + # Mutex lock + # + # We will create an nfs-safe lock file in a temporary directory to + # prevent our script from using corrupted, or data that is still being + # generated. This allows us to launch multiple instances at the same + # time, and allow only a single instance to generate the big_file. 
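+        # Note: as in custom_types.pathConfig.setupTrain, an NFS-safe
+        # flufl.lock is used on POSIX systems and the third-party "filelock"
+        # package on Windows, so that only one process generates the chunked
+        # dump at a time. The resulting "<mode>-data-chunked.h5" file holds
+        # the datasets "x", "y", "ID", "pos", "angle" and "coords" (plus a
+        # "time_stamp"), chunked per sample so they can be read in batches
+        # further below.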
+ if not os.path.exists(".locks"): + os.makedirs(".locks") + check_lock_file = ".locks/" + \ + hashlib.md5(big_file_name.encode()).hexdigest() + if os.name == "posix": + check_lock = Lock(check_lock_file) + check_lock.lifetime = timedelta(days=2) + frameinfo = getframeinfo(currentframe()) + print("-- {}/{}: waiting to obtain lock --".format( + frameinfo.filename, frameinfo.lineno)) + check_lock.lock() + print(">> obtained lock for posix system<<") + elif os.name == "nt": + import filelock + check_lock = filelock.FileLock(check_lock_file) + check_lock.timeout = 2000 + check_lock.acquire() + if check_lock.is_locked: + print(">> obtained lock for windows system <<") + else: + print("Unknown operating system, lock unavailable") + + if not os.path.exists(big_file_name): + + if not os.path.exists(dump_patch_dir + mode + "-data.h5"): + + # Read scale histogram + hist_file = h5py.File(train_data_dir + + "scales-histogram-minsc-" + + str(param.dataset.fMinKpSize) + ".h5", + "r") + scale_hist = np.asarray(hist_file["histogram_bins"]).flatten() + scale_hist /= np.sum(scale_hist) + scale_hist_c = np.asarray( + hist_file["histogram_centers"] + ).flatten() + + # Read list of images from split files + split_name = "" + split_name += str(param.dataset.nTrainPercent) + "-" + split_name += str(param.dataset.nValidPercent) + "-" + split_name += str(param.dataset.nTestPercent) + "-" + if mode == "train": + split_name += "train-" + elif mode == "valid": + split_name += "val-" + elif mode == "test": + split_name += "test-" + split_file_name = train_data_dir + "split-" \ + + split_name + "minsc-" \ + + str(param.dataset.fMinKpSize) + ".h.txt" + list_jpg_file = [] + for file_name in list( + np.loadtxt(split_file_name, dtype=bytes) + ): + list_jpg_file += [ + file_name.decode("utf-8").replace( + "-kp-minsc-" + str(param.dataset.fMinKpSize), + ".jpg") + ] + + # ------------------------------------------------- + # Create dumps in parallel + # I am lazy so create arguments in loop lol + pool_arg = [None] * len(list_jpg_file) + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + pool_arg[idx_jpg] = (idx_jpg, + list_jpg_file[idx_jpg], + train_data_dir, + dump_data_dir, tmp_patch_dir, + scale_hist, scale_hist_c, + self.out_dim, param) + + # if true, use multi thread, otherwise use only single thread + if True: + number_of_process = int(ratio_CPU * mp.cpu_count()) + pool = mp.Pool(processes=number_of_process) + manager = mp.Manager() + queue = manager.Queue() + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + pool_arg[idx_jpg] = pool_arg[idx_jpg] + (queue,) + # map async + pool_res = pool.map_async(createDump, pool_arg) + # monitor loop + while True: + if pool_res.ready(): + # print("") + break + else: + size = queue.qsize() + print("\r -- " + mode + ": Processing image " + "{}/{}".format(size, len(list_jpg_file)), + end="") + sys.stdout.flush() + time.sleep(1) + pool.close() + pool.join() + print("\r -- " + mode + ": Finished Processing Images!") + # for debugging, if multi thread is used, then it is difficult + # to debug + else: + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + pool_arg[idx_jpg] = pool_arg[idx_jpg] + (None,) + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + createDump(pool_arg[idx_jpg]) + print("\r -- " + mode + ": Processing image " + "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), + end="") + sys.stdout.flush() + print("\r -- " + mode + ": Finished Processing Images!") + # ------------------------------------------------- + + # # -------------------- + # # use single thread 
for simplify debugging + # for idx_jpg in six.moves.xrange(len(list_jpg_file)): + # pool_arg[idx_jpg] = pool_arg[idx_jpg] + (None,) + # for idx_jpg in six.moves.xrange(len(list_jpg_file)): + # createDump(pool_arg[idx_jpg]) + # print("\r -- " + mode + ": Processing image " + # "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), + # end="") + # sys.stdout.flush() + # print("\r -- " + mode + ": Finished Processing Images!") + + # ------------------------------------------------------------------ + # Use only valid indices to ascertain mutual exclusiveness + id_file_name = train_data_dir + "split-" + id_file_name += str(param.dataset.nTrainPercent) + "-" + id_file_name += str(param.dataset.nValidPercent) + "-" + id_file_name += str(param.dataset.nTestPercent) + "-" + id_file_name += ("minsc-" + + str(param.dataset.fMinKpSize) + + ".h5") + + if mode == "train": + id_key = "indices_train" + elif mode == "valid": + id_key = "indices_val" + elif mode == "test": + id_key = "indices_test" + + with h5py.File(id_file_name, "r") as id_file: + id_2_keep = np.asarray(id_file[id_key]) + + # ind_2_keep = np.in1d(dataset[2], id_2_keep) + # ind_2_keep += dataset[2] < 0 + + # loop through files to figure out how many valid items we have +# pdb.set_trace() # for tracking of the dataset + + num_valid = 0 + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + + jpg_file = list_jpg_file[idx_jpg] + + print("\r -- " + mode + ": " + "Reading dumps to figure out number of valid " + "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), + end="") + sys.stdout.flush() + + # Load created dump + final_dump_file_name = tmp_patch_dir \ + + jpg_file.replace(".jpg", ".h5") + # Use loadh5 and turn it back to original cur_data_set + with h5py.File(final_dump_file_name, "r") as dump_file: + cur_ids = dump_file["2"].value + + # Find cur valid by looking at id_2_keep + cur_valid = np.in1d(cur_ids, id_2_keep) + # Add all negative labels as valid (neg data) + cur_valid += cur_ids < 0 + + # Sum it up + num_valid += np.sum(cur_valid) + + print("\n -- " + mode + ": " + "Found {} valid data points from {} files" + "".format(num_valid, len(list_jpg_file))) + + # Get the first data to simply check the shape + tmp_dump_file_name = tmp_patch_dir \ + + list_jpg_file[0].replace(".jpg", ".h5") + with h5py.File(tmp_dump_file_name, "r") as dump_file: + dataset_shape = [] + dataset_type = [] + for _idx in six.moves.xrange(len(dump_file.keys())): + dataset_shape += [dump_file[str(_idx)].shape] + dataset_type += [dump_file[str(_idx)].dtype] + + # create and save the large dataset chunk + with h5py.File(big_file_name, "w-") as big_file: + big_file["time_stamp"] = np.asarray(time.localtime()) + name_list = ["x", "y", "ID", "pos", "angle", "coords"] + # create the dataset storage chunk + for __i in six.moves.xrange(len(dataset_shape)): + big_file.create_dataset( + name_list[__i], + (num_valid,) + dataset_shape[__i][1:], + chunks=(1,) + dataset_shape[__i][1:], + maxshape=( + (num_valid,) + dataset_shape[__i][1:] + ), + dtype=dataset_type[__i] + ) + # loop through the file to save to a big chunk + save_base = 0 + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + + jpg_file = list_jpg_file[idx_jpg] + + print("\r -- " + mode + ": " + "Saving the data to the big dump " + "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), + end="") + sys.stdout.flush() + + # Load created dump + final_dump_file_name = tmp_patch_dir \ + + jpg_file.replace(".jpg", ".h5") + # Use loadh5 and turn it back to original cur_data_set + tmpdict = loadh5(final_dump_file_name) + 
cur_data_set = tuple([ + tmpdict[str(_idx)] for _idx in + range(len(tmpdict.keys())) + ]) + # Find cur valid by looking at id_2_keep + cur_valid = np.in1d(cur_data_set[2], id_2_keep) + # Add all negative labels as valid (neg data) + cur_valid += cur_data_set[2] < 0 + for __i in six.moves.xrange(len(dataset_shape)): + big_file[name_list[__i]][ + save_base:save_base + np.sum(cur_valid) + ] = cur_data_set[__i][cur_valid] + # Move base to the next chunk + save_base += np.sum(cur_valid) + + # Assert that we saved all + assert save_base == num_valid + + print("\n -- " + mode + ": " + "Done saving {} valid data points from {} files" + "".format(num_valid, len(list_jpg_file))) + + # -------------------------------------------------- + # Cleanup dump + for idx_jpg in six.moves.xrange(len(list_jpg_file)): + + jpg_file = list_jpg_file[idx_jpg] + + print("\r -- " + mode + ": " + "Removing dump " + "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), + end="") + sys.stdout.flush() + + # Delete dump + final_dump_file_name = tmp_patch_dir \ + + jpg_file.replace(".jpg", ".h5") + os.remove(final_dump_file_name) + + print("\r -- " + mode + ": " + "Cleaned up dumps! " + "Local dump is now clean!") + + else: + print(" -- Found old file without chunks. " + "Copying to new file with chunks...") + old_big_file_name = dump_patch_dir + mode + "-data.h5" + with h5py.File(old_big_file_name, "r") as old_big_file, \ + h5py.File(big_file_name, "w-") as big_file: + dataset = [] + + # load old train into array + name_list = ["x", "y", "ID", "pos", "angle", "coords"] + for __i in six.moves.xrange(len(name_list)): + dataset += [np.asarray(old_big_file[name_list[__i]])] + + # save train + big_file["time_stamp"] = np.asarray(time.localtime()) + + # allocate and write + for __i in six.moves.xrange(len(name_list)): + if name_list[__i] == "x": + chunk_shape = (1,) + dataset[__i].shape[1:] + else: + chunk_shape = None + big_file.create_dataset( + name_list[__i], + dataset[__i].shape, + data=dataset[__i], + chunks=chunk_shape, + maxshape=dataset[__i].shape, + ) + + print(" -- Finished creating chunked file, removing old...") + os.remove(old_big_file_name) + + # ---------------------------------------------------------------------- + # Copy to local tmp if necessary + if not os.path.exists(tmp_patch_dir + mode + "-data-chunked.h5"): + print(" -- " + mode + ": " + "Local dump does not exist! " + "Copying big dump to local drive... ") + shutil.copy(dump_patch_dir + mode + "-data-chunked.h5", + tmp_patch_dir + mode + "-data-chunked.h5") + else: + print(" -- " + mode + ": " + "Local dump exists. Checking timestamp...") + + # get timestamp from nfs + with h5py.File(dump_patch_dir + mode + "-data-chunked.h5", "r") \ + as nfs_file: + nfs_time = np.asarray(nfs_file["time_stamp"]) + + # get timestamp from local + with h5py.File(tmp_patch_dir + mode + "-data-chunked.h5", "r") \ + as local_file: + local_time = np.asarray(local_file["time_stamp"]) + + # if the two files have different time stamps + if any(nfs_time != local_time): + print(" -- " + mode + ": " + "Time stamps are different! " + "Copying big dump to local drive... ") + shutil.copy(dump_patch_dir + mode + "-data-chunked.h5", + tmp_patch_dir + mode + "-data-chunked.h5") + else: + print(" -- " + mode + ": " + "Time stamps are identical! 
Re-using local dump") + + # Free lock + if os.name == "posix": + check_lock.unlock() + print("-- free lock --") + elif os.name == "nt": + check_lock.release() + print("-- free lock --") + else: + pass + # ---------------------------------------------------------------------- + # Use local copy for faster speed + print(" -- " + mode + ": Loading from local drive... ") + big_file_name = tmp_patch_dir + mode + "-data-chunked.h5" + + # open big_file and don"t close + big_file = h5py.File(big_file_name, "r") + + x = big_file["x"] + # work arround for h5py loading all things to memory + read_batch_size = 10000 + read_batch_num = int(np.ceil( + float(big_file["x"].shape[0]) / + float(read_batch_size) + )) + + # Manual, since I don't want to bother debugging the below + # fields = ["y", "ID", "pos", "angle", "coords"] + # for var_name in fields: + # # allocate data + # exec("{0} = np.zeros(big_file['{0}'].shape, " + # "dtype=big_file['{0}'].dtype)".format(var_name)) + # # copy data in batches + # for idx_batch in six.moves.xrange(read_batch_num): + # idx_s = idx_batch * read_batch_size + # idx_e = (idx_batch + 1) * read_batch_size + # idx_e = np.minimum(idx_e, big_file["x"].shape[0]) + # exec("{0}[idx_s:idx_e] = np.asarray(big_file['{0}'][idx_s:idx_e])" + # "". format(var_name)) + + # Allocate + y = np.zeros(big_file["y"].shape, dtype=big_file["y"].dtype) + ID = np.zeros(big_file["ID"].shape, dtype=big_file["ID"].dtype) + pos = np.zeros(big_file["pos"].shape, dtype=big_file["pos"].dtype) + angle = np.zeros(big_file["angle"].shape, + dtype=big_file["angle"].dtype) + coords = np.zeros(big_file["coords"].shape, + dtype=big_file["coords"].dtype) + + # Copy data in batches + for idx_batch in six.moves.xrange(read_batch_num): + idx_s = idx_batch * read_batch_size + idx_e = (idx_batch + 1) * read_batch_size + idx_e = np.minimum(idx_e, big_file["x"].shape[0]) + + y[idx_s:idx_e] = np.asarray(big_file['y'][idx_s:idx_e]) + ID[idx_s:idx_e] = np.asarray(big_file['ID'][idx_s:idx_e]) + pos[idx_s:idx_e] = np.asarray(big_file['pos'][idx_s:idx_e]) + angle[idx_s:idx_e] = np.asarray(big_file['angle'][idx_s:idx_e]) + coords[idx_s:idx_e] = np.asarray(big_file['coords'][idx_s:idx_e]) + + # import pdb + # pdb.set_trace() + + # # Make sure data is contiguos + # y = np.ascontiguousarray(y) + # ID = np.ascontiguousarray(ID) + # pos = np.ascontiguousarray(pos) + # angle = np.ascontiguousarray(angle) + # coords = np.ascontiguousarray(coords) + + print(" -- " + mode + ": Done... ") + + return x, y, ID, pos, angle, coords + +# +# eccv.py ends here diff --git a/datasets/eccv2016/helper.py b/datasets/eccv2016/helper.py new file mode 100644 index 0000000..fabb1d3 --- /dev/null +++ b/datasets/eccv2016/helper.py @@ -0,0 +1,831 @@ +# helper.py --- +# +# Filename: helper.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Tue Feb 23 15:18:50 2016 (+0100) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. 
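+#
+# This module provides the patch-extraction and negative-mining helpers
+# (load_patches, random_mine_non_kp_with_2d_distance,
+# random_mine_non_kp_with_3d_blocking) used by datasets/eccv2016/eccv.py.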
+ +# Code: + + +from __future__ import print_function + +import os + +import cv2 +import h5py # for hdf5 +import numpy as np +import scipy.io # for matlab +import six + +# dump tools +from utils import loadh5, saveh5 +from datasets.eccv2016.math_tools import getIoUOfRectangles +# from Utils.transformations import quaternion_from_matrix + + +# # custom types +# from Utils.custom_types import paramStruct, pathConfig + +# def load_geom(geom_file, geom_type, scale_factor, flip_R=False): +# if geom_type == "calibration": +# # load geometry file +# geom_dict = loadh5(geom_file) +# # Check if principal point is at the center +# K = geom_dict["K"] +# assert(K[0, 2] < 1e-3 and K[1, 2] < 1e-3) +# # Rescale calbration according to previous resizing +# S = np.asarray([[scale_factor, 0, 0], +# [0, scale_factor, 0], +# [0, 0, 1]]) +# K = np.dot(S, K) +# geom_dict["K"] = K +# # Transpose Rotation Matrix if needed +# if flip_R: +# R = geom_dict["R"].T.copy() +# geom_dict["R"] = R +# # append things to list +# geom_list = [] +# geom_info_name_list = ["K", "R", "T", "imsize"] +# for geom_info_name in geom_info_name_list: +# geom_list += [geom_dict[geom_info_name].flatten()] +# # Finally do K_inv since inverting K is tricky with theano +# geom_list += [np.linalg.inv(geom_dict["K"]).flatten()] +# # Get the quaternion from Rotation matrices as well +# q = quaternion_from_matrix(geom_dict["R"]) +# geom_list += [q.flatten()] +# # Also add the inverse of the quaternion +# q_inv = q.copy() +# np.negative(q_inv[1:], q_inv[1:]) +# geom_list += [q_inv.flatten()] +# # Add to list +# geom = np.concatenate(geom_list) +# +# elif geom_type == "homography": +# H = np.loadtxt(geom_file) +# geom = H.flatten() +# +# return geom + + +def create_perturb(orig_pos, nPatchSize, nDescInputSize, fPerturbInfo): + """Create perturbations in the xyz format we use in the keypoint component. + + The return value is in x,y,z where x and y are scaled coordinates that are + between -1 and 1. In fact, the coordinates are scaled so that 1 = 0.5 * + (nPatchSize-1) + patch center. + + In case of the z, it is in log2 scale, where zero corresponds to the + original scale of the keypoint. For example, z of 1 would correspond to 2 + times the original scale of the keypoint. + + Parameters + ---------- + orig_pos: ndarray of float + Center position of the patch in coordinates. + + nPatchSize: int + Patch size of the entire patch extraction region. + + nDescInputSize: int + Patch size of the descriptor support region. + + fPerturbInfo: ndarray + Amount of maximum perturbations we allow in xyz. + + Notes + ----- + The perturbation has to ensure that the *ground truth* support region in + included. + """ + + # Check provided perturb info so that it is guaranteed that it will + # generate perturbations that holds the ground truth support region + assert fPerturbInfo[0] <= 1.0 - float(nDescInputSize) / float(nPatchSize) + assert fPerturbInfo[1] <= 1.0 - float(nDescInputSize) / float(nPatchSize) + + # Generate random perturbations + perturb_xyz = ((2.0 * np.random.rand(orig_pos.shape[0], 3) - 1.0) * + fPerturbInfo.reshape([1, 3])) + + return perturb_xyz + + +def apply_perturb(orig_pos, perturb_xyz, maxRatioScale): + """Apply the perturbation to ``a'' keypoint. + + The xyz perturbation is in format we use in the keypoint component. See + 'create_perturb' for details. + + Parameters + ---------- + orig_pos: ndarray of float (ndim == 1, size 3) + Original position of a *single* keypoint in pixel coordinates. 
+ + perturb_xyz: ndarray of float (ndim == 1, size 3) + The center position in xyz after the perturbation is applied. + + maxRatioScale: float + The multiplier that get's multiplied to scale to get half-width of the + region we are going to crop. + """ + + # assert that we are working with only one sample + assert len(orig_pos.shape) == 1 + assert len(perturb_xyz.shape) == 1 + + # get the new scale + new_pos_s = orig_pos[2] * (2.0**(-perturb_xyz[2])) + + # get xy to pixels conversion + xyz_to_pix = new_pos_s * maxRatioScale + + # Get the new x and y according to scale. Note that we multiply the + # movement we need to take by 2.0**perturb_xyz since we are moving at a + # different scale + new_pos_x = orig_pos[0] - perturb_xyz[0] * 2.0**perturb_xyz[2] * xyz_to_pix + new_pos_y = orig_pos[1] - perturb_xyz[1] * 2.0**perturb_xyz[2] * xyz_to_pix + + perturbed_pos = np.asarray([new_pos_x, new_pos_y, new_pos_s]) + + return perturbed_pos + + +def get_crop_range(xx, yy, half_width): + """Function for retrieving the crop coordinates""" + + xs = np.cast['int'](np.round(xx - half_width)) + xe = np.cast['int'](np.round(xx + half_width)) + ys = np.cast['int'](np.round(yy - half_width)) + ye = np.cast['int'](np.round(yy + half_width)) + + return xs, xe, ys, ye + + +def crop_patch(img, cx, cy, clockwise_rot, resize_ratio, nPatchSize): + """Crops the patch. + + Crops with center at cx, cy with patch size of half_width and resizes to + nPatchSize x nPatchsize patch. + + Parameters + ---------- + img: np.ndarray + Input image to be cropped from. + + cx: float + x coordinate of the patch. + + cy: float + y coordinate of the patch. + + clockwise_rot: float (degrees) + clockwise rotation to apply when extracting + + resize_ratio: float + Ratio of the resize. For example, ratio of two will crop a 2 x + nPatchSize region. + + nPatchSize: int + Size of the returned patch. + + Notes + ----- + + The cv2.warpAffine behaves in similar way to the spatial transformers. The + M matrix should move coordinates from the original image to the patch, + i.e. inverse transformation. + + """ + + # Below equation should give (nPatchSize-1)/2 when M x [cx, 0, 1]', + # 0 when M x [cx - (nPatchSize-1)/2*resize_ratio, 0, 1]', and finally, + # nPatchSize-1 when M x [cx + (nPatchSize-1)/2*resize_ratio, 0, 1]'. + dx = (nPatchSize - 1.0) * 0.5 - cx / resize_ratio + dy = (nPatchSize - 1.0) * 0.5 - cy / resize_ratio + M = np.asarray([[1. / resize_ratio, 0.0, dx], + [0.0, 1. 
/ resize_ratio, dy], + [0.0, 0.0, 1.0]]) + # move to zero base before rotation + R_pre = np.asarray([[1.0, 0.0, -(nPatchSize - 1.0) * 0.5], + [0.0, 1.0, -(nPatchSize - 1.0) * 0.5], + [0.0, 0.0, 1.0]]) + # rotate + theta = clockwise_rot / 180.0 * np.pi + R_rot = np.asarray([[np.cos(theta), -np.sin(theta), 0.0], + [np.sin(theta), np.cos(theta), 0.0], + [0.0, 0.0, 1.0]]) + # move back to corner base + R_post = np.asarray([[1.0, 0.0, (nPatchSize - 1.0) * 0.5], + [0.0, 1.0, (nPatchSize - 1.0) * 0.5], + [0.0, 0.0, 1.0]]) + # combine + R = np.dot(R_post, np.dot(R_rot, R_pre)) + + crop = cv2.warpAffine(img, np.dot(R, M)[:2, :], (nPatchSize, nPatchSize)) + + return crop + + +def load_patches(img, kp_in, y_in, ID_in, angle_in, fRatioScale, fMaxScale, + nPatchSize, nDescInputSize, in_dim, bPerturb, fPerturbInfo, + bReturnCoords=False, nAugmentedRotations=1, + fAugmentRange=180.0, fAugmentCenterRandStrength=0.0, + sAugmentCenterRandMethod="uniform", nPatchSizeAug=None): + '''Loads Patches given img and list of keypoints + + Parameters + ---------- + + img: input grayscale image (or color) + + kp_in: 2-D array of keypoints [nbkp, 3+], Note that data is in order of + [x,y,scale,...] + + y_in: 1-D array of keypoint labels (or target values) + + ID_in: 1-D array of keypoint IDs + + fRatioScale: float + The ratio which gets multiplied to obtain the crop radius. For example + if fRatioScale is (48-1) /2 /2, and the scale is 2, the crop region + will be of size 48x48. Note that -1 is necessary as the center pixel is + inclusive. In our previous implementation regarding ECCV submission, -1 + was negelected. This however should not make a big difference. + + fMaxScale: float + Since we are trying to make scale space learning possible, we give list + of scales for training. This should contain the maximum value, so that + we consider the largest scale when cropping. + + nPatchSize: int + Size of the patch (big one, not to be confused with nPatchSizeKp). + + nDescInputSize: int + Size of the inner patch (descriptor support region). Used for computing + the bounds for purturbing the patch location. + + in_dim: int + Number of dimensions of the input. For example grayscale would mean + `in_dim == 1`. + + bPerturb: boolean + Whether to perturb when extracting patches + + fPerturbInfo: np.array (float) + How much perturbation (in relative scale) for x, y, scale + + bReturnCoord: boolean + Return groundtruth coordinates. Should be set to True for new + implementations. Default is False for backward compatibility. + + nAugmentedRotations: int + Number of augmented rotations (equaly spaced) to be added to the + dataset. The original implementation should be equal to having this + number set to 1. + + fAugmentRange: float (degrees) + The range of the augnmented degree. For example, 180 would mean the + full range, 90 would be the half range. + + fAugmentCenterRandStrength: float (degrees) + The strength of the random to be applied to the center angle + perturbation. + + sAugmentCenterRandMethod: string + Name of the center randomness method. + + nPatchSizeAug: int + Size of the patch when augmenting rotations. This is used to create + smaller perturbations to ensure we do not crop outside the patch. 
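+
+    Example
+    -------
+    An illustrative call (the sizes are hypothetical; the real values come
+    from the configuration, and datasets/test.py shows the actual usage)::
+
+        x, y, ID, pos, angle, coords = load_patches(
+            img, kp, y_in, ID_in, angle_in,
+            fRatioScale=(48.0 - 1.0) / 2.0 / 2.0, fMaxScale=1.0,
+            nPatchSize=128, nDescInputSize=64, in_dim=1,
+            bPerturb=False, fPerturbInfo=np.zeros((3,)),
+            bReturnCoords=True)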
+ + ''' + + # get max possible scale ratio + maxRatioScale = fRatioScale * fMaxScale + + # check validity of nPreRotPatchSize + assert nAugmentedRotations >= 1 + # # Since we apply perturbations, we need to be at least sqrt(2) larger than + # # the desired when random augmentations are introduced + # if nAugmentedRotations > 1 or fAugmentCenterRandStrength > 0: + # nInitPatchSize = np.round(np.sqrt(2.0) * nPatchSize).astype(int) + # else: + # nInitPatchSize = nPatchSize + + if nPatchSizeAug is None: + nPatchSizeAug = nPatchSize + assert nPatchSize <= nPatchSizeAug + + # pre-allocate maximum possible array size for data + x = np.zeros((kp_in.shape[0] * nAugmentedRotations, in_dim, + nPatchSizeAug, nPatchSizeAug), dtype='uint8') + y = np.zeros((kp_in.shape[0] * nAugmentedRotations,), dtype='float32') + ID = np.zeros((kp_in.shape[0] * nAugmentedRotations,), dtype='int') + pos = np.zeros((kp_in.shape[0] * nAugmentedRotations, 3), dtype='float') + angle = np.zeros((kp_in.shape[0] * nAugmentedRotations,), dtype='float32') + coords = np.tile(np.zeros_like(kp_in), (nAugmentedRotations, 1)) + + # create perturbations + # Note: the purturbation still considers only the nPatchSize + perturb_xyz = create_perturb(kp_in, nPatchSize, + nDescInputSize, fPerturbInfo) + +# import pdb +# pdb.set_trace() + + # delete perturbations for the negatives (look at kp[6]) + # perturb_xyz[kp_in[:, 6] == 0] = 0 + perturb_xyz[y_in == 0] = 0 + + idxKeep = 0 + for idx in six.moves.xrange(kp_in.shape[0]): + + # current kp position + cur_pos = apply_perturb(kp_in[idx], perturb_xyz[idx], maxRatioScale) + cx = cur_pos[0] + cy = cur_pos[1] + cs = cur_pos[2] + + # retrieve the half width acording to scale + max_hw = cs * maxRatioScale + + # get crop range considering bigger area + xs, xe, ys, ye = get_crop_range(cx, cy, max_hw * np.sqrt(2.0)) + + # boundary check with safety margin + safety_margin = 1 + # if xs < 0 or xe >= img.shape[1] or ys < 0 or ys >= img.shape[0]: + if (xs < safety_margin or xe >= img.shape[1] - safety_margin or + ys < safety_margin or ys >= img.shape[0] - safety_margin): + continue + + # create an initial center orientation + center_rand = 0 + if sAugmentCenterRandMethod == "uniform": + # Note that the below will give zero when + # `fAugmentCenterRandStrength == 0`. This effectively disables the + # random perturbation. 
+ center_rand = ((np.random.rand() * 2.0 - 1.0) * + fAugmentCenterRandStrength) + else: + raise NotImplementedError( + "Unknown random method " + "sAugmentCenterRandMethod = {}".format( + sAugmentCenterRandMethod + ) + ) + + # create an array of rotations to used + rot_diff_list = np.arange(nAugmentedRotations).astype(float) + # compute the step between subsequent rotations + rot_step = 2.0 * fAugmentRange / float(nAugmentedRotations) + rot_diff_list *= rot_step + + for rot_diff in rot_diff_list: + + # the rotation to be applied for this patch + crot_deg = rot_diff + center_rand + crot_rad = crot_deg * np.pi / 180.0 + + # Crop using the crop function + # crop = img[ys:ye, xs:xe] + # x[idxKeep, 0, :, :] = crop_patch( + # img, cx, cy, crot_deg, + # max_hw / (float(nPatchSizeAug - 1) * 0.5), + # nPatchSizeAug) # note the nInitPatchSize + cur_patch = crop_patch( + img, cx, cy, crot_deg, + max_hw / (float(nPatchSizeAug - 1) * 0.5), + nPatchSizeAug) + if len(cur_patch.shape) == 2: + # pdb.set_trace() + cur_patch = cur_patch[..., np.newaxis] + + x[idxKeep] = cur_patch.transpose(2, 0, 1) + + # crop = img[ys:ye, xs:xe] + + # update target value and id + y[idxKeep] = y_in[idx] + ID[idxKeep] = ID_in[idx] + # add crot (in radians), note that we are between -2pi and 0 for + # compatiblity + angle[idxKeep] = ((angle_in[idx] + crot_rad) % (2.0 * np.pi) - + (2.0 * np.pi)) + + # Store the perturbation (center of the patch is 0,0,0) + new_perturb_xyz = perturb_xyz[idx].copy() + xx, yy, zz = new_perturb_xyz + rx = np.cos(crot_rad) * xx - np.sin(crot_rad) * yy + ry = np.sin(crot_rad) * xx + np.cos(crot_rad) * yy + rz = zz + pos[idxKeep] = np.asarray([rx, ry, rz]) + + # store the original pixel coordinates + new_kp_in = kp_in[idx].copy() + new_kp_in[3] = ((new_kp_in[3] + crot_rad) % (2.0 * np.pi) - + (2.0 * np.pi)) + coords[idxKeep] = new_kp_in + + idxKeep += 1 + + # Delete unassigned + x = x[:idxKeep] + y = y[:idxKeep] + ID = ID[:idxKeep] + pos = pos[:idxKeep] + angle = angle[:idxKeep] + coords = coords[:idxKeep] + + if not bReturnCoords: + return x.astype("uint8"), y, ID.astype("int"), pos, angle + else: + return x.astype("uint8"), y, ID.astype("int"), pos, angle, coords + + +def random_mine_non_kp_with_2d_distance(img, pos_kp, scale_hist, + scale_hist_c, param): + """Randomly mines negatives with 2d distance. + + This function looks at raw pixel distances, without considering the + scale. The threshold sould be given by the parameter. This function was our + first atempt which was not really successful. + + Parameters + ---------- + img: np.ndrray + Input image to be mined + + pos_kp: np.ndarray + Positive keypoint locations that we should avoid when mining. Note that + this can be either SfM points, or all SIFT keypoints. + + scale_hist: np.ndarray + Scale distribution to randomly sample scale from. Should be the scale + histogram of positive keypoints. + + scale_hist_c: np.ndarray + Bin center position of the scale histogram. Note that numpy histogram + (by default) stores the cutoff values where we want the center values. + + param: paramStruct object + The parameter object which is read from the configuration. 
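+
+    Example
+    -------
+    Illustrative usage; the histogram inputs come from ``get_scale_hist``
+    defined later in this file::
+
+        scale_hist, scale_hist_c = get_scale_hist(train_data_dir, param)
+        neg_kp = random_mine_non_kp_with_2d_distance(
+            img, pos_kp, scale_hist, scale_hist_c, param)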
+ + """ + + # read param + neg_2_mine = param.dataset.nNegPerImg + neg_dist_th = param.dataset.nNegDistTh + if 'fNegOverlapTh' in param.patch.__dict__.keys(): + raise ValueError('fNegOverlapTh should not be defined ' + 'when using distance method!') + + all_neg_kp = None + + num_iter = 0 + while neg_2_mine > 0: + # random sample positions + neg_kp = np.random.rand(neg_2_mine * 10, 2) # randomly get 10 times + neg_kp *= np.array([img.shape[1] - 1 - 2.0 * neg_dist_th, + img.shape[0] - 1 - 2.0 * neg_dist_th], + dtype='float').reshape([1, 2]) # adjust to borders + neg_kp += neg_dist_th + + # remove too close keypoints + dists = neg_kp.reshape([-1, 2, 1]) - pos_kp[:, :2].reshape([1, 2, -1]) + idx_far_enough = (dists**2).sum(axis=1).min(axis=1) > neg_dist_th + neg_kp = neg_kp[idx_far_enough] + + # random shuffle and take neg_2_mine + idx_shuffle = np.random.permutation(len(neg_kp)) + neg_kp = neg_kp[idx_shuffle[:min(neg_2_mine, len(idx_shuffle))]] + + # concatenate random scale + random_scales = np.random.choice(scale_hist_c, size=(len(neg_kp), 1), + p=scale_hist) + neg_kp = np.concatenate([neg_kp, random_scales], axis=1) + + # concatenate dummy values + neg_kp = np.concatenate([neg_kp, + np.zeros((len(neg_kp), 1)), + -1.0 * np.ones((len(neg_kp), 1)), + np.zeros((len(neg_kp), 1))], axis=1) + + # concatenate to the final list + if all_neg_kp is None: + all_neg_kp = neg_kp + else: + all_neg_kp = np.concatenate([all_neg_kp, neg_kp]) + + # update neg_2_mine + neg_2_mine = param.dataset.nNegPerImg - len(all_neg_kp) + + # update number of iterations + num_iter += 1 + + if num_iter > 100: + raise RuntimeError('I am taking too much') + + neg_kp = all_neg_kp[:param.dataset.nNegPerImg] + + return neg_kp + + +def random_mine_non_kp_with_3d_blocking(img, pos_kp, scale_hist, + scale_hist_c, param, + neg_per_iter=None, + max_iter=100): + """Randomly mines negatives with 3d blocking. + + This function uses overlap and if the overlap / proposed region is larget + than the given threshold, we don't use that area. + + Parameters + ---------- + img: np.ndrray + Input image to be mined + + pos_kp: np.ndarray + Positive keypoint locations that we should avoid when mining. Note that + this can be either SfM points, or all SIFT keypoints. + + scale_hist: np.ndarray + Scale distribution to randomly sample scale from. Should be the scale + histogram of positive keypoints. + + scale_hist_c: np.ndarray + Bin center position of the scale histogram. Note that numpy histogram + (by default) stores the cutoff values where we want the center values. + + param: paramStruct object + The parameter object which is read from the configuration. + + neg_per_iter: int or None (optional) + If not given or set to None, we will mine 10 times the desired amount + at each iteration. This may cause negatives overlapping with each + other. To make sure none of the negatives are overlapping, use 1 for + this argument. + + max_iter: int (optional) + Maximum iterations to the looping to try mining. When he does not find + after this, he will simply stop mining and maybe print a message, but + it will not necessarily crash. I did it this way since this function is + intended to run in a sub-process, which the individual crashes do not + really matter. And also, if you can't find enough negatives after that + mining, maybe you are better off just mining no more. 
+ + """ + + # read param + neg_2_mine = param.dataset.nNegPerImg + neg_overlap_th = param.patch.fNegOverlapTh + if 'nNegDistTh' in param.dataset.__dict__.keys(): + raise ValueError('nNegDistTh should not be defined ' + 'when using distance method!') + # To stay inside the image boundaries + maxRatioScale = param.patch.fRatioScale * param.patch.fMaxScale + # The multiplier to get support region + ratioDescSupport = (param.patch.fRatioScale * + float(param.model.nDescInputSize) / + float(param.patch.nPatchSize)) + + all_neg_kp = None + + num_iter = 0 + while neg_2_mine > 0: + + if neg_per_iter is None: + neg_per_iter = neg_2_mine * 10 + + # randomly sample scales + random_scales = np.random.choice(scale_hist_c, size=(neg_per_iter, 1), + p=scale_hist) + + # random sample positions with scale in mind - this will ensure we are + # in the boundary + neg_kp = np.random.rand(neg_per_iter, 2) # randomly get 10 times + rescale_mtx = np.array([img.shape[1] - 1, + img.shape[0] - 1], + dtype='float').reshape([1, 2]) + rescale_mtx = (rescale_mtx - + 2.0 * maxRatioScale * random_scales.reshape([-1, 1])) + neg_kp *= rescale_mtx + + # concatenate scale + neg_kp = np.concatenate([neg_kp, random_scales], axis=1) + + # compute overlaps + x2 = neg_kp[:, 0] + y2 = neg_kp[:, 1] + r2 = neg_kp[:, 2] * ratioDescSupport + # reshape x2, y2, r2 so that we do all the combinations + x2.shape = [-1, 1] + y2.shape = [-1, 1] + r2.shape = [-1, 1] + x1 = pos_kp[:, 0] + y1 = pos_kp[:, 1] + r1 = pos_kp[:, 2] * ratioDescSupport + # reshape x1, y1, r1 so that we do all the combinations + x1.shape = [1, -1] + y1.shape = [1, -1] + r1.shape = [1, -1] + max_overlap_with_pos = np.max( + getIoUOfRectangles(x1, y1, r1, x2, y2, r2), + axis=1 + ) + bCheckNeg = False + if all_neg_kp is not None: + bCheckNeg = len(all_neg_kp) > 0 + if bCheckNeg: + # overlap with previously mined negatives + x1 = all_neg_kp[:, 0] + y1 = all_neg_kp[:, 1] + r1 = all_neg_kp[:, 2] * ratioDescSupport + # reshape x1, y1, r1 so that we do all the combinations + x1.shape = [1, -1] + y1.shape = [1, -1] + r1.shape = [1, -1] + max_overlap_with_neg = np.max( + getIoUOfRectangles(x1, y1, r1, x2, y2, r2), + axis=1 + ) + # now get overlap in terms of ratio + r2.shape = [-1] + max_overlap = np.maximum(max_overlap_with_pos, + max_overlap_with_neg) + else: + r2.shape = [-1] + max_overlap = max_overlap_with_pos + + # remove too close keypoints + idx_far_enough = max_overlap <= neg_overlap_th + neg_kp = neg_kp[idx_far_enough] + + # random shuffle and take neg_2_mine + idx_shuffle = np.random.permutation(len(neg_kp)) + neg_kp = neg_kp[idx_shuffle[:min(neg_2_mine, len(idx_shuffle))]] + + # concatenate dummy values + neg_kp = np.concatenate([neg_kp, + np.zeros((len(neg_kp), 1)), + -1.0 * np.ones((len(neg_kp), 1)), + np.zeros((len(neg_kp), 1))], axis=1) + + # concatenate to the final list + if all_neg_kp is None: + all_neg_kp = neg_kp + else: + all_neg_kp = np.concatenate([all_neg_kp, neg_kp]) + + # update neg_2_mine + neg_2_mine = param.dataset.nNegPerImg - len(all_neg_kp) + + # update number of iterations + num_iter += 1 + + # # if num_iter == 1 and len(all_neg_kp) == 0: + # import pdb + # pdb.set_trace() + + if num_iter > max_iter: + print('\nRan {} iterations, but could not mine {} on this image' + ''.format(num_iter, neg_2_mine)) + break + + neg_kp = all_neg_kp[:param.dataset.nNegPerImg] + + return neg_kp + + +def get_list_of_img(train_data_dir, dump_data_dir, param, mode): + + # Check if split file exists + split_prefix = train_data_dir + 'split-' + split_prefix += 
str(param.dataset.nTrainPercent) + '-' + split_prefix += str(param.dataset.nValidPercent) + '-' + split_prefix += str(param.dataset.nTestPercent) + '-' + + # If it does not exist, create one + if not os.path.exists(split_prefix + mode + '.txt'): + + # Read list of images + list_png_file = [] + for files in os.listdir(train_data_dir): + if files.endswith(".png"): + list_png_file = list_png_file + [files] + + # Shuffle the image list + if not os.path.exists(dump_data_dir + 'permute_png_idx.h5'): + print(' -- ' + mode + ': ' + 'Creating new shuffle for reading images') + permute_png_idx = np.random.permutation(len(list_png_file)) + to_save = {"saveval": permute_png_idx} + saveh5(to_save, + dump_data_dir + 'permute_png_idx.h5') + # dt.save(permute_png_idx, + # dump_data_dir + 'permute_png_idx.h5') + else: + print(' -- ' + mode + ': ' + 'Loading shuffle for reading images from ' + '{}'.format(dump_data_dir)) + to_load = loadh5(dump_data_dir + + 'permute_png_idx.h5') + permute_png_idx = to_load["saveval"] + # permute_png_idx = dt.load(dump_data_dir + + # 'permute_png_idx.h5') + + list_png_file = [list_png_file[permute_png_idx[idx]] for idx in + range(len(list_png_file))] + + # Write to file (all three) + f_train = open(split_prefix + 'train.txt', 'w') + f_valid = open(split_prefix + 'valid.txt', 'w') + f_test = open(split_prefix + 'test.txt', 'w') + + train_end = int(float(param.dataset.nTrainPercent) / 100.0 * + len(list_png_file)) + valid_end = int(float(param.dataset.nTrainPercent + + param.dataset.nValidPercent) / 100.0 * + len(list_png_file)) + + for idx_png in six.moves.xrange(len(list_png_file)): + if idx_png > valid_end: + print(list_png_file[idx_png], file=f_test) + elif idx_png > train_end: + print(list_png_file[idx_png], file=f_valid) + else: + print(list_png_file[idx_png], file=f_train) + + f_train.close() + f_valid.close() + f_test.close() + + # Read the list + list_png_file = list(np.loadtxt(split_prefix + mode + '.txt', dtype='str')) + + return list_png_file + + +def get_scale_hist(train_data_dir, param): + + # Check if scale histogram file exists + hist_file_name = train_data_dir + 'scales-histogram-minsc-' \ + + str(param.dataset.fMinKpSize) + '.h5' + if not os.path.exists(hist_file_name): + + # read all positive keypoint scales + list_png_file = [] + for files in os.listdir(train_data_dir): + if files.endswith(".png"): + list_png_file = list_png_file + [files] + all_scales = [] + for png_file in list_png_file: + kp_file_name = train_data_dir + png_file.replace('.png', '_P.mat') + cur_pos_kp = scipy.io.loadmat(kp_file_name)['TFeatures'] + cur_pos_kp = np.asarray(cur_pos_kp, dtype='float') + all_scales += [cur_pos_kp[5, :].flatten()] + all_scales = np.concatenate(all_scales) + + # make histogram + hist, bin_edges = np.histogram(all_scales, bins=100) + hist_c = (bin_edges[1:] + bin_edges[:-1]) * 0.5 + + # save to h5 file + with h5py.File(hist_file_name, 'w') as hist_file: + hist_file['all_scales'] = all_scales + hist_file['histogram_bins'] = hist + hist_file['histogram_centers'] = hist_c + + # Load from the histogram file + with h5py.File(hist_file_name, 'r') as hist_file: + scale_hist = np.asarray(hist_file['histogram_bins'], + dtype=float).flatten() + scale_hist /= np.sum(scale_hist) + scale_hist_c = np.asarray(hist_file['histogram_centers']).flatten() + + return scale_hist, scale_hist_c + +# +# helper.py ends here diff --git a/datasets/eccv2016/math_tools.py b/datasets/eccv2016/math_tools.py new file mode 100644 index 0000000..d37bb65 --- /dev/null +++ 
b/datasets/eccv2016/math_tools.py @@ -0,0 +1,359 @@ +# math_tools.py --- +# +# Filename: math_tools.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Fri Feb 19 18:04:58 2016 (+0100) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + + +import numpy as np +import six + + +# ------------------------------------------------------------------------------ +# Math functions +def softmax(val, axis, softmax_strength): + ''' Soft max function used for cost function ''' + + import theano + import theano.tensor as T + floatX = theano.config.floatX + softmax_strength = np.cast[floatX](softmax_strength) + + if softmax_strength < 0: + res_after_max = T.max(val, axis=axis) + else: + res_after_max = np.cast[floatX](1.0) / softmax_strength \ + * T.log(T.mean(T.exp( + softmax_strength * (val - T.max(val, + axis=axis, + keepdims=True)) + ), axis=axis)) \ + + T.max(val, axis=axis) + return res_after_max + + +def softargmax(val, axis, softargmax_strength): + ''' Soft argmax function used for cost function ''' + + import theano + import theano.tensor as T + floatX = theano.config.floatX + + # The implmentation only works for single axis + assert isinstance(axis, int) + + if softargmax_strength < 0: + res = T.argmax(val, axis=axis) + else: + safe_exp = T.exp(softargmax_strength * ( + val - T.max(val, axis=axis, keepdims=True))) + prob = safe_exp / T.sum(safe_exp, axis=axis, keepdims=True) + ind = T.arange(val.shape[axis], dtype=floatX) + res = T.sum(prob * ind, axis=axis) / T.sum(prob, axis=axis) + + return res + + +def batch_inc_mean(data, batch_size, axis=None): + """Computes standard deviation of the data using batches. + + Parameters + ---------- + data: np.ndarray or h5py.Dataset + Data to compute the standar deviation for. + + batch_size: int + Size of the working batch. + + axis: None or tuple (optional) + Axis in which we should take the standar deviation on + + Returns + ------- + mean: np.ndarray or float + Mean. ndarray if axis is not None, float otherwise. + + """ + + if axis is not None: + raise NotImplementedError('Working on arbitrary axis' + ' is not yet implemented') + + else: + numel = np.prod(data.shape) + dim1_batch_size = int(np.floor(batch_size / np.prod(data.shape[1:]))) + num_batch = int(np.ceil(float(data.shape[0]) / float(dim1_batch_size))) + + mean = 0.0 + for idx_batch in six.moves.xrange(num_batch): + idx_s = idx_batch * dim1_batch_size + idx_e = min((idx_batch + 1) * dim1_batch_size, data.shape[0]) + x = data[idx_s:idx_e].flatten() + mean += float(np.sum(x)) + + mean = mean / float(numel) + + return mean + + +def batch_inc_std(data, mean, batch_size, axis=None): + """Computes standard deviation of the data using batches. + + Parameters + ---------- + data: np.ndarray or h5py.Dataset + Data to compute the standar deviation for. + + mean: np.ndarray + Precomputed mean of the data. + + batch_size: int + Size of the working batch. + + axis: None or tuple (optional) + Axis in which we should take the standar deviation on + + Returns + ------- + std: np.ndarray or float + Standar deviation. ndarray if axis is not None, float otherwise. 
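+
+    Example
+    -------
+    Illustrative two-pass use on an HDF5 dataset ``x``; the batch size counts
+    elements and just needs to cover at least one sample::
+
+        mean = batch_inc_mean(x, batch_size=2 ** 20)
+        std = batch_inc_std(x, mean, batch_size=2 ** 20)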
+ + """ + + if axis is not None: + raise NotImplementedError('Working on arbitrary axis' + ' is not yet implemented') + + else: + numel = np.prod(data.shape) + dim1_batch_size = int(np.floor(batch_size / np.prod(data.shape[1:]))) + num_batch = int(np.ceil(float(data.shape[0]) / float(dim1_batch_size))) + + M2 = 0.0 + for idx_batch in six.moves.xrange(num_batch): + idx_s = idx_batch * dim1_batch_size + idx_e = min((idx_batch + 1) * dim1_batch_size, data.shape[0]) + x = data[idx_s:idx_e].flatten() + M2 += float(np.sum((x - mean)**2)) + + std = np.sqrt(M2 / float(numel - 1)) + + return std + + +# ------------------------------------------------------------------------------ +# Circle related functions +def getIntersectionOfCircles(x1, y1, r1, x2, y2, r2): + + # see http://mathworld.wolfram.com/Circle-CircleIntersection.html + + d = np.sqrt((x1 - x2)**2 + (y1 - y2)**2) + + # if d >= r1 + r2: + # return 0 + + intersection = np.zeros_like(d) + valid = np.where(d >= r1 + r2)[0] + + d = d[valid] + r1 = r1[valid] + r2 = r2[valid] + + intersection[valid] = ( + r2**2 * np.arccos((d**2 + r2**2 - r1**2) / (2.0 * d * r2)) + + r1**2 * np.arccos((d**2 + r1**2 - r2**2) / (2.0 * d * r1)) - + 0.5 * np.sqrt( + (-d + r2 + r1) * (d + r2 - r1) * (d - r2 + r1) * (d + r2 + r1) + ) + ) + + return intersection + + +def getInterNUnionOfCircles(x1, y1, r1, x2, y2, r2): + + intersection = getIntersectionOfCircles(x1, y1, r1, x2, y2, r2) + union = np.pi * r1**2 + np.pi * r2**2 - intersection + + return intersection, union + + +# ------------------------------------------------------------------------------ +# Rectangle related functions +def getIntersectionOfRectangles(x1, y1, r1, x2, y2, r2): + """Computes Intersection of Rectangles + + Parameters + ---------- + x1: ndarray or tensor + x coordinates of the first rectangle. + + y1: ndarray or tensor + y coordinates of the first rectangle. + + r1: ndarray or tensor + Half width of the first rectangle (just to be consistent with + circle impl). + + x2: ndarray or tensor + x coordinates of the second rectangle. + + y2: ndarray or tensor + y coordinates of the second rectangle. + + r2: ndarray or tensor + Half width of the second rectangle (just to be consistent with + circle impl). + """ + + # use theano or numpy depending on the variable + if isinstance(x1, np.ndarray): + # Checking if ndarray will prevent us from loading theano + # unnecessarily. + cm = np + else: + # import theano + import theano.tensor as T + + cm = T + + # intersection computation + inter_w = cm.minimum(x1 + r1, x2 + r2) - cm.maximum(x1 - r1, x2 - r2) + inter_w = cm.maximum(0, inter_w) + inter_h = cm.minimum(y1 + r1, y2 + r2) - cm.maximum(y1 - r1, y2 - r2) + inter_h = cm.maximum(0, inter_h) + + return inter_w * inter_h + + +def getIoUOfRectangles(x1, y1, r1, x2, y2, r2): + """Computes Intersection over Union of Rectangles + + Parameters + ---------- + x1: ndarray or tensor + x coordinates of the first rectangle. + + y1: ndarray or tensor + y coordinates of the first rectangle. + + r1: ndarray or tensor + Half width of the first rectangle (just to be consistent with + circle impl). + + x2: ndarray or tensor + x coordinates of the second rectangle. + + y2: ndarray or tensor + y coordinates of the second rectangle. + + r2: ndarray or tensor + Half width of the second rectangle (just to be consistent with + circle impl). 
+ """ + + # compute intersection and union + intersection = getIntersectionOfRectangles(x1, y1, r1, x2, y2, r2) + union = (2.0 * r1)**2.0 + (2.0 * r2)**2.0 - intersection + + return intersection / union + + +# ----------------------------------------------------------------------------- +# For normalization +def _subtractive_norm_make_coef(norm_kernel, input_hw): + """Creates the coef matrix for accounting borders when applying + subtractive normalization. + + Parameters + ---------- + norm_kernel : np.ndarray, 2d + Normalized kernel applied for subtractive normalization. + + input_hw : tuple + Height and width of the input image (patch) for the + SubtractiveNormalizationLayer. + + """ + + assert np.isclose(np.sum(norm_kernel), 1.0) + + # This allows our mean computation to compensate for the border area, + # where you have less terms adding up. Torch used convolution with a + # ``one'' image, but since we do not want the library to depend on + # other libraries with convolutions, we do it manually here. + coef = np.ones(input_hw, dtype='float32') + pad_x = norm_kernel.shape[1] // 2 + pad_y = norm_kernel.shape[0] // 2 + + # Corners + # for the top-left corner + tl_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::-1, ::-1], axis=0), axis=1)[::1, ::1] + coef[:pad_y + 1, :pad_x + 1] = tl_cumsum_coef[pad_y:, pad_x:] + # for the top-right corner + tr_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::-1, ::1], axis=0), axis=1)[::1, ::-1] + coef[:pad_y + 1, -pad_x - 1:] = tr_cumsum_coef[pad_y:, :pad_x + 1] + # for the bottom-left corner + bl_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::1, ::-1], axis=0), axis=1)[::-1, ::1] + coef[-pad_y - 1:, :pad_x + 1] = bl_cumsum_coef[:pad_y + 1, pad_x:] + # for the bottom-right corner + br_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::1, ::1], axis=0), axis=1)[::-1, ::-1] + coef[-pad_y - 1:, -pad_x - 1:] = br_cumsum_coef[:pad_y + 1, :pad_x + 1] + + # Sides + tb_slice = slice(pad_y + 1, -pad_y - 1) + # for the left side + fill_value = tl_cumsum_coef[-1, pad_x:] + coef[tb_slice, :pad_x + 1] = fill_value.reshape([1, -1]) + # for the right side + fill_value = br_cumsum_coef[0, :pad_x + 1] + coef[tb_slice, -pad_x - 1:] = fill_value.reshape([1, -1]) + lr_slice = slice(pad_x + 1, -pad_x - 1) + # for the top side + fill_value = tl_cumsum_coef[pad_y:, -1] + coef[:pad_y + 1, lr_slice] = fill_value.reshape([-1, 1]) + # for the right side + fill_value = br_cumsum_coef[:pad_y + 1, 0] + coef[-pad_y - 1:, lr_slice] = fill_value.reshape([-1, 1]) + + # # code for validation of above + # img = np.ones_like(input, dtype='float32') + # import cv2 + # coef_cv2 = cv2.filter2D(img, -1, norm_kernel, + # borderType=cv2.BORDER_CONSTANT) + + return coef + +_lcn_make_coef = _subtractive_norm_make_coef + + +# +# math_tools.py ends here diff --git a/datasets/eccv2016/wrapper.py b/datasets/eccv2016/wrapper.py new file mode 100644 index 0000000..a003211 --- /dev/null +++ b/datasets/eccv2016/wrapper.py @@ -0,0 +1,127 @@ +# wrapper.py --- +# +# Filename: wrapper.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jun 29 14:55:21 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. 
+ +# Code: + +import numpy as np + +import datasets.eccv2016.eccv as old_impl +from datasets.eccv2016.custom_types import paramStruct +from utils import get_patch_size, get_patch_size_no_aug, get_ratio_scale + + +def config_to_param(config): + """The function that takes care of the transfer to the new framework""" + + param = paramStruct() + + # Param Group "dataset" + param.dataset.nTestPercent = int(20) + param.dataset.dataType = "ECCV" + param.dataset.nValidPercent = int(20) + param.dataset.fMinKpSize = float(2.0) + param.dataset.nPosPerImg = int(-1) + # Note that we are passing a list. This module actually supports + # concatenating datsets. + param.dataset.trainSetList = ["ECCV/" + config.data_name] + param.dataset.nNegPerImg = int(1000) + param.dataset.nTrainPercent = int(60) + + # Param Group "patch" + if config.old_data_compat: + param.patch.nPatchSize = int(get_patch_size(config)) + else: + param.patch.nPatchSize = int(get_patch_size_no_aug(config)) + param.patch.nPatchSizeAug = int(get_patch_size(config)) + param.patch.noscale = False + param.patch.fNegOverlapTh = float(0.1) + param.patch.sNegMineMethod = "use_all_SIFT_points" + param.patch.fRatioScale = float(get_ratio_scale(config)) + param.patch.fPerturbInfo = np.array([0.2, 0.2, 0.0]).astype(float) + if config.old_data_compat: + param.patch.nMaxRandomNegMineIter = int(500) + else: + param.patch.nMaxRandomNegMineIter = int(100) + param.patch.fMaxScale = 1.0 + param.patch.bPerturb = 1.0 + + # Param Group "model" + param.model.nDescInputSize = int(config.desc_input_size) + + # override folders from config + setattr(param, "data_dir", config.data_dir) + setattr(param, "temp_dir", config.temp_dir) + setattr(param, "scratch_dir", config.scratch_dir) + + return param + + +class Wrapper(object): + """The wrapper class for eccv data. + + Since we want to re-use the code for loading and storing data, but do *not* + want to bother integrating it into the new framework, we simply create a + wrapper on top, which translates the old format to the new one. This is + likely to cause some unecessary overhead, but is not so critical right now. + + The role of this class is to: + + * Create a dictionary that allows easy access to raw data + + """ + + def __init__(self, config, rng): + + # Placeholder for the data dictionary + self.data = {} + + # Use the old data module to load data. Processing data loading for + # differen tasks + for task in ["train", "valid", "test"]: + param = config_to_param(config) + old_data = old_impl.data_obj(param, task) + # Some sanity check to make sure that the data module is behaving + # as intended. 
+ assert old_data.patch_height == old_data.patch_width + assert old_data.patch_height == get_patch_size(config) + assert old_data.num_channel == config.nchannel + assert old_data.out_dim == config.nchannel + self.data[task] = { + "patch": old_data.x, + "xyz": old_data.pos, + "angle": old_data.angle.reshape(-1, 1), + "ID": old_data.ID, + } + + # data ordering of this class + self.data_order = "NCHW" + + # Save the hash, for the pairs folder + self.hash = old_data.hash +# +# wrapper.py ends here diff --git a/datasets/lift.py b/datasets/lift.py new file mode 100644 index 0000000..611099f --- /dev/null +++ b/datasets/lift.py @@ -0,0 +1,334 @@ +# lift.py --- +# +# Filename: lift.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Wed Jul 5 19:29:17 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + + +from __future__ import print_function + +import importlib +import os +import sys + +import numpy as np +import h5py + +from six.moves import xrange +from utils import loadh5, saveh5 + + +class Dataset(object): + """The Dataset Module + + LATER: For validation data, next_batch should also consider the pos-to-neg + ratio. For now, we just weight them + + LATER: Smarter cache system + LATER: Optimize LUT generation + + """ + + def __init__(self, config, rng): + self.config = config + self.rng = rng + + # Hard-code the 3D distance threshold + # 3D points can be too close to be non-matches + if self.config.data_type == "chalmers": + if self.config.data_name == "oxford": + self.th_dist = 2e-3 + else: + raise RuntimeError( + "Unknown data_name: '{}'".format( + self.config.data_name)) + + # Look Up Tables + self.LUT_kp = {} + self.LUT_nonkp = {} + if hasattr(self, "th_dist"): + self.LUT_below_th = {} + + # Initialize the appropriate data module + self.data_module = importlib.import_module( + "datasets.{}.wrapper".format(self.config.data_type)) + + # Initialize the wrapper class. This should also make the data + # available on demand. 
For now, we just get the handles to the data + # stored in h5 files since it's too large + self.data_wrapper = self.data_module.Wrapper(config, rng) + # Alias data for easy access + self.data = self.data_wrapper.data + + # Overwrite pairs directory with dataset name + self.pair_dir = '{}/{}/{}/'.format( + config.pair_dir, config.data_type, config.data_name) + # And with the hash + self.pair_dir = '{}/{}'.format( + self.pair_dir, self.data_wrapper.hash) + + # Pointers for each task + self.batch_start = {} + self.epoch = {} + self.ind = {} + self.pairs = {} + for task in self.data: + # Counters to indicate the position of the batch in epoch + self.batch_start[task] = 0 + self.epoch[task] = 0 + + # Mark LUT to un-init + self.LUT_kp[task] = None + self.LUT_nonkp[task] = None + if hasattr(self, "th_dist"): + self.LUT_below_th[task] = None + + # Create initial pairs + self.pairs[task] = self._create_pairs( + task, self.config.pair_use_cache) + # Create Initial random permutation + self.ind[task] = np.random.permutation( + len(self.pairs[task]["P1"])) + + def next_batch(self, task, subtask, batch_size, aug_rot=0, num_empty=0): + + # Dictionary to return + cur_data = {} + + assert batch_size > 0 + + # Check if we can fill the data + if self.batch_start[task] + batch_size >= len(self.ind[task]): + # If we can't shuffle the indices, reset pointer, and increase + # epoch + self.ind[task] = np.random.permutation( + len(self.pairs[task]["P1"])) + self.batch_start[task] = 0 + self.epoch[task] += 1 + # Every N epochs, recreate pairs. Note that the number of pairs + # will NEVER change + if self.config.pair_interval > 0 and task == "train": + # Always active for oxford (small set) + if self.config.data_name is "oxford" or self.config.regen_pairs: + if self.epoch[task] % self.config.pair_interval == 0: + self.pairs[task] = self._create_pairs( + task, self.config.pair_use_cache, overwrite=False) + + # Pack the data according to pairs + for _type in ["patch", "xyz", "angle"]: + cur_data[_type] = {} + for _name in ["P1", "P2", "P3", "P4"]: + _pair = self.pairs[task][_name] + cur_data[_type][_name] = np.asarray([ + self.data[task][_type][_pair[ + self.ind[task][_i + self.batch_start[task]] + ]] for _i in xrange(batch_size) + ]) + + # Work-around for partial batches: add empty samples + if num_empty > 0: + cur_data[_type][_name].resize( + (cur_data[_type][_name].shape[0] + num_empty,) + + cur_data[_type][_name].shape[1:]) + + # If it's the patch, make it into NHWC format + if _type == "patch": + if self.data_wrapper.data_order == "NCHW": + cur_data[_type][_name] = np.array( + np.transpose(cur_data[_type][_name], + (0, 2, 3, 1)) + ) + + # Augment rotations: -1 (zeros), 0 (no aug ST, do nothing), 1 (random) + # Allows us to separate train/val (not doing it for now) + if aug_rot > 0: + cur_data["aug_rot"] = {} + for _name in ["P1", "P2", "P3", "P4"]: + rot = 2 * np.pi * np.random.rand(batch_size) + cur_data["aug_rot"][_name] = {} + cur_data["aug_rot"][_name]["angle"] = rot + cur_data["aug_rot"][_name]["cs"] = np.concatenate( + (np.cos(rot)[..., None], np.sin(rot)[..., None]), + axis=1) + elif aug_rot < 0: + cur_data["aug_rot"] = {} + for _name in ["P1", "P2", "P3", "P4"]: + rot = np.zeros(batch_size) + cur_data["aug_rot"][_name] = {} + cur_data["aug_rot"][_name]["angle"] = rot + cur_data["aug_rot"][_name]["cs"] = np.concatenate( + (np.cos(rot)[..., None], np.sin(rot)[..., None]), + axis=1) + + # Update the batch start location + self.batch_start[task] += batch_size + + return cur_data + + def _create_pairs(self, 
task, use_cache=True, overwrite=False): + """Generate the pairs from ID + + This function should return the pair indices given the task. The return + type is expected to be a dictionary, where you can access by doing + res["P1"], for example. + + """ + + # Use the cache file if asked + if use_cache: + pairs_file = os.path.join( + self.pair_dir, "{}.h5".format(task)) + if not os.path.exists(self.pair_dir): + os.makedirs(self.pair_dir) + if os.path.exists(pairs_file) and not overwrite: + if self.config.use_augmented_set: + # Add rotation augmentation if it does not exist (compat) + _f = h5py.File(pairs_file, "r+") + for name in ["P1", "P2", "P3", "P4"]: + if (name + "_rot_aug") not in _f: + _f.create_dataset( + name + "_rot_aug", data=2 * + np.pi * self.rng.rand(_f[name].size)) + _f.close() + return loadh5(pairs_file) + + # Make a lookup table for getting the indices of each sample. This can + # be easily done by sorting by ID, and storing the indices to a + # dictionary + if self.LUT_kp[task] is None: + # Create empty dictionary + self.LUT_kp[task] = {} + self.LUT_nonkp[task] = {} + if hasattr(self, "th_dist"): + self.LUT_below_th[task] = {} + # Build list of indices + dist_idx_reverse = np.zeros( + self.data[task]["ID"].max() + 1, + dtype=np.int64) + dist_idx_reverse[self.data[task]["dist_idx"]] = range( + 1, self.data[task]["dist_idx"].size + 1) + dist_idx_reverse -= 1 + # Argsort the ID + print("[{}] sorting IDs...".format(task)) + ID = self.data[task]["ID"] + ind = np.argsort(ID) + # Go through them one by one -- the easy way + print("[{}] building LUT...".format(task)) + start = 0 + start_ID = ID[ind[start]] + for end in xrange(1, len(ind)): + if end % 100 == 0: + print("\r --- working on {}/{}".format( + end + 1, len(ind)), end="") + sys.stdout.flush() + + # Check if indices have changed or we reached the end + if start_ID != ID[ind[end]] or end == len(ind) - 1: + # Store them in proper LUT + if start_ID < 0: + self.LUT_nonkp[task][-1] = ind[start:end] + else: + self.LUT_kp[task][start_ID] = ind[start:end] + if hasattr(self, "th_dist"): + v = dist_idx_reverse[start_ID] + assert v >= 0, "Invalid index" + d = self.data[task]["dist_mat"][v] + # 3D points are 1-indexed + self.LUT_below_th[task][start_ID] = 1 + \ + np.where((d > 0) & (d < self.th_dist))[0] + # Update start position + start = end + start_ID = ID[ind[start]] + print("\r --- done. ") + + # The dictionary to return + cur_pairs = {} + for name in ["P1", "P2", "P3", "P4"]: + cur_pairs[name] = [] + + # For each kp item, create a random pair + # + # Note that this is different from what we used to do. This way seems + # better as we will look at each keypoint once. 
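+        # Summary of the loop below: for every keypoint observed in at least
+        # two views, P1 and P2 are two random patches of the same 3D point,
+        # P3 is a patch of a different keypoint (excluding points closer
+        # than th_dist in 3D when that threshold is defined), and P4 is a
+        # random non-keypoint patch.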
+ print("[{}] creating pairs...".format(task)) + num_kp = len(self.LUT_kp[task]) + for i in xrange(num_kp): + if i % 100 == 0: + print("\r --- working on {}/{}".format( + i + 1, num_kp), end="") + sys.stdout.flush() + _kp = list(self.LUT_kp[task].keys())[i] + + # Check if we have enough views of this point and skip + if len(self.LUT_kp[task][_kp]) < 2: + continue + + # For P1 and P2 -- Select two random patches + P1, P2 = self.rng.choice( + self.LUT_kp[task][_kp], 2, replace=False) + + # For P3 -- Select a different keypoint randomly + # Generate a list of keys + valid_keys = set(self.LUT_kp[task]) + # Remove self + valid_keys -= set([_kp]) + # Remove points below the distance threshold + if hasattr(self, "th_dist"): + valid_keys -= set(valid_keys.intersection( + self.LUT_below_th[task][_kp])) + + _kp2 = self.rng.choice(list(valid_keys)) + P3 = self.rng.choice(self.LUT_kp[task][_kp2], 1)[0] + + # For P4 -- Select a random nonkp + P4 = self.rng.choice(self.LUT_nonkp[task][-1], 1)[0] + + # Append to the list + for name in ["P1", "P2", "P3", "P4"]: + cur_pairs[name].append(eval(name)) + print("\r --- done. ") + + # Convert them to np arrays + for name in ["P1", "P2", "P3", "P4"]: + cur_pairs[name] = np.asarray(cur_pairs[name]) + + # Augment pairs with rotation data + if self.config.use_augmented_set: + for name in ["P1", "P2", "P3", "P4"]: + cur_pairs[name + "_rot_aug"] = 2 * np.pi * \ + self.rng.rand(cur_pairs[name].size) + + # Save to cache if asked + if use_cache: + if not os.path.exists(self.pair_dir): + os.makedirs(self.pair_dir) + saveh5(cur_pairs, pairs_file) + + return cur_pairs + +# +# lift.py ends here diff --git a/datasets/test.py b/datasets/test.py new file mode 100644 index 0000000..defafdc --- /dev/null +++ b/datasets/test.py @@ -0,0 +1,150 @@ +# test.py --- +# +# Filename: test.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 13:43:44 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. 
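+# Illustrative usage of the training dataset defined in datasets/lift.py
+# above (a sketch; the batch size and subtask are hypothetical, the keys are
+# the ones next_batch produces):
+#
+#   dataset = Dataset(config, rng)
+#   batch = dataset.next_batch("train", "desc", batch_size=128)
+#   patches = batch["patch"]["P1"]   # NHWC patches for pair element P1
+#   xyz_gt = batch["xyz"]["P1"]      # ground-truth perturbations for P1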
+ +# Code: + +import os +import time +from copy import deepcopy + +import cv2 +import numpy as np + +from datasets.eccv2016.helper import load_patches +from utils import IDX_ANGLE, get_patch_size, get_ratio_scale, loadKpListFromTxt + + +class Dataset(object): + """The Dataset Module + + + """ + + def __init__(self, config, rng): + self.config = config + self.rng = rng + + def load_image(self): + """ Returns the image to work with """ + + image_file_name = self.config.test_img_file + + # ------------------------------------------------------------------------ + # Run learned network + start_time = time.clock() + # resize_scale = 1.0 + # If there is not gray image, load the color one and convert to gray + # read the image + if os.path.exists(image_file_name.replace( + "image_color", "image_gray" + )) and "image_color" in image_file_name: + image_gray = cv2.imread(image_file_name.replace( + "image_color", "image_gray" + ), 0) + image_color = deepcopy(image_gray) + image_resized = image_gray + # ratio_h = float( + # image_resized.shape[0]) / float(image_gray.shape[0]) + # ratio_w = float( + # image_resized.shape[1]) / float(image_gray.shape[1]) + else: + # read the image + image_color = cv2.imread(image_file_name) + image_resized = image_color + # ratio_h = float( + # image_resized.shape[0]) / float(image_color.shape[0]) + # ratio_w = float( + # image_resized.shape[1]) / float(image_color.shape[1]) + image_gray = cv2.cvtColor( + image_resized, + cv2.COLOR_BGR2GRAY).astype("float32") + + assert len(image_gray.shape) == 2 + + end_time = time.clock() + load_prep_time = (end_time - start_time) * 1000.0 + print("Time taken to read and prepare the image is {} ms".format( + load_prep_time + )) + + return image_color, image_gray, load_prep_time + + def load_data(self): + """Returns the patch, given the keypoint structure + + LATER: Cleanup. We currently re-use the utils we had from data + extraction. 
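+
+        Example
+        -------
+        Illustrative use; the keys are the ones this method returns::
+
+            data = Dataset(config, rng).load_data()
+            patches = data["patch"]   # NHWC, one patch per keypoint
+            kps = data["kps"]         # original keypoint entries
+            angles = data["angle"]    # Nx1, in radians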
+ + """ + + # Load image + img = cv2.imread(self.config.test_img_file) + # If color image, turn it to gray + if len(img.shape) == 3: + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + in_dim = 1 + + # Load keypoints + kp = np.asarray(loadKpListFromTxt(self.config.test_kp_file)) + + # Use load patches function + # Assign dummy values to y, ID, angle + y = np.zeros((len(kp),)) + ID = np.zeros((len(kp),), dtype='int64') + # angle = np.zeros((len(kp),)) + angle = np.pi / 180.0 * kp[:, IDX_ANGLE] # store angle in radians + + # load patches with id (drop out of boundary) + bPerturb = False + fPerturbInfo = np.zeros((3,)) + dataset = load_patches(img, kp, y, ID, angle, + get_ratio_scale(self.config), 1.0, + int(get_patch_size(self.config)), + int(self.config.desc_input_size), in_dim, + bPerturb, fPerturbInfo, bReturnCoords=True) + + # Change old dataset return structure to necessary data + x = dataset[0] + # y = dataset[1] + # ID = dataset[2] + pos = dataset[3] + angle = dataset[4] + coords = dataset[5] + + # Return the dictionary structure + cur_data = {} + cur_data["patch"] = np.transpose(x, (0, 2, 3, 1)) # In NHWC + cur_data["kps"] = coords + cur_data["xyz"] = pos + # Make sure that angle is a Nx1 vector + cur_data["angle"] = np.reshape(angle, (-1, 1)) + + return cur_data + +# +# test.py ends here diff --git a/layers.py b/layers.py new file mode 100644 index 0000000..bd95b40 --- /dev/null +++ b/layers.py @@ -0,0 +1,316 @@ +# layers.py --- +# +# Filename: layers.py +# Description: Special layers not included in tensorflow +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Thu Jun 29 12:23:35 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + + +import numpy as np +import tensorflow as tf +import tensorflow.contrib.layers as tcl + +from utils import get_tensor_shape, get_W_b_conv2d, get_W_b_fc + + +def leaky_relu(x, alpha=0.2): + return tf.maximum(tf.minimum(0.0, alpha * x), x) + + +def relu(x): + return tf.nn.relu(x) + + +def batch_norm(x, training, data_format="NHWC"): + # return tcl.batch_norm(x, center=True, scale=True, is_training=training, data_format=data_format) + if data_format == "NHWC": + axis = -1 + else: + axis = 1 + return tf.layers.batch_normalization(x, training=training, trainable=False, axis=axis) + + +def norm_spatial_subtractive(inputs, sub_kernel, data_format="NHWC"): + """Performs the spatial subtractive normalization + + Parameters + ---------- + + inputs: tensorflow 4D tensor, NHWC format + + input to the network + + sub_kernel: numpy.ndarray, 2D matrix + + the subtractive normalization kernel + + """ + + raise NotImplementedError( + "This function is buggy! don't use before extensive debugging!") + + # ---------- + # Normalize kernel. + # Note that unlike Torch, we don't divide the kernel here. We divide + # when it is fed to the convolution, since we use it to generate the + # coefficient map. + kernel = sub_kernel.astype("float32") + norm_kernel = (kernel / np.sum(kernel)) + + # ---------- + # Compute the adjustment coef. + # This allows our mean computation to compensate for the border area, + # where you have less terms adding up. Torch used convolution with a + # ``one'' image, but since we do not want the library to depend on + # other libraries with convolutions, we do it manually here. 
+ input_shape = get_tensor_shape(inputs) + assert len(input_shape) == 4 + if data_format == "NHWC": + coef = np.ones(input_shape[1:3], dtype="float32") + else: + coef = np.ones(input_shape[2:], dtype="float32") + pad_x = norm_kernel.shape[1] // 2 + pad_y = norm_kernel.shape[0] // 2 + + # Corners + # for the top-left corner + tl_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::-1, ::-1], axis=0), axis=1)[::1, ::1] + coef[:pad_y + 1, :pad_x + 1] = tl_cumsum_coef[pad_y:, pad_x:] + # for the top-right corner + tr_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::-1, ::1], axis=0), axis=1)[::1, ::-1] + coef[:pad_y + 1, -pad_x - 1:] = tr_cumsum_coef[pad_y:, :pad_x + 1] + # for the bottom-left corner + bl_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::1, ::-1], axis=0), axis=1)[::-1, ::1] + coef[-pad_y - 1:, :pad_x + 1] = bl_cumsum_coef[:pad_y + 1, pad_x:] + # for the bottom-right corner + br_cumsum_coef = np.cumsum(np.cumsum( + norm_kernel[::1, ::1], axis=0), axis=1)[::-1, ::-1] + coef[-pad_y - 1:, -pad_x - 1:] = br_cumsum_coef[:pad_y + 1, :pad_x + 1] + + # Sides + tb_slice = slice(pad_y + 1, -pad_y - 1) + # for the left side + fill_value = tl_cumsum_coef[-1, pad_x:] + coef[tb_slice, :pad_x + 1] = fill_value.reshape([1, -1]) + # for the right side + fill_value = br_cumsum_coef[0, :pad_x + 1] + coef[tb_slice, -pad_x - 1:] = fill_value.reshape([1, -1]) + lr_slice = slice(pad_x + 1, -pad_x - 1) + # for the top side + fill_value = tl_cumsum_coef[pad_y:, -1] + coef[:pad_y + 1, lr_slice] = fill_value.reshape([-1, 1]) + # for the right side + fill_value = br_cumsum_coef[:pad_y + 1, 0] + coef[-pad_y - 1:, lr_slice] = fill_value.reshape([-1, 1]) + + # # code for validation of above + # img = np.ones_like(input, dtype='float32') + # import cv2 + # coef_cv2 = cv2.filter2D(img, -1, norm_kernel, + # borderType=cv2.BORDER_CONSTANT) + + # ---------- + # Extract convolutional mean + # Make filter a c01 filter by repeating. Note that we normalized above + # with the number of repetitions we are going to do. + if data_format == "NHWC": + norm_kernel = np.tile(norm_kernel, [input_shape[-1], 1, 1]) + else: + norm_kernel = np.tile(norm_kernel, [input_shape[1], 1, 1]) + # Re-normlize the kernel so that the sum is one. + norm_kernel /= np.sum(norm_kernel) + # add another axis in from to make oc01 filter, where o is the number + # of output dimensions (in our case, 1!) + norm_kernel = norm_kernel[np.newaxis, ...] + # # To pad with zeros, half the size of the kernel (only for 01 dims) + # border_mode = tuple(s // 2 for s in norm_kernel.shape[2:]) + # Convolve the mean filter. Results in shape of (batch_size, + # input_shape[1], input_shape[2], 1). + # For tensorflow, the kernel shape is 01co, which is different.... why?! + conv_mean = tf.nn.conv2d( + inputs, + norm_kernel.astype("float32").transpose(2, 3, 1, 0), + strides=[1, 1, 1, 1], + padding="SAME", + data_format=data_format, + ) + + # ---------- + # Adjust convolutional mean with precomputed coef + # This is to prevent border values being too small. + if data_format == "NHWC": + coef = coef[None][..., None].astype("float32") + else: + coef = coef[None, None].astype("float32") + adj_mean = conv_mean / coef + # # Make second dimension broadcastable as we are going to + # # subtract for all channels. 
+ # adj_mean = T.addbroadcast(adj_mean, 1) + + # ---------- + # Subtract mean + sub_normalized = inputs - adj_mean + + # # line for debugging + # test = theano.function(inputs=[input], outputs=[sub_normalized]) + + return sub_normalized + + +def pool_l2(inputs, ksize, stride, padding, data_format="NHWC"): + """L2 pooling, NHWC""" + + if data_format == "NHWC": + ksizes = [1, ksize, ksize, 1] + strides = [1, stride, stride, 1] + else: + ksizes = [1, 1, ksize, ksize] + strides = [1, 1, stride, stride] + + scaler = tf.cast(ksize * ksize, tf.float32) + + return tf.sqrt( + scaler * # Multiply since we want to sum + tf.nn.avg_pool( + tf.square(inputs), + ksize=ksizes, + strides=strides, + padding=padding, + data_format=data_format, + )) + + +def pool_max(inputs, ksize, stride, padding, data_format="NHWC"): + """max pooling, NHWC""" + + if data_format == "NHWC": + ksizes = [1, ksize, ksize, 1] + strides = [1, stride, stride, 1] + else: + ksizes = [1, 1, ksize, ksize] + strides = [1, 1, stride, stride] + + return tf.nn.max_pool( + inputs, + ksize=ksizes, + strides=strides, + padding=padding, + data_format=data_format, + ) + + +def conv_2d(inputs, ksize, nchannel, stride, padding, data_format="NHWC"): + """conv 2d, NHWC""" + + if data_format == "NHWC": + fanin = get_tensor_shape(inputs)[-1] + strides = [1, stride, stride, 1] + else: + fanin = get_tensor_shape(inputs)[1] + strides = [1, 1, stride, stride] + W, b = get_W_b_conv2d(ksize=ksize, fanin=fanin, fanout=nchannel) + conv = tf.nn.conv2d( + inputs, W, strides=strides, + padding=padding, data_format=data_format) + + return tf.nn.bias_add(conv, b, data_format=data_format) + + +def fc(inputs, fanout): + """fully connected, NC """ + + inshp = get_tensor_shape(inputs) + fanin = np.prod(inshp[1:]) + + # Flatten input if needed + if len(inshp) > 2: + inputs = tf.reshape(inputs, (inshp[0], fanin)) + + W, b = get_W_b_fc(fanin=fanin, fanout=fanout) + mul = tf.matmul(inputs, W) + + return tf.nn.bias_add(mul, b) + + +def ghh(inputs, num_in_sum, num_in_max, data_format="NHWC"): + """GHH layer + + LATER: Make it more efficient + + """ + + # Assert NHWC + assert data_format == "NHWC" + + # Check validity + inshp = get_tensor_shape(inputs) + num_channels = inshp[-1] + pool_axis = len(inshp) - 1 + assert (num_channels % (num_in_sum * num_in_max)) == 0 + + # Abuse cur_in + cur_in = inputs + + # # Okay the maxpooling and avgpooling functions do not like weird + # # pooling. Just reshape to avoid this issue. + # inshp = get_tensor_shape(inputs) + # numout = int(inshp[1] / (num_in_sum * num_in_max)) + # cur_in = tf.reshape(cur_in, [ + # inshp[0], numout, num_in_sum, num_in_max, inshp[2], inshp[3] + # ]) + + # Reshaping does not work for undecided input sizes. 
use split instead + cur_ins_to_max = tf.split( + cur_in, num_channels // num_in_max, axis=pool_axis) + + # Do max and concat them back + cur_in = tf.concat([ + tf.reduce_max(cur_ins, axis=pool_axis, keep_dims=True) for + cur_ins in cur_ins_to_max + ], axis=pool_axis) + + # Create delta + delta = (1.0 - 2.0 * (np.arange(num_in_sum) % 2)).astype("float32") + delta = tf.reshape(delta, [1] * (len(inshp) - 1) + [num_in_sum]) + + # Again, split into multiple pieces + cur_ins_to_sum = tf.split( + cur_in, num_channels // (num_in_max * num_in_sum), + axis=pool_axis) + + # Do delta multiplication, sum, and concat them back + cur_in = tf.concat([ + tf.reduce_sum(cur_ins * delta, axis=pool_axis, keep_dims=True) for + cur_ins in cur_ins_to_sum + ], axis=pool_axis) + + return cur_in + +# +# layers.py ends here diff --git a/losses.py b/losses.py new file mode 100644 index 0000000..46b58f0 --- /dev/null +++ b/losses.py @@ -0,0 +1,129 @@ +# losses.py --- +# +# Filename: losses.py +# Description: WRITEME +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Wed Jun 28 20:06:43 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import tensorflow as tf + +from utils import get_rect_inter, rebase_xyz + + +def loss_overlap(kp_pos1, gt_pos1, kp_pos2, gt_pos2, r_base, + alpha_non_overlap=1.0): + """Loss from the overlap between keypoints detected in P1 and P2 + + Note that due to the random perturbation, we need to compensate back the + scale and the positions. + + Parameters + ---------- + + WRITEME + + """ + + # Rebase the coordinates to the same base. + xyz1 = rebase_xyz(kp_pos1, gt_pos1) + xyz2 = rebase_xyz(kp_pos2, gt_pos2) + + # x,y and r for first patch + scaler = 2.0**xyz1[:, 2] + x1 = xyz1[:, 0] * scaler + y1 = xyz1[:, 1] * scaler + r1 = r_base * scaler + + # x,y and r for second patch + scaler = 2.0**xyz2[:, 2] + x2 = xyz2[:, 0] * scaler + y2 = xyz2[:, 1] * scaler + r2 = r_base * scaler + + # compute intersection and union + intersection = get_rect_inter(x1, y1, r1, x2, y2, r2) + union = (2.0 * r1)**2.0 + (2.0 * r2)**2.0 - intersection + + # add to cost (this has max value of 1) + cost = 1.0 - intersection / union + + # compute the non-overlapping region cost + dx = abs(x1 - x2) + gap_x = tf.nn.relu(dx - (r1 + r2)) + dy = abs(y1 - y2) + gap_y = tf.nn.relu(dy - (r1 + r2)) + + # divide by the sqrt(union) to more-or-less be in same range as + # above cost + cost += alpha_non_overlap * (gap_x + gap_y) / (union**0.5) + + return cost + + +def loss_classification(s1, s2, s3, s4): + """Loss from the classification + + Note s1, s2, and s3 are positive whereas s4 is negative. We therefore need + to balance the cost that comes out of this. The original implementation for + doing this is not identical to the new implementation, which may cause some + minor differences. + + """ + + # Get cost with l2 hinge loss + cost_p = tf.add_n([ + tf.nn.relu(1.0 - s1), tf.nn.relu(1.0 - s2), tf.nn.relu(1.0 - s3)]) + cost_n = tf.nn.relu(1.0 + s4) + # Make sure the elements sum to one. i.e. balance them. 
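+    # cost_p is the sum of three hinge terms and cost_n a single one, so the
+    # 1/6 and 1/2 factors give the positive and negative sides an equal total
+    # weight of one half each.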
+ cost = cost_p / 6.0 + cost_n / 2.0 + + return cost + + +def loss_desc_pair(d1, d2): + """Loss using the euclidean distance + + """ + + # return tf.norm(d1 - d2, ord="euclidean", axis=1) + return tf.sqrt(tf.reduce_sum(tf.square(d1 - d2), axis=1)) + + +def loss_desc_non_pair(d1, d3, margin, d2=None): + """Loss using the euclidean distance and the margin + + """ + + pair_dist_1_to_3 = tf.sqrt(tf.reduce_sum(tf.square(d1 - d3), axis=1)) + if d2 is not None: + pair_dist_2_to_3 = tf.sqrt(tf.reduce_sum(tf.square(d2 - d3), axis=1)) + return tf.nn.relu(margin - tf.minimum(pair_dist_1_to_3, pair_dist_2_to_3)) + else: + return tf.nn.relu(margin - pair_dist_1_to_3) + + +# +# losses.py ends here diff --git a/main.py b/main.py new file mode 100644 index 0000000..223bbe8 --- /dev/null +++ b/main.py @@ -0,0 +1,81 @@ +# main.py --- +# +# Filename: main.py +# Description: WRITEME +# Author: Kwang Moo Yi, Lin Chen +# Maintainer: Kwang Moo Yi +# Created: ??? +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + + +import os +import sys + +import numpy as np +import tensorflow as tf + +import getpass + +from config import get_config, save_config + +config = None + + +def main(_): + + # Create a random state using the random seed given by the config. This + # should allow reproducible results. + rng = np.random.RandomState(config.random_seed) + tf.set_random_seed(config.random_seed) + + # Train / Test + if config.task == "train": + # Import trainer module + from trainer import Trainer + + # Create a trainer object + task = Trainer(config, rng) + save_config(config.logdir, config) + + else: + # Import tester module + from tester import Tester + + # Create a tester object + task = Tester(config, rng) + + # Run the task + task.run() + + +if __name__ == "__main__": + config, unparsed = get_config(sys.argv) + + if len(unparsed) > 0: + raise RuntimeError("Unknown arguments were given! Check the command line!") + + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) + +# +# main.py ends here diff --git a/modules/__init__.py b/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modules/bypass.py b/modules/bypass.py new file mode 100644 index 0000000..462d4c3 --- /dev/null +++ b/modules/bypass.py @@ -0,0 +1,69 @@ +# bypass.py --- +# +# Filename: bypass.py +# Description: WRITEME +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Thu Jun 29 14:13:15 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import tensorflow as tf + + +def bypass_kp(xyz): + """Use GT information for keypoint detector + + Should return the xyz coordinates, as well as a dummy score. 
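+    The score is only a placeholder (the x coordinate is reused), so that the
+    bypass output has the same dictionary structure as the real detector.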
+ + """ + + # we always expect a dictionary as return value to be more explicit + res = {} + + # Bypass for xy coordiantes + res["xyz"] = xyz + + # Bypass for score (a dummy) + res["score"] = xyz[:, 0] + + return res + + +def bypass_ori(angle): + """Use GT information for orientation estimator + + Should return the cosine and the sine + + """ + + # we always expect a dictionary as return value to be more explicit + res = {} + + # Bypass for angle + res["cs"] = tf.concat([tf.cos(angle), tf.sin(angle)], axis=1) + + return res + +# +# bypass.py ends here diff --git a/modules/lift_desc.py b/modules/lift_desc.py new file mode 100644 index 0000000..38242ef --- /dev/null +++ b/modules/lift_desc.py @@ -0,0 +1,127 @@ +# lift_desc.py --- +# +# Filename: lift_desc.py +# Description: WRITEME +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Wed Jun 28 20:02:58 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + + +import os + +import tensorflow as tf + +from layers import batch_norm, conv_2d, norm_spatial_subtractive, pool_l2 +from utils import image_summary_nhwc, loadh5 + + +def process(inputs, bypass, name, skip, config, is_training): + """WRITEME + + inputs: input to the network + bypass: gt to by used when trying to bypass + name: name of the siamese branch + skip: whether to apply the bypass information + + Note + ---- + + We don't have to worry about the reuse flag here, since it is already dealt + with in the higher level. We just need to inherit it. + + """ + + # We never skip descriptor + assert skip is False + + # we always expect a dictionary as return value to be more explicit + res = {} + + # let's look at the inputs that get fed into this layer + image_summary_nhwc(name + "-input", inputs) + + # Import the lift_desc_sub_kernel.h5 to get the kernel file + script_dir = os.path.dirname(os.path.realpath(__file__)) + sub_kernel = loadh5(script_dir + "/lift_desc_sub_kernel.h5")["kernel"] + + # activation + if config.desc_activ == "tanh": + activ = tf.nn.tanh + elif config.desc_activ == "relu": + activ = tf.nn.relu + else: + raise RuntimeError('Unknown activation type') + + # pooling + def pool(cur_in, desc_pool, ksize): + if desc_pool == "l2_pool": + return pool_l2(cur_in, ksize, ksize, "VALID") + elif desc_pool == "max_pool": + return tf.nn.max_pool(cur_in, (1, ksize, ksize, 1), (1, ksize, ksize, 1), "VALID") + elif desc_pool == "avg_pool": + return tf.nn.avg_pool(cur_in, (1, ksize, ksize, 1), (1, ksize, ksize, 1), "VALID") + else: + raise RuntimeError('Unknown pooling type') + + # now abuse cur_in so that we can simply copy paste + cur_in = inputs + + # lets apply batch normalization on the input - we did not normalize the + # input range! 
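+    # (With batch normalization on the input, the patches only need to be
+    # brought into a rough [-1, 1] range upstream instead of being whitened
+    # with dataset-specific mean/std statistics.)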
+ with tf.variable_scope("input-bn"): + if config.use_input_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + + with tf.variable_scope("conv-act-pool-norm-1"): + cur_in = conv_2d(cur_in, 7, 32, 1, "VALID") + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + cur_in = activ(cur_in) + cur_in = pool(cur_in, config.desc_pool, 2) + if config.use_subtractive_norm: + cur_in = norm_spatial_subtractive(cur_in, sub_kernel) + + with tf.variable_scope("conv-act-pool-norm-2"): + cur_in = conv_2d(cur_in, 6, 64, 1, "VALID") + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + cur_in = activ(cur_in) + cur_in = pool(cur_in, config.desc_pool, 3) + if config.use_subtractive_norm: + cur_in = norm_spatial_subtractive(cur_in, sub_kernel) + + with tf.variable_scope("conv-act-pool-3"): + cur_in = conv_2d(cur_in, 5, 128, 1, "VALID") + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + cur_in = activ(cur_in) + cur_in = pool(cur_in, config.desc_pool, 4) + + res["desc"] = tf.reshape(cur_in, (-1, 128)) + + return res + +# +# lift_desc.py ends here diff --git a/modules/lift_kp.py b/modules/lift_kp.py new file mode 100644 index 0000000..6c65d2d --- /dev/null +++ b/modules/lift_kp.py @@ -0,0 +1,181 @@ +# lift_kp.py --- +# +# Filename: lift_kp.py +# Description: WRITEME +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Wed Jun 28 20:01:16 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import numpy as np +import tensorflow as tf + +from layers import conv_2d, ghh, batch_norm +from modules.bypass import bypass_kp +from utils import get_patch_size_no_aug, get_tensor_shape, \ + image_summary_nhwc, softmax + + +def process(inputs, bypass, name, skip, config, is_training): + """WRITEME. + + LATER: Clean up + + inputs: input to the network + bypass: gt to by used when trying to bypass + name: name of the siamese branch + skip: whether to apply the bypass information + + """ + + # let's look at the inputs that get fed into this layer except when we are + # looking at the whole image + if name != "img": + image_summary_nhwc(name + "-input", inputs) + + if skip: + return bypass_kp(bypass) + + # we always expect a dictionary as return value to be more explicit + res = {} + + # now abuse cur_in so that we can simply copy paste + cur_in = inputs + + # lets apply batch normalization on the input - we did not normalize the + # input range! + # with tf.variable_scope("input-bn"): + # if config.use_input_batch_norm: + # cur_in = batch_norm(cur_in, training=is_training) + + with tf.variable_scope("conv-ghh-1"): + nu = 1 + ns = 4 + nm = 4 + cur_in = conv_2d( + cur_in, config.kp_filter_size, nu * ns * nm, 1, "VALID") + # batch norm on the output of convolutions! 
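+        # (left disabled here: the raw GHH response below is used directly as
+        # the score map)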
+ # if config.use_batch_norm: + # cur_in = batch_norm(cur_in, training=is_training) + cur_in = ghh(cur_in, ns, nm) + + res["scoremap-uncut"] = cur_in + + # --------------------------------------------------------------------- + # Check how much we need to cut + kp_input_size = config.kp_input_size + patch_size = get_patch_size_no_aug(config) + desc_input_size = config.desc_input_size + rf = float(kp_input_size) / float(patch_size) + + input_shape = get_tensor_shape(inputs) + uncut_shape = get_tensor_shape(cur_in) + req_boundary = np.ceil(rf * np.sqrt(2) * desc_input_size / 2.0).astype(int) + cur_boundary = (input_shape[2] - uncut_shape[2]) // 2 + crop_size = req_boundary - cur_boundary + + # Stop building the network outputs if we are building for the full image + if name == "img": + return res + + # # Debug messages + # resized_shape = get_tensor_shape(inputs) + # print(' -- kp_info: output score map shape {}'.format(uncut_shape)) + # print(' -- kp_info: input size after resizing {}'.format(resized_shape[2])) + # print(' -- kp_info: output score map size {}'.format(uncut_shape[2])) + # print(' -- kp info: required boundary {}'.format(req_boundary)) + # print(' -- kp info: current boundary {}'.format(cur_boundary)) + # print(' -- kp_info: additional crop size {}'.format(crop_size)) + # print(' -- kp_info: additional crop size {}'.format(crop_size)) + # print(' -- kp_info: final cropped score map size {}'.format( + # uncut_shape[2] - 2 * crop_size)) + # print(' -- kp_info: movement ratio will be {}'.format(( + # float(uncut_shape[2] - 2.0 * crop_size) / + # float(kp_input_size - 1)))) + + # Crop center + cur_in = cur_in[:, crop_size:-crop_size, crop_size:-crop_size, :] + res["scoremap"] = cur_in + + # --------------------------------------------------------------------- + # Mapping layer to x,y,z + com_strength = config.kp_com_strength + # eps = 1e-10 + scoremap_shape = get_tensor_shape(cur_in) + + od = len(scoremap_shape) + # CoM to get the coordinates + pos_array_x = tf.range(scoremap_shape[2], dtype=tf.float32) + pos_array_y = tf.range(scoremap_shape[1], dtype=tf.float32) + + out = cur_in + max_out = tf.reduce_max( + out, axis=list(range(1, od)), keep_dims=True) + o = tf.exp(com_strength * (out - max_out)) # + eps + sum_o = tf.reduce_sum( + o, axis=list(range(1, od)), keep_dims=True) + x = tf.reduce_sum( + o * tf.reshape(pos_array_x, [1, 1, -1, 1]), + axis=list(range(1, od)), keep_dims=True + ) / sum_o + y = tf.reduce_sum( + o * tf.reshape(pos_array_y, [1, -1, 1, 1]), + axis=list(range(1, od)), keep_dims=True + ) / sum_o + + # Remove the unecessary dimensions (i.e. 
flatten them) + x = tf.reshape(x, (-1,)) + y = tf.reshape(y, (-1,)) + + # -------------- + # Turn x, and y into range -1 to 1, where the patch size is + # mapped to -1 and 1 + orig_patch_width = ( + scoremap_shape[2] + np.cast["float32"](req_boundary * 2.0)) + orig_patch_height = ( + scoremap_shape[1] + np.cast["float32"](req_boundary * 2.0)) + + x = ((x + np.cast["float32"](req_boundary)) / np.cast["float32"]( + (orig_patch_width - 1.0) * 0.5) - + np.cast["float32"](1.0)) + y = ((y + np.cast["float32"](req_boundary)) / np.cast["float32"]( + (orig_patch_height - 1.0) * 0.5) - + np.cast["float32"](1.0)) + + # -------------- + # No movement in z direction + z = tf.zeros_like(x) + + res["xyz"] = tf.stack([x, y, z], axis=1) + + # --------------------------------------------------------------------- + # Mapping layer to x,y,z + res["score"] = softmax( + res["scoremap"], axis=list(range(1, od)), + softmax_strength = config.kp_scoremap_softmax_strength) + + return res + +# +# lift_kp.py ends here diff --git a/modules/lift_ori.py b/modules/lift_ori.py new file mode 100644 index 0000000..6f4430a --- /dev/null +++ b/modules/lift_ori.py @@ -0,0 +1,151 @@ +# lift_ori.py --- +# +# Filename: lift_ori.py +# Description: WRITEME +# Author: Kwang Moo Yi +# Maintainer: Kwang Moo Yi +# Created: Wed Jun 28 20:02:50 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import tensorflow as tf + +from layers import batch_norm, conv_2d, fc, ghh, pool_max +from modules.bypass import bypass_ori +from utils import image_summary_nhwc + + +def process(inputs, bypass, name, skip, config, is_training): + """WRITEME. + + inputs: input to the network + bypass: gt to by used when trying to bypass + name: name of the siamese branch + skip: whether to apply the bypass information + + """ + + # let's look at the inputs that get fed into this layer + image_summary_nhwc(name + "-input", inputs) + + if skip: + return bypass_ori(bypass) + + # we always expect a dictionary as return value to be more explicit + res = {} + + # now abuse cur_in so that we can simply copy paste + cur_in = inputs + + # lets apply batch normalization on the input - we did not normalize the + # input range! 
+ with tf.variable_scope("input-bn"): + if config.use_input_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + + with tf.variable_scope("conv-act-pool-1"): + cur_in = conv_2d(cur_in, 5, 10, 1, "VALID") + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + cur_in = tf.nn.relu(cur_in) + cur_in = pool_max(cur_in, 2, 2, "VALID") + + with tf.variable_scope("conv-act-pool-2"): + cur_in = conv_2d(cur_in, 5, 20, 1, "VALID") + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + cur_in = tf.nn.relu(cur_in) + cur_in = pool_max(cur_in, 2, 2, "VALID") + + with tf.variable_scope("conv-act-pool-3"): + cur_in = conv_2d(cur_in, 3, 50, 1, "VALID") + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + cur_in = tf.nn.relu(cur_in) + cur_in = pool_max(cur_in, 2, 2, "VALID") + # res["ori_out3"] = cur_in + + with tf.variable_scope("fc-ghh-drop-4"): + nu = 100 + ns = 4 + nm = 4 + cur_in = fc(cur_in, nu * ns * nm) + # cur_in = fc(cur_in, nu) + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + if config.ori_activation == 'ghh': + cur_in = ghh(cur_in, ns, nm) + elif config.ori_activation == 'tanh': + cur_in = tf.nn.tanh(cur_in) + else: + raise RuntimeError("Bad orientation rectifier") + # cur_in = tf.nn.relu(cur_in) + if config.use_dropout_ori: + raise RuntimeError('Dropout not working properly!') + cur_in = tf.nn.dropout( + cur_in, + keep_prob=1.0 - (0.3 * tf.cast(is_training, tf.float32)), + ) + # res["ori_out4"] = cur_in + + with tf.variable_scope("fc-ghh-5"): + nu = 2 + ns = 4 + nm = 4 + cur_in = fc(cur_in, nu * ns * nm) + # cur_in = fc(cur_in, nu) + if config.use_batch_norm: + cur_in = batch_norm(cur_in, training=is_training) + if config.ori_activation == 'ghh': + cur_in = ghh(cur_in, ns, nm) + elif config.ori_activation == 'tanh': + cur_in = tf.nn.tanh(cur_in) + else: + raise RuntimeError("Bad orientation rectifier") + # cur_in = tf.nn.relu(cur_in) + # res["ori_out5"] = cur_in + + # with tf.variable_scope("fc-ghh-6"): + # cur_in = fc(cur_in, nu) + # res["ori_out6"] = cur_in + + with tf.variable_scope("cs-norm"): + eps = 1e-10 + # First, normalize according to the maximum of the two + cur_in_abs_max = tf.reduce_max(tf.abs(cur_in), axis=1, keep_dims=True) + cur_in = cur_in / tf.maximum(eps, cur_in_abs_max) + # Add an epsilon to avoid singularity + eps = 1e-3 + cur_in += tf.to_float(cur_in >= 0) * eps - tf.to_float(cur_in < 0) * eps + # Now make norm one without worrying about div by zero + cur_in_norm = tf.sqrt(tf.reduce_sum(tf.square( + cur_in), axis=1, keep_dims=True)) + cur_in /= cur_in_norm + + res["cs"] = tf.reshape(cur_in, (-1, 2)) + + return res + + +# +# lift_ori.py ends here diff --git a/modules/spatial_transformer.py b/modules/spatial_transformer.py new file mode 100644 index 0000000..5a14fb0 --- /dev/null +++ b/modules/spatial_transformer.py @@ -0,0 +1,222 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +# +# Modifications by Kwang Moo Yi +# - Range fix +# - Compatibility for Python 3 +# - Reshape added at the end to ensure that we have a proper shape + + +import tensorflow as tf + +# Included to be compatile +from six.moves import xrange + + +def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs): + """Spatial Transformer Layer + + Implements a spatial transformer layer as described in [1]_. + Based on [2]_ and edited by David Dao for Tensorflow. + + Parameters + ---------- + U : float + The output of a convolutional net should have the + shape [num_batch, height, width, num_channels]. + theta: float + The output of the + localisation network should be [num_batch, 6]. + out_size: tuple of two ints + The size of the output of the network (height, width) + + References + ---------- + .. [1] Spatial Transformer Networks + Max Jaderberg, Karen Simonyan, Andrew Zisserman, Koray Kavukcuoglu + Submitted on 5 Jun 2015 + .. [2] https://github.com/skaae/transformer_network/blob/master/transformerlayer.py + + Notes + ----- + To initialize the network to the identity transform init + ``theta`` to : + identity = np.array([[1., 0., 0.], + [0., 1., 0.]]) + identity = identity.flatten() + theta = tf.Variable(initial_value=identity) + + """ + + def _repeat(x, n_repeats): + with tf.variable_scope('_repeat'): + rep = tf.transpose( + tf.expand_dims(tf.ones(shape=tf.stack([n_repeats, ])), 1), [1, 0]) + rep = tf.cast(rep, 'int32') + x = tf.matmul(tf.reshape(x, (-1, 1)), rep) + return tf.reshape(x, [-1]) + + def _interpolate(im, x, y, out_size): + with tf.variable_scope('_interpolate'): + # constants + num_batch = tf.shape(im)[0] + height = tf.shape(im)[1] + width = tf.shape(im)[2] + channels = tf.shape(im)[3] + + x = tf.cast(x, 'float32') + y = tf.cast(y, 'float32') + height_f = tf.cast(height, 'float32') + width_f = tf.cast(width, 'float32') + out_height = out_size[0] + out_width = out_size[1] + zero = tf.zeros([], dtype='int32') + max_y = tf.cast(tf.shape(im)[1] - 1, 'int32') + max_x = tf.cast(tf.shape(im)[2] - 1, 'int32') + + # scale indices from [-1, 1] to [0, width/height - 1] + # Fix range to not go out of bound + x = (x + 1.0) * (width_f - 1.0) / 2.0 + y = (y + 1.0) * (height_f - 1.0) / 2.0 + + # do sampling + x0 = tf.cast(tf.floor(x), 'int32') + x1 = x0 + 1 + y0 = tf.cast(tf.floor(y), 'int32') + y1 = y0 + 1 + + x0 = tf.clip_by_value(x0, zero, max_x) + x1 = tf.clip_by_value(x1, zero, max_x) + y0 = tf.clip_by_value(y0, zero, max_y) + y1 = tf.clip_by_value(y1, zero, max_y) + dim2 = width + dim1 = width * height + base = _repeat(tf.range(num_batch) * dim1, out_height * out_width) + base_y0 = base + y0 * dim2 + base_y1 = base + y1 * dim2 + idx_a = base_y0 + x0 + idx_b = base_y1 + x0 + idx_c = base_y0 + x1 + idx_d = base_y1 + x1 + + # use indices to lookup pixels in the flat image and restore + # channels dim + im_flat = tf.reshape(im, tf.stack([-1, channels])) + im_flat = tf.cast(im_flat, 'float32') + Ia = tf.gather(im_flat, idx_a) + Ib = tf.gather(im_flat, idx_b) + Ic = tf.gather(im_flat, idx_c) + Id = tf.gather(im_flat, idx_d) + + # and finally calculate interpolated values + x0_f = tf.cast(x0, 'float32') + x1_f = tf.cast(x1, 'float32') + y0_f = tf.cast(y0, 'float32') + y1_f = tf.cast(y1, 'float32') + wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1) + wb = tf.expand_dims(((x1_f - x) * (y - y0_f)), 1) + wc = tf.expand_dims(((x - x0_f) * (y1_f - y)), 1) + wd = tf.expand_dims(((x - x0_f) * (y - y0_f)), 1) + 
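+            # Standard bilinear interpolation: each weight is the area of the
+            # rectangle spanned by the sample point and the opposite corner.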
output = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id]) + + return output + + def _meshgrid(height, width): + with tf.variable_scope('_meshgrid'): + # This should be equivalent to: + # x_t, y_t = np.meshgrid(np.linspace(-1, 1, width), + # np.linspace(-1, 1, height)) + # ones = np.ones(np.prod(x_t.shape)) + # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) + x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])), + tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0])) + y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), + tf.ones(shape=tf.stack([1, width]))) + + x_t_flat = tf.reshape(x_t, (1, -1)) + y_t_flat = tf.reshape(y_t, (1, -1)) + + ones = tf.ones_like(x_t_flat) + grid = tf.concat(axis=0, values=[x_t_flat, y_t_flat, ones]) + return grid + + def _transform(theta, input_dim, out_size): + with tf.variable_scope('_transform'): + num_batch = tf.shape(input_dim)[0] + height = tf.shape(input_dim)[1] + width = tf.shape(input_dim)[2] + num_channels = tf.shape(input_dim)[3] + theta = tf.reshape(theta, (-1, 2, 3)) + theta = tf.cast(theta, 'float32') + + # grid of (x_t, y_t, 1), eq (1) in ref [1] + height_f = tf.cast(height, 'float32') + width_f = tf.cast(width, 'float32') + out_height = out_size[0] + out_width = out_size[1] + grid = _meshgrid(out_height, out_width) + grid = tf.expand_dims(grid, 0) + grid = tf.reshape(grid, [-1]) + grid = tf.tile(grid, tf.stack([num_batch])) + grid = tf.reshape(grid, tf.stack([num_batch, 3, -1])) + + # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s) + T_g = tf.matmul(theta, grid) + x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1]) + y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1]) + x_s_flat = tf.reshape(x_s, [-1]) + y_s_flat = tf.reshape(y_s, [-1]) + + input_transformed = _interpolate( + input_dim, x_s_flat, y_s_flat, + out_size) + + output = tf.reshape( + input_transformed, tf.stack([num_batch, out_height, out_width, num_channels])) + return output + + with tf.variable_scope(name): + output = _transform(theta, U, out_size) + + # let output go through a reshape operation to ensure shapes + U_shape = [_s if _s is not None else -1 for _s in + U.get_shape().as_list()] + + output_shape = [U_shape[0], out_size[0], out_size[1], U_shape[3]] + + return tf.reshape(output, output_shape) + + +def batch_transformer(U, theta, out_size, name='BatchSpatialTransformer'): + """Batch Spatial Transformer Layer + + Parameters + ---------- + + U : float + tensor of inputs [num_batch,height,width,num_channels] + theta : float + a set of transformations for each input [num_batch,num_transforms,6] + out_size : int + the size of the output [out_height,out_width] + + Returns: float + Tensor of size [num_batch*num_transforms,out_height,out_width,num_channels] + """ + with tf.variable_scope(name): + num_batch, num_transforms = map(int, theta.get_shape().as_list()[:2]) + indices = [[i] * num_transforms for i in xrange(num_batch)] + input_repeated = tf.gather(U, tf.reshape(indices, [-1])) + return transformer(input_repeated, theta, out_size) diff --git a/networks/__init__.py b/networks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/networks/lift.py b/networks/lift.py new file mode 100644 index 0000000..82484bd --- /dev/null +++ b/networks/lift.py @@ -0,0 +1,899 @@ +# lift.py --- +# +# Filename: lift.py +# Description: WRITEME +# Author: Kwang Moo Yi, Lin Chen +# Maintainer: Kwang Moo Yi +# Created: ????? 
+# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# + +# Code: + +import importlib + +import numpy as np +import tensorflow as tf + +from losses import (loss_classification, loss_desc_non_pair, loss_desc_pair, + loss_overlap) +from modules.spatial_transformer import transformer as transformer +from six.moves import xrange +from utils import (eval_descs, eval_kps, get_patch_size, get_patch_size_no_aug, + get_tensor_shape, image_summary_nhwc, make_theta, + show_all_variables) +from utils.legacy import build_legacy + + +class Network(object): + """The LIFT Network + + LATER + - Do proper mean/std normalization? + + """ + + def __init__(self, sess, config, dataset): + # Save pointer to the tensorflow session + self.sess = sess + # Save pointer to config + self.config = config + # Save pointer to the data module + self.dataset = dataset + # # Summaries to compute for this network + # self.summary = [] + + # Normalizer for the input data (they are raw images) + # Currently normalized to be between -1 and 1 + self.mean = {} + self.std = {} + for _module in ["kp", "ori", "desc"]: + self.mean[_module] = 128.0 + self.std[_module] = 128.0 + + if self.config.use_old_mean_std: + self.mean["kp"] = 116.4368117568544249706974369473755359649658203125 + self.std["kp"] = 88.083076379771597430590190924704074859619140625 + self.mean["ori"] = 116.4368117568544249706974369473755359649658203125 + self.std["ori"] = 88.083076379771597430590190924704074859619140625 + self.mean["desc"] = 110.75389862060546875 + self.std["desc"] = 61.53688812255859375 + + # Account for the keypoint scale change while augmenting rotations + self.scale_aug = float(get_patch_size(self.config)) / \ + float(get_patch_size_no_aug(self.config)) + + # Allocate placeholders + with tf.variable_scope("placeholders"): + self._build_placeholders() + # Build the network + with tf.variable_scope("network"): + self._build_network() + # Build loss + with tf.variable_scope("loss"): + self._build_loss() + # Build the optimization op + with tf.variable_scope("optimization"): + self._build_optim() + + # Build the legacy component. This is only used for accessing old + # framework weights. You can safely ignore this part + build_legacy(self) + + # Show all variables in the network + show_all_variables() + + # Add all variables into histogram summary + for _module in ["kp", "ori", "desc"]: + for _param in self.params[_module]: + tf.summary.histogram(_param.name, _param) + + # Collect all summary (Lazy...) + self.summary = tf.summary.merge_all() + + def forward(self, subtask, cur_data): + """Forward pass of the network. + + Runs the network and returns the losses for the subtask. + + """ + + # parse cur_data and create a feed_dict + feed_dict = self._get_feed_dict(subtask, cur_data) + # set to train mode + feed_dict[self.is_training] = True + + # import IPython + # IPython.embed() + + # specify which loss we are going to retrieve + fetch = { + "loss": self.loss[subtask], + } + + # run on the tensorflow session + res = self.sess.run(fetch, feed_dict=feed_dict) + + # return the losses + return res["loss"] + + def backward(self, subtask, cur_data, provide_summary=False): + """Backward pass of the network. + + Runs the network and updates the parameters, as well as returning the + summary if asked to do so. 
+ + """ + + # parse cur_data and create a feed_dict + feed_dict = self._get_feed_dict(subtask, cur_data) + # set to train mode + feed_dict[self.is_training] = True + + # specify which optim we run depending on the subtask + fetch = { + "optim": self.optim[subtask], + } + if provide_summary: + fetch["summary"] = self.summary + + # run on the tensorflow session + try: + res = self.sess.run(fetch, feed_dict=feed_dict) + except: + print("backward pass had an error, this iteration did nothing.") + return None + + # return summary + if provide_summary: + return res["summary"] + else: + return None + + def validate(self, subtask, cur_data): + """Validation process. + + Runs the network for the given data and returns validation results. The + return value should be a single scalar, which lower is better. + + """ + + # Get feed_dict for this batch + feed_dict = self._get_feed_dict(subtask, cur_data) + # set to train mode + feed_dict[self.is_training] = False + + # Obtain xyz, scores, descs, whatever you need + fetch = {} + if subtask == "kp": + # Obtain xyz's and scores + for _i in xrange(1, 5): + # Detector output + fetch["xyz{}".format(_i)] \ + = self.outputs["kp"]["P{}".format(_i)]["xyz"] + # Warp ground truth if necessary + aug_rot = self.inputs["aug_rot"] \ + if self.config.augment_rotations else None + fetch["pos{}".format(_i)] = self.transform_xyz( + self.inputs["xyz"], + aug_rot, + self.config.batch_size, + self.scale_aug, + transpose=True, + names=["P{}".format(_i)]) + # fetch["pos{}".format(_i)] \ + # = self.inputs["xyz"]["P{}".format(_i)] + fetch["score{}".format(_i)] \ + = self.outputs["kp"]["P{}".format(_i)]["score"] + res = self.sess.run(fetch, feed_dict=feed_dict) + + # Compute the AUC + # neg_weight = self.config.neg_per_pos + prauc = eval_kps( + [res["xyz{}".format(_i + 1)] for _i in xrange(4)], + [res["pos{}".format(_i + 1)]['P{}'.format(_i + 1)] + for _i in xrange(4)], + [res["score{}".format(_i + 1)] for _i in xrange(4)], + self.r_base, + ) + # prauc = eval_kps( + # [res["xyz{}".format(_i + 1)] for _i in xrange(4)], + # [res["pos{}".format(_i + 1)] * self.scale_aug + # for _i in xrange(4)], + # [res["score{}".format(_i + 1)] for _i in xrange(4)], + # self.r_base, + # ) + + return 1.0 - prauc + + else: + # Obtain descriptors for each patch + for _i in xrange(1, 4): + fetch["d{}".format(_i)] \ + = self.outputs["desc"]["P{}".format(_i)]["desc"] + res = self.sess.run(fetch, feed_dict=feed_dict) + + # Compute the AUC + neg_weight = self.config.neg_per_pos + prauc = eval_descs( + res["d1"], res["d2"], res["d3"], neg_weight) + + return 1.0 - prauc + + def test(self, subtask, cur_data): + """The test function to retrieve results for a single patch. 
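+
+        Depending on the subtask this returns the keypoint score map ("kp"),
+        the estimated orientations in radians ("ori"), or the descriptors
+        ("desc") for the data in `cur_data`.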
+ + LATER: Comment + + """ + + if subtask == "kp": + feed_dict = { + self.inputs["img"]["img"]: cur_data, + self.is_training: False, + } + fetch = { + "scoremap": self.outputs["kp"]["img"]["scoremap-uncut"] + } + res = self.sess.run(fetch, feed_dict=feed_dict) + return res["scoremap"] + elif subtask == "ori": + feed_dict = { + self.inputs["patch"]["P1"]: cur_data["patch"], + self.inputs["xyz"]["P1"]: cur_data["xyz"], + self.is_training: False, + } + fetch = { + "cs": self.outputs["ori"]["P1"]["cs"] + } + res = self.sess.run(fetch, feed_dict=feed_dict) + return np.arctan2(res["cs"][:, 1], res["cs"][:, 0]) + elif subtask == "desc": + feed_dict = { + self.inputs["patch"]["P1"]: cur_data["patch"], + self.inputs["xyz"]["P1"]: cur_data["xyz"], + self.inputs["angle"]["P1"]: cur_data["angle"], + self.is_training: False, + } + fetch = { + "desc": self.outputs["desc"]["P1"]["desc"] + } + res = self.sess.run(fetch, feed_dict=feed_dict) + return res["desc"] + + def _build_placeholders(self): + """Builds Tensorflow Placeholders""" + + # The inputs placeholder dictionary + self.inputs = {} + # multiple types + # LATER: label might not be necessary + types = ["patch", "xyz", "angle"] + if self.config.use_augmented_set: + types += ["aug_rot"] + for _type in types: + self.inputs[_type] = {} + + # We *ARE* going to specify the input size, since the spatial + # transformer implementation *REQUIRES* us to do so. Note that this + # has to be dealt with in the validate loop. + + # batch_size = self.config.batch_size + + # Use variable batch size + batch_size = None + + # We also read nchannel from the configuration. Make sure that the data + # module is behaving accordingly + nchannel = self.config.nchannel + + # Get the input patch size from config + patch_size = float(get_patch_size(self.config)) + + # Compute the r_base (i.e. overlap radius when computing the keypoint + # overlaps. + self.r_base = (float(self.config.desc_input_size) / + float(get_patch_size_no_aug(self.config))) + + # P1, P2, P3, P4 in the paper. P1, P2, P3 are keypoints, P1, P2 + # correspond, P1, and P3 don't correspond, P4 is a non-keypoint patch. + for _name in ["P1", "P2", "P3", "P4"]: + self.inputs["patch"][_name] = tf.placeholder( + tf.float32, shape=[ + batch_size, patch_size, patch_size, nchannel + ], name=_name, + ) + self.inputs["xyz"][_name] = tf.placeholder( + tf.float32, shape=[ + batch_size, 3, + ], name=_name, + ) + self.inputs["angle"][_name] = tf.placeholder( + tf.float32, shape=[ + batch_size, 1, + ], name=_name, + ) + if self.config.use_augmented_set: + self.inputs["aug_rot"][_name] = { + "cs": tf.placeholder( + tf.float32, shape=[ + batch_size, 2, + ], name=_name, + ), + "angle": tf.placeholder( + tf.float32, shape=[ + batch_size, 1, + ], name=_name, + )} + # Add to summary to view them + image_summary_nhwc( + "input/" + _name, + self.inputs["patch"][_name], + ) + + # For Image based test + self.inputs["img"] = {"img": tf.placeholder( + tf.float32, shape=[ + None, None, None, nchannel + ], name="img", + )} + + # For runmode in dropout and batch_norm + self.is_training = tf.placeholder( + tf.bool, shape=(), + name="is_training", + ) + + def _build_network(self): + """Define all the architecture here. 
Use the modules if necessary.""" + + # Import modules according to the configurations + self.modules = {} + for _key in ["kp", "ori", "desc"]: + self.modules[_key] = importlib.import_module( + "modules.{}".format( + getattr(self.config, "module_" + _key))) + + # prepare dictionary for the output and parameters of each module + self.outputs = {} + self.params = {} + self.allparams = {} + for _key in self.modules: + self.outputs[_key] = {} + self.params[_key] = [] + self.allparams[_key] = [] + # create a joint params list + # NOTE: params is a list, not a dict! + self.params["joint"] = [] + self.allparams["joint"] = [] + # create outputs placeholder for crop and rot + self.outputs["resize"] = {} + self.outputs["crop"] = {} + self.outputs["rot"] = {} + + # Actual Network definition + with tf.variable_scope("lift"): + # Graph construction depends on the subtask + subtask = self.config.subtask + + # ---------------------------------------- + # Initial resize for the keypoint module + # Includes rotation when augmentations are used + # + if self.config.use_augmented_set: + rot = self.inputs["aug_rot"] + else: + rot = None + self._build_st( + module="resize", + xyz=None, + cs=rot, + names=["P1", "P2", "P3", "P4"], + out_size=self.config.kp_input_size, + reduce_ratio=float(get_patch_size_no_aug(self.config)) / + float(get_patch_size(self.config)), + ) + + # ---------------------------------------- + # Keypoint Detector + # + # The keypoint detector takes each patch input and outputs (1) + # "score": the score of the patch, (2) "xy": keypoint position in + # side the patch. The score output is the soft-maximum (not a + # softmax) of the scores. The position output from the network + # should be in the form friendly to the spatial + # transformer. Outputs are always dictionaries. + # Rotate ground truth coordinates when augmenting rotations. + aug_rot = self.inputs["aug_rot"] \ + if self.config.augment_rotations else None + xyz_gt_scaled = self.transform_xyz( + self.inputs["xyz"], + aug_rot, + self.config.batch_size, + self.scale_aug, + transpose=True, + names=["P1", "P2", "P3", "P4"]) + self._build_module( + module="kp", + inputs=self.outputs["resize"], + bypass=xyz_gt_scaled, + names=["P1", "P2", "P3", "P4"], + skip=subtask == "ori" or subtask == "desc", + ) + + # For image based test + # self._build_module( + # module="kp", + # inputs=self.inputs["img"], + # bypass=self.inputs["img"], # This is a dummy + # names=["img"], + # skip=subtask != "kp", + # reuse=True, + # test_only=True, + # ) + + # ---------------------------------------- + # The Crop Spatial Transformer + # Output: use the same support region as for the descriptor + # + xyz_kp_scaled = self.transform_kp( + self.outputs["kp"], + aug_rot, + self.config.batch_size, + 1 / self.scale_aug, + transpose=False, + names=["P1", "P2", "P3"]) + self._build_st( + module="crop", + xyz=xyz_kp_scaled, + cs=aug_rot, + names=["P1", "P2", "P3"], + out_size=self.config.ori_input_size, + reduce_ratio=float(self.config.desc_input_size) / + float(get_patch_size(self.config)), + ) + + # ---------------------------------------- + # Orientation Estimator + # + # The orientation estimator takes the crop outputs as input and + # outputs orientations for the spatial transformer to + # use. Actually, since we output cos and sin, we can simply use the + # *UNNORMALIZED* version of the two, normalize them, and directly + # use it for our affine transform. In short it returns "cs": the + # cos and the sin, but unnormalized. Outputs are always + # dictionaries. 
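+            # The "cs" output is what make_theta() consumes below when the
+            # rotation spatial transformer is built.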
+ # Bypass: just the GT angle + if self.config.augment_rotations: + rot = {} + for name in ["P1", "P2", "P3"]: + rot[name] = self.inputs["angle"][name] - \ + self.inputs["aug_rot"][name]["angle"] + else: + rot = self.inputs["angle"] + self._build_module( + module="ori", + inputs=self.outputs["crop"], + bypass=rot, + names=["P1", "P2", "P3"], + skip=subtask == "kp" or subtask == "desc", + ) + + # ---------------------------------------- + # The Rot Spatial Transformer. + # - No rotation augmentation: + # Operates over the original patch with the ground truth angle when + # bypassing. Otherwise, we combine the augmented angle and the + # output of the orientation module. + # We do not consider rotation augmentations for the descriptor. + if self.config.augment_rotations: + rot = self.chain_cs( + self.inputs["aug_rot"], + self.outputs["ori"], + names=["P1", "P2", "P3"]) + # rot = self.outputs["ori"] + # xyz_desc_scaled = self.transform_kp( + # self.outputs["kp"], + # rot, + # self.config.batch_size, + # 1 / self.scale_aug, + # transpose=False, + # names=["P1", "P2", "P3"]) + elif self.config.use_augmented_set: + rot = self.outputs["ori"] + # xyz_desc_scaled = self.transform_kp( + # self.outputs["kp"], + # rot, + # self.config.batch_size, + # 1 / self.scale_aug, + # transpose=False, + # names=["P1", "P2", "P3"]) + else: + rot = None + # xyz_desc_scaled = self.inputs["xyz"] + self._build_st( + module="rot", + xyz=xyz_kp_scaled, + cs=rot, + names=["P1", "P2", "P3"], + out_size=self.config.desc_input_size, + reduce_ratio=float(self.config.desc_input_size) / + float(get_patch_size(self.config)), + ) + + # ---------------------------------------- + # Feature Descriptor + # + # The descriptor simply computes the descriptors, given the patch. + self._build_module( + module="desc", + inputs=self.outputs["rot"], + bypass=self.outputs["rot"], + names=["P1", "P2", "P3"], + skip=False, + ) + + def _build_module(self, module, inputs, bypass, names, skip, reuse=None, + test_only=False): + """Subroutine for building each module""" + + is_first = True + # _module = "kp" + for name in names: + # reuse if not the first time + if not is_first: + cur_reuse = True + else: + cur_reuse = reuse + with tf.variable_scope(module, reuse=cur_reuse) as sc: + cur_inputs = ( + (inputs[name] - self.mean[module]) / + self.std[module] + ) + if test_only: + is_training = False + else: + is_training = self.is_training + self.outputs[module][name] = self.modules[module].process( + inputs=cur_inputs, + bypass=bypass[name], + name=name, + skip=skip, + config=self.config, + is_training=is_training, + ) + # Store variables if it is the first time + if is_first: + self.params[module] = tf.get_collection( + tf.GraphKeys.TRAINABLE_VARIABLES, scope=sc.name) + self.allparams[module] = tf.get_collection( + tf.GraphKeys.GLOBAL_VARIABLES, scope=sc.name) + # Also append to the global list + self.params["joint"] += self.params[module] + self.allparams["joint"] += self.allparams[module] + # Mark that it is initialized + is_first = False + + def _build_st(self, module, xyz, cs, names, out_size, + reduce_ratio=None, do_rotate=False, do_reverse=False): + """Subroutine for building spatial transformer""" + + for name in names: + with tf.variable_scope(module): + cur_inputs = self.inputs["patch"][name] + # Get xy and cs + if xyz is not None: + _xyz = xyz[name]["xyz"] + else: + batch_size = tf.shape(cur_inputs)[0] + _xyz = tf.zeros((batch_size, 3)) + if cs is not None: + _cs = cs[name]["cs"] + else: + _cs = None + # transform coordinates + # if 
do_rotate: + # _xyz[:2] = self.transform_xyz(_xyz, + # _cs, + # config.batch_size, + # reverse=do_reverse) + input_size = get_tensor_shape(cur_inputs)[2] + if reduce_ratio is None: + reduce_ratio = float(out_size) / float(input_size) + # apply the spatial transformer + self.outputs[module][name] = transformer( + U=cur_inputs, + # Get the output from the keypoint layer + theta=make_theta(xyz=_xyz, cs=_cs, rr=reduce_ratio), + out_size=(out_size, out_size), + ) + + def chain_cs(self, cs1, cs2, names): + cs = {} + for name in names: + c1 = cs1[name]["cs"][:, 0] + s1 = cs1[name]["cs"][:, 1] + c2 = cs2[name]["cs"][:, 0] + s2 = cs2[name]["cs"][:, 1] + z = tf.zeros_like(c1) + o = tf.ones_like(c1) + mat1 = tf.transpose(tf.stack([[c1, -s1, z], + [s1, c1, z], + [z, z, o]]), + (2, 0, 1)) + mat2 = tf.transpose(tf.stack([[c2, -s2, z], + [s2, c2, z], + [z, z, o]]), + (2, 0, 1)) + joint = tf.matmul(mat1, mat2) + cs[name] = {"cs": tf.stack( + [joint[:, 0, 0], joint[:, 1, 0]], + axis=1)} + return cs + + def transform_xyz(self, xyz, cs, batch_size, scale, transpose, names): + """Rotate a set of coordinates and apply scaling if necessary.""" + + xyz_rot = {} + for name in names: + if cs is None: + xyz_rot[name] = xyz[name] * scale + else: + c = cs[name]["cs"][:, 0] + s = cs[name]["cs"][:, 1] + z = tf.zeros_like(c) + o = tf.ones_like(c) + mat = tf.transpose(tf.stack([[c, -s, z], + [s, c, z], + [z, z, o]]), + (2, 0, 1)) + xyz_rot[name] = tf.squeeze( + tf.matmul(mat, + tf.expand_dims(xyz[name], 2), + transpose_a=transpose), + axis=2) * scale + return xyz_rot + + def transform_kp(self, kp, cs, batch_size, scale, transpose, names): + """Rotate/scale keypoint coordinates while preserving the score.""" + + # A bit inelegant but I'm tired + kp_rot = {} + for name in names: + if cs is None: + kp_rot[name] = {"xyz": kp[name]["xyz"] * scale, + "score": kp[name]["score"]} + else: + c = cs[name]["cs"][:, 0] + s = cs[name]["cs"][:, 1] + z = tf.zeros_like(c) + o = tf.ones_like(c) + mat = tf.transpose(tf.stack([[c, -s, z], + [s, c, z], + [z, z, o]]), + (2, 0, 1)) + kp_rot[name] = { + "xyz": tf.squeeze( + tf.matmul(mat, + tf.expand_dims(kp[name]["xyz"], 2), + transpose_a=transpose), + axis=2) * scale, + "score": kp[name]["score"]} + return kp_rot + + def _build_loss(self): + """Build losses related to each subtask.""" + + self.loss = {} + + # Indivisual loss components + with tf.variable_scope("kp-overlap"): + aug_rot = self.inputs["aug_rot"] if self.config.augment_rotations \ + else None + gt = self.transform_xyz(self.inputs["xyz"], + aug_rot, + self.config.batch_size, + self.scale_aug, + transpose=True, + names=["P1", "P2"]) + gt1 = gt["P1"] + gt2 = gt["P2"] + # gt1 = self.inputs["xyz"]["P1"] * self.scale_aug + # gt2 = self.inputs["xyz"]["P2"] * self.scale_aug + _loss_overlap = loss_overlap( + kp_pos1=self.outputs["kp"]["P1"]["xyz"], + gt_pos1=gt1, + kp_pos2=self.outputs["kp"]["P2"]["xyz"], + gt_pos2=gt2, + r_base=self.r_base, + ) + with tf.variable_scope("kp-classification"): + _loss_classification = loss_classification( + s1=self.outputs["kp"]["P1"]["score"], + s2=self.outputs["kp"]["P2"]["score"], + s3=self.outputs["kp"]["P3"]["score"], + s4=self.outputs["kp"]["P4"]["score"], + ) + with tf.variable_scope("desc-pair"): + _loss_desc_pair = loss_desc_pair( + d1=self.outputs["desc"]["P1"]["desc"], + d2=self.outputs["desc"]["P2"]["desc"], + ) + with tf.variable_scope("desc-non-pair"): + if self.config.use_triplet_loss: + _loss_desc_non_pair = loss_desc_non_pair( + d1=self.outputs["desc"]["P1"]["desc"], + 
d2=self.outputs["desc"]["P2"]["desc"], + d3=self.outputs["desc"]["P3"]["desc"], + margin=self.config.alpha_margin, + ) + else: + _loss_desc_non_pair = loss_desc_non_pair( + d1=self.outputs["desc"]["P1"]["desc"], + d3=self.outputs["desc"]["P3"]["desc"], + margin=self.config.alpha_margin, + ) + + # Loss for each task + self.loss["kp"] = ( + self.config.alpha_overlap * _loss_overlap + + self.config.alpha_classification * _loss_classification + ) + self.loss["ori"] = _loss_desc_pair + self.loss["desc"] = _loss_desc_pair + _loss_desc_non_pair + self.loss["joint"] = ( + self.config.alpha_kp * _loss_classification + + self.config.alpha_desc * _loss_desc_pair + + self.config.alpha_desc * _loss_desc_non_pair + ) + + # Loss for weight decay + # + # LATER: we actually did not use it + + # Add summary for the subtask loss + tf.summary.scalar( + "losses/loss-" + self.config.subtask, + tf.reduce_mean(self.loss[self.config.subtask])) + + def _build_optim(self): + """Build the optimization op + + Note that we only construct it for the task at hand. This is to avoid + having to deal with bypass layers + + """ + self.optim = {} + + # Optimizer depending on the option + optimizer = self.config.optimizer + learning_rate = self.config.learning_rate + if optimizer == "sgd": + optim = tf.train.GradientDescentOptimizer(learning_rate) + elif optimizer == "adam": + optim = tf.train.AdamOptimizer(learning_rate) + elif optimizer == "rmsprop": + optim = tf.train.RMSPropOptimizer(learning_rate) + else: + raise Exception("[!] Unkown optimizer: {}".format(optimizer)) + + # All gradient computation should be done *after* the batchnorm update + update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + with tf.control_dependencies(update_ops): + # Compute gradient using the optimizer + subtask = self.config.subtask + loss = self.loss[subtask] + params = self.params[subtask] + # The subtask loss to optimize + if subtask != "joint": + # In case of other losses, simply do so + grads_and_vars = optim.compute_gradients(loss, var_list=params) + elif subtask == "joint": + # In case of the joint optimization, be sure to treat + # orientation in a different way. + + # for keypoint + gv_kp = optim.compute_gradients( + self.loss["joint"], var_list=self.params["kp"]) + # for orientation + gv_ori = optim.compute_gradients( + self.loss["ori"], var_list=self.params["ori"]) + # for descriptor + gv_desc = optim.compute_gradients( + self.loss["joint"], var_list=self.params["desc"]) + + # Concatenate the list + # grads_and_vars = gv_kp + gv_ori + gv_desc + grads_and_vars = [] + if 'kp' in self.config.finetune.split('+'): + grads_and_vars += gv_kp + if 'ori' in self.config.finetune.split('+'): + grads_and_vars += gv_ori + if 'desc' in self.config.finetune.split('+'): + grads_and_vars += gv_desc + if len(grads_and_vars) == 0: + raise RuntimeError("Nothing to finetune? Check --finetune") + + else: + raise ValueError("Wrong subtask {}".format(subtask)) + + # Clip gradients if necessary + if self.config.max_grad_norm > 0.0: + new_grads_and_vars = [] + # check whether gradients contain large value (then clip), NaN + # and InF + for idx, (grad, var) in enumerate(grads_and_vars): + if grad is not None and var in params: + grad = tf.clip_by_norm(grad, self.config.max_grad_norm) + new_grads_and_vars.append((grad, var)) + grads_and_vars = new_grads_and_vars + + # Check numerics and report if something is going on. 
This will + # make the backward pass stop and skip the batch + if self.config.check_numerics is True: + new_grads_and_vars = [] + # check whether gradients contain large value (then clip), NaN + # and InF + for idx, (grad, var) in enumerate(grads_and_vars): + if grad is not None and var in params: + grad = tf.check_numerics( + grad, "Numerical error in gradient for {}".format( + var.name)) + new_grads_and_vars.append((grad, var)) + grads_and_vars = new_grads_and_vars + + # Summarize all gradients + for grad, var in grads_and_vars: + tf.summary.histogram(var.name + '/gradient', grad) + + # Make the optim op + self.optim[subtask] = optim.apply_gradients(grads_and_vars) + + def _get_feed_dict(self, subtask, cur_data): + """Returns feed_dict""" + + # + # To simplify things, we feed everything for now. + # + # LATER: make feed_dict less redundant. + + feed_dict = {} + + types = ["patch", "xyz", "angle"] + if self.config.augment_rotations \ + or self.config.use_augmented_set: + types += ["aug_rot"] + + for _type in types: + for _name in ["P1", "P2", "P3", "P4"]: + if _type == 'aug_rot': + key_cs = self.inputs[_type][_name]["cs"] + feed_dict[key_cs] = cur_data[_type][_name]["cs"] + key_angle = self.inputs[_type][_name]["angle"] + feed_dict[key_angle] = cur_data[_type][ + _name]["angle"][..., None] + else: + key = self.inputs[_type][_name] + feed_dict[key] = cur_data[_type][_name] + + return feed_dict + +# +# lift.py ends here diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..bbe1271 --- /dev/null +++ b/readme.md @@ -0,0 +1,112 @@ +# TF-LIFT: Tensorflow Implmentation for Learned Invariant Feature Transform # + +## Basic Usage ## + +### Setting the environment ### + +Python Version : 3 +OpenCV Version : 3 + +You'll need to install the dependencies, something like the following: + +``` +pip install numpy h5py tensorflow tensorflow-gpu + +``` +We will later provide a requirements.txt for you to use with `pip`. + +Also, you need to setup your work directories. Edit the `config.py` for a +convenient default argument setting. See help for more information on what the +configurations do. + +### Training ### + +`main.py` is the entry point for all your needs. Simply define the task you +want to do, the where to save the results (logs) and the training subtask you +want to perform. + +For example, to train the descriptor +``` +python main.py --task=train --subtask=desc +``` + +Note: this will save the logs at `logs/main.py---task=train---subtask=desc`. If +you don't want this behavior, you can also add `--logdir=logs/test` in the +command line argument, for example. + +### Testing ### + +Testing is even more simple, just provide the input image location, the output +file name, keypoint file name (for ori and desc). For example, the following +command will run the entire pipeline for `image1.jpg`, using the model at `logs/test`. + +``` +python main.py --task=test --subtask=kp --logdir=logs/test --test_img_file=image1.jpg \ + --test_out_file=image1_kp.txt +python main.py --task=test --subtask=ori --logdir=logs/test --test_img_file=image1.jpg \ + --test_out_file=image1_ori.txt --test_kp_file=image1_kp.txt +python main.py --task=test --subtask=desc --logdir=logs/test --test_img_file=image1.jpg \ + --test_out_file=image1_desc.h5 --test_kp_file=image1_ori.txt +``` + +Note: when trying to load the model, it will always look for the `joint` +trained model first, and fall back to the subtask it is trying to test for. 
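+
+The descriptor output is an HDF5 file. Below is a minimal sketch for inspecting
+it; the exact dataset names depend on how the tester saves them, so the sketch
+simply lists whatever keys are present:
+
+```
+import h5py
+
+with h5py.File("image1_desc.h5", "r") as f:
+    for key in f:
+        print(key, f[key])
+```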
+
+## More notes on training ##
+
+### Saving the network ###
+
+When training, the network is automatically saved in the `logdir`. If you don't
+set this manually, it defaults to
+`logs/{concatenation-of-all-arguments}`. The things that are saved are:
+
+* Tensorflow checkpoint
+* Tensorflow graph metadata
+* `mean` and `std` used for input data normalization
+* best validation loss
+* best validation iteration
+
+All these are loaded back when we want to continue.
+
+### Loading the network ###
+
+On all runs, the framework automatically resumes from where it left off. In
+other words, it will **always** try to load network weights and resume. If the
+framework cannot find the expected weights, it will just tell you that it could
+not find weights in the expected locations, and **will try to go on its merry
+way**. Note that this is something that you want to keep in mind. For example,
+if you run the subtask `ori` with a typo in `logdir` pointing you to a
+directory without the pretrained descriptor weights, the framework will simply
+try to learn the orientation estimator **with random descriptors**. This is
+intended, as this might be something that you actually want to try.
+
+Network loading is performed in the following order, **overwriting** the
+previously loaded weights:
+
+1. Loads the pretrained weights, in the **old framework** format, from
+   directories defined in `pretrained_{subtask}` in the configuration. **This
+   feature is deprecated and should not be used.**
+
+2. Loads the pretrained weights, in the **new framework** format, from
+   directories defined in `pretrained_{subtask}` in the configuration.
+
+3. Loads the weights from the `logdir`, which is either automatically
+   determined by the command line arguments, or can be given manually.
+
+## Differences from the original version ##
+
+### Batch normalization ###
+
+In the original version, we did not apply batch normalization. In the new
+version, batch normalization is applied to all layers. This significantly
+speeds up the learning process and makes learning stable. This also eliminates
+the need for us to normalize the dataset when training; we can instead simply
+put the data in a reasonable range, say -1 to 1, and be done with it. Note
+that since we do this, we also perform batch normalization on the input.
+
+### L2-pooling and Spatial subtractive normalization ###
+
+We found that these layers can be replaced with plain ReLUs and spatial
+pooling without a significant difference, so they have been removed.
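+
+For reference, the L2-pooling that `layers.py` still provides boils down to
+the following standalone sketch (NHWC layout and "VALID" padding are assumed
+here):
+
+```
+import tensorflow as tf
+
+def l2_pool(x, ksize, stride):
+    # Euclidean norm over each pooling window: sqrt of the scaled average of
+    # the squared activations.
+    scaler = tf.cast(ksize * ksize, tf.float32)
+    return tf.sqrt(scaler * tf.nn.avg_pool(
+        tf.square(x),
+        ksize=[1, ksize, ksize, 1],
+        strides=[1, stride, stride, 1],
+        padding="VALID"))
+```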
+ diff --git a/test_desc.sh b/test_desc.sh new file mode 100755 index 0000000..d748e69 --- /dev/null +++ b/test_desc.sh @@ -0,0 +1,2 @@ +#!/bin/bash +CUDA_VISIBLE_DEVICES=3 OMP_NUM_THREADS=10 python main.py --test_img_file=./test/fountain.png --test_kp_file=./test/fountain_ori.txt --test_out_file=./test/fountain_desc.h5 --task=test --subtask=desc --logdir="logs/main.py" diff --git a/test_kp.sh b/test_kp.sh new file mode 100755 index 0000000..1b6b7d7 --- /dev/null +++ b/test_kp.sh @@ -0,0 +1,2 @@ +#!/bin/bash +CUDA_VISIBLE_DEVICES=3 OMP_NUM_THREADS=10 python main.py --test_img_file=./test/fountain.png --test_out_file=./test/fountain_kp.txt --task=test --subtask=kp --logdir="logs/main.py" diff --git a/test_kp_legacy.sh b/test_kp_legacy.sh new file mode 100755 index 0000000..19f8844 --- /dev/null +++ b/test_kp_legacy.sh @@ -0,0 +1,2 @@ +#!/bin/bash +CUDA_VISIBLE_DEVICES=3 OMP_NUM_THREADS=10 python main.py --test_img_file=./test/fountain.png --test_out_file=./test/fountain_kp.txt --task=test --subtask=kp --logdir="logs/legacy" --pretrained_kp="models/legacy" --use_batch_norm=False diff --git a/test_ori.sh b/test_ori.sh new file mode 100755 index 0000000..5b59307 --- /dev/null +++ b/test_ori.sh @@ -0,0 +1,2 @@ +#!/bin/bash +CUDA_VISIBLE_DEVICES=3 OMP_NUM_THREADS=10 python main.py --test_img_file=./test/fountain.png --test_kp_file=./test/fountain_kp.txt --test_out_file=./test/fountain_ori.txt --task=test --subtask=ori --logdir="logs/main.py" diff --git a/test_orig_lift.sh b/test_orig_lift.sh new file mode 100755 index 0000000..272ac5f --- /dev/null +++ b/test_orig_lift.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# ------------------------------------------------------------ +# Example script for running LIFT + +# Open MP Settings +export OMP_NUM_THREADS=1 + +# Cuda Settings +export CUDA_VISIBLE_DEVICES=0 + +# Theano Flags +export THEANO_FLAGS="device=gpu0,${THEANO_FLAGS}" + +# ------------------------------------------------------------ +# LIFT code settings + +# Number of keypoints +_LIFT_NUM_KEYPOINT=1000 + +# Whether to save debug image for keypoints +_LIFT_SAVE_PNG=1 + +# Whether the use Theano when keypoint testing. 
CuDNN is required when turned +# on +_LIFT_USE_THEANO=1 + +# The base path of the code +export _LIFT_BASE_PATH="/cvlabdata2/home/kyi/GitData/LIFT" + +_LIFT_PYTHON_CODE_PATH="${_LIFT_BASE_PATH}/python-code" + +# Test image and model settings +# _LIFT_TEST_IMG_NAME="img1" +# _LIFT_TEST_IMG_NAME="rect_001_max" +_LIFT_TEST_IMG_NAME="fountain" +_LIFT_TEST_IMG="${_LIFT_BASE_PATH}/data/testimg/${_LIFT_TEST_IMG_NAME}.png" +_LIFT_TEST_CONFIG="${_LIFT_BASE_PATH}/models/configs/picc-finetune-nopair.config" +_LIFT_MODEL_DIR="${_LIFT_BASE_PATH}/models/picc-best/" + +# Output Settings +_LIFT_RES_DIR="${_LIFT_BASE_PATH}/results" +_LIFT_KP_FILE_NAME="${_LIFT_RES_DIR}/${_LIFT_TEST_IMG_NAME}_kp.txt" +_LIFT_ORI_FILE_NAME="${_LIFT_RES_DIR}/${_LIFT_TEST_IMG_NAME}_ori.txt" +_LIFT_DESC_FILE_NAME="${_LIFT_RES_DIR}/${_LIFT_TEST_IMG_NAME}_desc.h5" + + +(cd $_LIFT_PYTHON_CODE_PATH; \ + python compute_detector.py \ + $_LIFT_TEST_CONFIG \ + $_LIFT_TEST_IMG \ + $_LIFT_KP_FILE_NAME \ + $_LIFT_SAVE_PNG \ + $_LIFT_USE_THEANO \ + 0 \ + $_LIFT_MODEL_DIR \ + $_LIFT_NUM_KEYPOINT \ + ) + +# (cd $_LIFT_PYTHON_CODE_PATH; \ +# python compute_orientation.py \ +# $_LIFT_TEST_CONFIG \ +# $_LIFT_TEST_IMG \ +# $_LIFT_KP_FILE_NAME \ +# $_LIFT_ORI_FILE_NAME \ +# 0 \ +# 0 \ +# $_LIFT_MODEL_DIR +# ) + +# (cd $_LIFT_PYTHON_CODE_PATH; \ +# python compute_descriptor.py \ +# $_LIFT_TEST_CONFIG \ +# $_LIFT_TEST_IMG \ +# $_LIFT_ORI_FILE_NAME \ +# $_LIFT_DESC_FILE_NAME \ +# 0 \ +# 0 \ +# $_LIFT_MODEL_DIR +# ) + diff --git a/tester.py b/tester.py new file mode 100644 index 0000000..b552c65 --- /dev/null +++ b/tester.py @@ -0,0 +1,405 @@ +# tester.py --- +# +# Filename: tester.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 13:34:04 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + + +import time + +import cv2 +import h5py +import numpy as np +import tensorflow as tf + +from datasets.test import Dataset +from networks.lift import Network +from six.moves import xrange +from utils import (IDX_ANGLE, XYZS2kpList, draw_XYZS_to_img, get_patch_size, + get_ratio_scale, get_XYZS_from_res_list, restore_network, + saveh5, saveKpListToTxt, update_affine) + + +class Tester(object): + """The Tester Class + + LATER: Clean up unecessary dictionaries + LATER: Make a superclass for Tester and Trainer + + """ + + def __init__(self, config, rng): + self.config = config + self.rng = rng + + # Open a tensorflow session. I like keeping things simple, so I don't + # use a supervisor. I'm just going to do everything manually. I also + # will just allow the gpu memory to grow + tfconfig = tf.ConfigProto() + tfconfig.gpu_options.allow_growth = True + self.sess = tf.Session(config=tfconfig) + + # Create the dataset instance + self.dataset = Dataset(self.config, rng) + # Create the model instance + self.network = Network(self.sess, self.config, self.dataset) + # Make individual saver instances for each module. + self.saver = {} + self.best_val_loss = {} + self.best_step = {} + # Create the saver instance for both joint and the current subtask + for _key in ["joint", self.config.subtask]: + self.saver[_key] = tf.train.Saver(self.network.allparams[_key]) + + # We have everything ready. We finalize and initialie the network here. 
+ self.sess.run(tf.global_variables_initializer()) + + def run(self): + + subtask = self.config.subtask + + # Load the network weights for the module of interest + print("-------------------------------------------------") + print(" Loading Trained Network ") + print("-------------------------------------------------") + # Try loading the joint version, and then fall back to the current task + # silently if failed. + try: + restore_res = restore_network(self, "joint") + except: + pass + if not restore_res: + restore_res = restore_network(self, subtask) + if not restore_res: + raise RuntimeError("Could not load network weights!") + + # Run the appropriate compute function + print("-------------------------------------------------") + print(" Testing ") + print("-------------------------------------------------") + + eval("self._compute_{}()".format(subtask)) + + def _compute_kp(self): + """Compute Keypoints. + + LATER: Clean up code + + """ + + total_time = 0.0 + + # Read image + image_color, image_gray, load_prep_time = self.dataset.load_image() + + # check size + image_height = image_gray.shape[0] + image_width = image_gray.shape[1] + + # Multiscale Testing + scl_intv = self.config.test_scl_intv + # min_scale_log2 = 1 # min scale = 2 + # max_scale_log2 = 4 # max scale = 16 + min_scale_log2 = self.config.test_min_scale_log2 + max_scale_log2 = self.config.test_max_scale_log2 + # Test starting with double scale if small image + min_hw = np.min(image_gray.shape[:2]) + if min_hw <= 1600: + print("INFO: Testing double scale") + min_scale_log2 -= 1 + # range of scales to check + num_division = (max_scale_log2 - min_scale_log2) * (scl_intv + 1) + 1 + scales_to_test = 2**np.linspace(min_scale_log2, max_scale_log2, + num_division) + + # convert scale to image resizes + resize_to_test = ((float(self.config.kp_input_size - 1) / 2.0) / + (get_ratio_scale(self.config) * scales_to_test)) + + # check if resize is valid + min_hw_after_resize = resize_to_test * np.min(image_gray.shape[:2]) + is_resize_valid = min_hw_after_resize > self.config.kp_filter_size + 1 + + # if there are invalid scales and resizes + if not np.prod(is_resize_valid): + # find first invalid + first_invalid = np.where(True - is_resize_valid)[0][0] + + # remove scales from testing + scales_to_test = scales_to_test[:first_invalid] + resize_to_test = resize_to_test[:first_invalid] + + print('resize to test is {}'.format(resize_to_test)) + print('scales to test is {}'.format(scales_to_test)) + + # Run for each scale + test_res_list = [] + for resize in resize_to_test: + + # resize according to how we extracted patches when training + new_height = np.cast['int'](np.round(image_height * resize)) + new_width = np.cast['int'](np.round(image_width * resize)) + start_time = time.clock() + image = cv2.resize(image_gray, (new_width, new_height)) + end_time = time.clock() + resize_time = (end_time - start_time) * 1000.0 + print("Time taken to resize image is {}ms".format( + resize_time + )) + total_time += resize_time + + # run test + # LATER: Compatibility with the previous implementations + start_time = time.clock() + + # Run the network to get the scoremap (the valid region only) + scoremap = None + if self.config.test_kp_use_tensorflow: + scoremap = self.network.test( + self.config.subtask, + image.reshape(1, new_height, new_width, 1) + ).squeeze() + else: + # OpenCV Version + raise NotImplementedError( + "TODO: Implement OpenCV Version") + + end_time = time.clock() + compute_time = (end_time - start_time) * 1000.0 + print("Time taken for 
image size {}" + " is {} milliseconds".format( + image.shape, compute_time)) + + total_time += compute_time + + # pad invalid regions and add to list + start_time = time.clock() + test_res_list.append( + np.pad(scoremap, int((self.config.kp_filter_size - 1) / 2), + mode='constant', + constant_values=-np.inf) + ) + end_time = time.clock() + pad_time = (end_time - start_time) * 1000.0 + print("Time taken for padding and stacking is {} ms".format( + pad_time + )) + total_time += pad_time + + # ------------------------------------------------------------------------ + # Non-max suppresion and draw. + + # The nonmax suppression implemented here is very very slow. Consider + # this as just a proof of concept implementation as of now. + + # Standard nearby : nonmax will check approximately the same area as + # descriptor support region. + nearby = int(np.round( + (0.5 * (self.config.kp_input_size - 1.0) * + float(self.config.desc_input_size) / + float(get_patch_size(self.config))) + )) + fNearbyRatio = self.config.test_nearby_ratio + # Multiply by quarter to compensate + fNearbyRatio *= 0.25 + nearby = int(np.round(nearby * fNearbyRatio)) + nearby = max(nearby, 1) + + nms_intv = self.config.test_nms_intv + edge_th = self.config.test_edge_th + + print("Performing NMS") + start_time = time.clock() + res_list = test_res_list + XYZS = get_XYZS_from_res_list( + res_list, resize_to_test, scales_to_test, nearby, edge_th, + scl_intv, nms_intv, do_interpolation=True, + ) + end_time = time.clock() + XYZS = XYZS[:self.config.test_num_keypoint] + + # For debugging + # TODO: Remove below + draw_XYZS_to_img(XYZS, image_color, self.config.test_out_file + '.jpg') + + nms_time = (end_time - start_time) * 1000.0 + print("NMS time is {} ms".format(nms_time)) + total_time += nms_time + print("Total time for detection is {} ms".format(total_time)) + # if bPrintTime: + # # Also print to a file by appending + # with open("../timing-code/timing.txt", "a") as timing_file: + # print("------ Keypoint Timing ------\n" + # "NMS time is {} ms\n" + # "Total time is {} ms\n".format( + # nms_time, total_time + # ), + # file=timing_file) + + # # resize score to original image size + # res_list = [cv2.resize(score, + # (image_width, image_height), + # interpolation=cv2.INTER_NEAREST) + # for score in test_res_list] + # # make as np array + # res_scores = np.asarray(res_list) + # with h5py.File('test/scores.h5', 'w') as score_file: + # score_file['score'] = res_scores + + # ------------------------------------------------------------------------ + # Save as keypoint file to be used by the oxford thing + print("Turning into kp_list") + kp_list = XYZS2kpList(XYZS) # note that this is already sorted + + # ------------------------------------------------------------------------ + # LATER: take care of the orientations somehow... + # # Also compute angles with the SIFT method, since the keypoint + # # component alone has no orientations. 
+ # print("Recomputing Orientations") + # new_kp_list, _ = recomputeOrientation(image_gray, kp_list, + # bSingleOrientation=True) + + print("Saving to txt") + saveKpListToTxt(kp_list, None, self.config.test_out_file) + + def _compute_ori(self): + """Compute Orientations """ + + total_time = 0.0 + + # Read image + start_time = time.clock() + cur_data = self.dataset.load_data() + end_time = time.clock() + load_time = (end_time - start_time) * 1000.0 + print("Time taken to load patches is {} ms".format( + load_time + )) + total_time += load_time + + # ------------------------------------------------------------------------- + # Test using the test function + start_time = time.clock() + oris = self._test_multibatch(cur_data) + end_time = time.clock() + compute_time = (end_time - start_time) * 1000.0 + print("Time taken to compute is {} ms".format( + compute_time + )) + total_time += compute_time + + # update keypoints and save as new + start_time = time.clock() + kps = cur_data["kps"] + for idxkp in xrange(len(kps)): + kps[idxkp][IDX_ANGLE] = oris[idxkp] * 180.0 / np.pi % 360.0 + kps[idxkp] = update_affine(kps[idxkp]) + end_time = time.clock() + update_time = (end_time - start_time) * 1000.0 + print("Time taken to update is {} ms".format( + update_time + )) + total_time += update_time + print("Total time for orientation is {} ms".format(total_time)) + + # save as new keypoints + saveKpListToTxt( + kps, self.config.test_kp_file, self.config.test_out_file) + + def _compute_desc(self): + """Compute Descriptors """ + + total_time = 0.0 + + # Read image + start_time = time.clock() + cur_data = self.dataset.load_data() + end_time = time.clock() + load_time = (end_time - start_time) * 1000.0 + print("Time taken to load patches is {} ms".format( + load_time + )) + total_time += load_time + + # ------------------------------------------------------------------------- + # Test using the test function + start_time = time.clock() + descs = self._test_multibatch(cur_data) + end_time = time.clock() + compute_time = (end_time - start_time) * 1000.0 + print("Time taken to compute is {} ms".format( + compute_time + )) + total_time += compute_time + print("Total time for descriptor is {} ms".format(total_time)) + + # save as h5 file + save_dict = {} + save_dict['keypoints'] = cur_data["kps"] + save_dict['descriptors'] = descs + + saveh5(save_dict, self.config.test_out_file) + + def _test_multibatch(self, cur_data): + """A sub test routine. + + We do this since the spatial transformer implementation in tensorflow + does not like undetermined batch sizes. 
+ + LATER: Bypass the spatial transformer...somehow + LATER: Fix the multibatch testing + + """ + + subtask = self.config.subtask + batch_size = self.config.batch_size + num_patch = len(cur_data["patch"]) + num_batch = int(np.ceil(float(num_patch) / float(batch_size))) + # Initialize the batch items + cur_batch = {} + for _key in cur_data: + cur_batch[_key] = np.zeros_like(cur_data[_key][:batch_size]) + + # Do muiltiple times + res = [] + for _idx_batch in xrange(num_batch): + # start of the batch + bs = _idx_batch * batch_size + # end of the batch + be = min(num_patch, (_idx_batch + 1) * batch_size) + # number of elements in batch + bn = be - bs + for _key in cur_data: + cur_batch[_key][:bn] = cur_data[_key][bs:be] + cur_res = self.network.test(subtask, cur_batch).squeeze()[:bn] + # Append + res.append(cur_res) + + return np.concatenate(res, axis=0) + +# +# tester.py ends here diff --git a/trainer.py b/trainer.py new file mode 100644 index 0000000..f5f6d37 --- /dev/null +++ b/trainer.py @@ -0,0 +1,250 @@ +# trainer.py --- +# +# Filename: trainer.py +# Description: +# Author: Kwang Moo Yi, Lin Chen +# Maintainer: Kwang Moo Yi +# Created: ??? +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# Tue 27 Jun 15:04:34 CEST 2017, Kwang Moo Yi +# +# - Removed comments. +# - Renamed Dataset import +# +# + +# Code: + + +import os + +import numpy as np +import tensorflow as tf +from tqdm import trange + +from datasets.lift import Dataset +from networks.lift import Network +from six.moves import xrange +from utils import get_hard_batch, restore_network, save_network + + +class Trainer(object): + """The Trainer Class + + LATER: Remove all unecessary "dictionarization" + + """ + + def __init__(self, config, rng): + self.config = config + self.rng = rng + + # Open a tensorflow session. I like keeping things simple, so I don't + # use a supervisor. I'm just going to do everything manually. I also + # will just allow the gpu memory to grow + tfconfig = tf.ConfigProto() + if self.config.usage > 0: + tfconfig.gpu_options.allow_growth = False + tfconfig.gpu_options.per_process_gpu_memory_fraction = \ + self.config.usage + else: + tfconfig.gpu_options.allow_growth = True + self.sess = tf.Session(config=tfconfig) + + # Create the dataset instance + self.dataset = Dataset(self.config, rng) + # import IPython + # IPython.embed() + # Create the model instance + self.network = Network(self.sess, self.config, self.dataset) + # Make individual saver instances and summary writers for each module + self.saver = {} + self.summary_writer = {} + self.best_val_loss = {} + self.best_step = {} + # Saver (only if there are params) + for _key in self.network.allparams: + if len(self.network.allparams[_key]) > 0: + with tf.variable_scope("saver-{}".format(_key)): + self.saver[_key] = tf.train.Saver( + self.network.allparams[_key]) + # Summary Writer + self.summary_writer[self.config.subtask] = tf.summary.FileWriter( + os.path.join(self.config.logdir, self.config.subtask), + graph=self.sess.graph) + # validation loss + self.best_val_loss[self.config.subtask] = np.inf + # step for each module + self.best_step[self.config.subtask] = 0 + + # We have everything ready. We finalize and initialie the network here. 
+ self.sess.run(tf.global_variables_initializer()) + + # Enable augmentations and/or force the use of the augmented set + self.use_aug_rot = 0 + if self.config.augment_rotations: + self.use_aug_rot = 1 + elif self.config.use_augmented_set: + self.use_aug_rot = -1 + + def run(self): + # For each module, check we have pre-trained modules and load them + print("-------------------------------------------------") + print(" Looking for previous results ") + print("-------------------------------------------------") + for _key in ["kp", "ori", "desc", "joint"]: + restore_network(self, _key) + + print("-------------------------------------------------") + print(" Training ") + print("-------------------------------------------------") + + subtask = self.config.subtask + batch_size = self.config.batch_size + for step in trange(int(self.best_step[subtask]), + int(self.config.max_step), + desc="Subtask = {}".format(subtask), + ncols=self.config.tqdm_width): + # ---------------------------------------- + # Forward pass: Note that we only compute the loss in the forward + # pass. We don't do summary writing or saving + fw_data = [] + fw_loss = [] + batches = self.hardmine_scheduler(self.config, step) + for num_cur in batches: + cur_data = self.dataset.next_batch( + task="train", + subtask=subtask, + batch_size=num_cur, + aug_rot=self.use_aug_rot) + cur_loss = self.network.forward(subtask, cur_data) + # Sanity check + if min(cur_loss) < 0: + raise RuntimeError('Negative loss while mining?') + # Data may contain empty (zero-value) samples: set loss to zero + if num_cur < batch_size: + cur_loss[num_cur - batch_size:] = 0 + fw_data.append(cur_data) + fw_loss.append(cur_loss) + # Fill a single batch with hardest + if len(batches) > 1: + cur_data = get_hard_batch(fw_loss, fw_data) + # ---------------------------------------- + # Backward pass: Note that the backward pass returns summary only + # when it is asked. Also, we manually keep note of step here, and + # not use the tensorflow version. This is to simplify the migration + # to another framework, if needed. + do_validation = step % self.config.validation_interval == 0 + cur_summary = self.network.backward( + subtask, cur_data, provide_summary=do_validation) + if do_validation and cur_summary is not None: + # Make sure we have the summary data + assert cur_summary is not None + # Write training summary + self.summary_writer[subtask].add_summary(cur_summary, step) + # Do multiple rounds of validation + cur_val_loss = np.zeros(self.config.validation_rounds) + for _val_round in xrange(self.config.validation_rounds): + # Fetch validation data + cur_data = self.dataset.next_batch( + task="valid", + subtask=subtask, + batch_size=batch_size, + aug_rot=self.use_aug_rot) + # Perform validation of the model using validation data + cur_val_loss[_val_round] = self.network.validate( + subtask, cur_data) + cur_val_loss = np.mean(cur_val_loss) + # Inject validation result to summary + summaries = [ + tf.Summary.Value( + tag="validation/err-{}".format(subtask), + simple_value=cur_val_loss, + ) + ] + self.summary_writer[subtask].add_summary( + tf.Summary(value=summaries), step) + # Flush the writer + self.summary_writer[subtask].flush() + + # TODO: Repeat without augmentation if necessary + # ... + + if cur_val_loss < self.best_val_loss[subtask]: + self.best_val_loss[subtask] = cur_val_loss + self.best_step[subtask] = step + save_network(self, subtask) + + def hardmine_scheduler(self, config, step, recursion=True): + """The hard mining scheduler. 
+ + Modes ("--mining-sched"): + "none": no mining. + "step": increase one batch at a time. + "smooth": increase one sample at a time, filling the rest of the + batch with zeros if necessary. + + Returns a list with the number of samples for every batch. + """ + + sched = config.mining_sched + if sched == 'none': + return [config.batch_size] + elif sched not in ['step', 'smooth']: + raise RuntimeError('Unknown scheduler') + + # Nothing to do if mining_step is not defined + if config.mining_step <= 0: + return [config.batch_size] + + max_batches = config.mining_ceil if config.mining_ceil > 0 else 32 + num_samples = int(round(config.batch_size * + (config.mining_base + step / config.mining_step))) + if num_samples > max_batches * config.batch_size: + # Limit has been reached + batches = [config.batch_size] * max_batches + else: + batches = [config.batch_size] * int(num_samples // config.batch_size) + # Do the remainder on the last batch + remainder = num_samples % config.batch_size + if remainder > 0: + # 'smooth': add remainder to the last batch + if sched == 'smooth': + batches[-1] += remainder + # 'step': add a full batch when the remainder goes above 50% + elif sched == 'step' and remainder >= config.batch_size / 2: + batches += [config.batch_size] + + # Feedback + if recursion and step > 0: + prev = self.hardmine_scheduler(config, step - 1, recursion=False) + if sum(prev) < sum(batches): + print(('\n[{}] Mining: increasing number of samples: ' + + '{} -> {} (batches: {} -> {}, last batch size: {})').format( + config.subtask, + sum(prev), + sum(batches), + len(prev), + len(batches), + batches[-1])) + + return batches + +# +# trainer.py ends here diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..0500b71 --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1,45 @@ +# __init__.py --- +# +# Filename: __init__.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 15:33:06 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +from utils.config import * +from utils.dump import * +from utils.kp_tools import * +from utils.layer import * +from utils.network import * +from utils.nms import * +from utils.test import * +from utils.tf import * +from utils.train import * + +# Legacy module is not loaded automatically +# from utils.legacy import * + +# +# __init__.py ends here diff --git a/utils/config.py b/utils/config.py new file mode 100644 index 0000000..4f0a836 --- /dev/null +++ b/utils/config.py @@ -0,0 +1,68 @@ +# config.py --- +# +# Filename: config.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 15:39:25 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. 
+ +# Code: + +import numpy as np + + +def get_ratio_scale(config): + """Calculate ratio_scale from other configs""" + + kp_input_size = config.kp_input_size + kp_base_scale = config.kp_base_scale + ratio_scale = (float(kp_input_size) / 2.0) / kp_base_scale + + return ratio_scale + + +def get_patch_size_no_aug(config): + """Determine large patch size without rotation augmentations""" + + desc_input_size = config.desc_input_size + desc_support_ratio = config.desc_support_ratio + + ratio_scale = get_ratio_scale(config) + patch_size = np.round( + float(desc_input_size) * ratio_scale / desc_support_ratio) + + return patch_size + + +def get_patch_size(config): + """Get the large patch size from other configs""" + + patch_size = get_patch_size_no_aug(config) + if config.use_augmented_set: + patch_size = np.ceil(np.sqrt(2) * patch_size) + + return patch_size + + +# +# config.py ends here diff --git a/utils/dump.py b/utils/dump.py new file mode 100644 index 0000000..bcc48e4 --- /dev/null +++ b/utils/dump.py @@ -0,0 +1,199 @@ +# dump.py --- +# +# Filename: dump.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 15:36:36 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import os + +import h5py +import tensorflow as tf + +from utils.legacy import load_legacy_network + +# Some constant strings +best_val_loss_filename = "best_val_loss.h5" +best_step_filename = "step.h5" + + +def saveh5(dict_to_dump, dump_file_full_name): + ''' Saves a dictionary as h5 file ''' + + with h5py.File(dump_file_full_name, 'w') as h5file: + if isinstance(dict_to_dump, list): + for i, d in enumerate(dict_to_dump): + newdict = {'dict' + str(i): d} + writeh5(newdict, h5file) + else: + writeh5(dict_to_dump, h5file) + + +def writeh5(dict_to_dump, h5node): + ''' Recursive function to write dictionary to h5 nodes ''' + + for _key in dict_to_dump.keys(): + if isinstance(dict_to_dump[_key], dict): + h5node.create_group(_key) + cur_grp = h5node[_key] + writeh5(dict_to_dump[_key], cur_grp) + else: + h5node[_key] = dict_to_dump[_key] + + +def loadh5(dump_file_full_name): + ''' Loads a h5 file as dictionary ''' + + with h5py.File(dump_file_full_name, 'r') as h5file: + dict_from_file = readh5(h5file) + + return dict_from_file + + +def readh5(h5node): + ''' Recursive function to read h5 nodes as dictionary ''' + + dict_from_file = {} + for _key in h5node.keys(): + if isinstance(h5node[_key], h5py._hl.group.Group): + dict_from_file[_key] = readh5(h5node[_key]) + else: + dict_from_file[_key] = h5node[_key].value + + return dict_from_file + + +def save_network(supervisor, subtask, verbose=True): + """Save the current training status""" + + # Skip if there's no saver of this subtask + if subtask not in supervisor.saver: + return + + cur_logdir = os.path.join(supervisor.config.logdir, subtask) + # Create save directory if it does not exist + if verbose: + print("") + print("[{}] Checking if save directory exists in {}" + "".format(subtask, cur_logdir)) + if not os.path.exists(cur_logdir): + os.makedirs(cur_logdir) + # Save the model + supervisor.saver[subtask].save( + supervisor.sess, + os.path.join(cur_logdir, "network") + ) + if verbose: + print("[{}] Saved model at {}" + "".format(subtask, cur_logdir)) + # Save mean std + saveh5(supervisor.network.mean, os.path.join(cur_logdir, "mean.h5")) + saveh5(supervisor.network.std, os.path.join(cur_logdir, 
"std.h5")) + if verbose: + print("[{}] Saved input normalization at {}" + "".format(subtask, cur_logdir)) + # Save the validation loss + saveh5({subtask: supervisor.best_val_loss[subtask]}, + os.path.join(cur_logdir, best_val_loss_filename)) + if verbose: + print("[{}] Saved best validation at {}" + "".format(subtask, cur_logdir)) + # Save the step + saveh5({subtask: supervisor.best_step[subtask]}, + os.path.join(cur_logdir, best_step_filename)) + if verbose: + print("[{}] Saved best step at {}" + "".format(subtask, cur_logdir)) + + # We also save the network metadata here + supervisor.saver[subtask].export_meta_graph( + os.path.join(cur_logdir, "network.meta")) + + +def restore_network(supervisor, subtask): + """Restore training status""" + + # Skip if there's no saver of this subtask + if subtask not in supervisor.saver: + return False + + is_loaded = False + + # Check if pretrain weight file is specified + predir = getattr(supervisor.config, "pretrained_{}".format(subtask)) + # Try loading the old weights + is_loaded += load_legacy_network(supervisor, subtask, predir) + # Try loading the tensorflow weights + is_loaded += load_network(supervisor, subtask, predir) + + # Load network using tensorflow saver + logdir = os.path.join(supervisor.config.logdir, subtask) + is_loaded += load_network(supervisor, subtask, logdir) + + return is_loaded + + +def load_network(supervisor, subtask, load_dir): + """Load function for our new framework""" + + print("[{}] Checking if previous Tensorflow run exists in {}" + "".format(subtask, load_dir)) + latest_checkpoint = tf.train.latest_checkpoint(load_dir) + if latest_checkpoint is not None: + # Load parameters + supervisor.saver[subtask].restore( + supervisor.sess, + latest_checkpoint + ) + print("[{}] Loaded previously trained weights".format(subtask)) + # Save mean std (falls back to default if non-existent) + if os.path.exists(os.path.join(load_dir, "mean.h5")): + supervisor.network.mean = loadh5(os.path.join(load_dir, "mean.h5")) + supervisor.network.std = loadh5(os.path.join(load_dir, "std.h5")) + print("[{}] Loaded input normalizers".format(subtask)) + # Load best validation result + supervisor.best_val_loss[subtask] = loadh5( + os.path.join(load_dir, best_val_loss_filename) + )[subtask] + print("[{}] Loaded best validation result = {}".format( + subtask, supervisor.best_val_loss[subtask])) + # Load best validation result + supervisor.best_step[subtask] = loadh5( + os.path.join(load_dir, best_step_filename) + )[subtask] + print("[{}] Loaded best step = {}".format( + subtask, supervisor.best_step[subtask])) + + return True + + else: + print("[{}] No previous Tensorflow result".format(subtask)) + + return False + + +# +# dump.py ends here diff --git a/utils/kp_tools.py b/utils/kp_tools.py new file mode 100644 index 0000000..0f92163 --- /dev/null +++ b/utils/kp_tools.py @@ -0,0 +1,682 @@ +# kp_tools.py --- +# +# Filename: kp_tools.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Sat Feb 20 10:11:28 2016 (+0100) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. 
+ +# Code: + + +from __future__ import print_function + +import cv2 +import numpy as np +import scipy.ndimage +from scipy.linalg import lu_factor, lu_solve + +from six.moves import xrange + +# Keypoint List Structure Index Info +IDX_X, IDX_Y, IDX_SIZE, IDX_ANGLE, IDX_RESPONSE, IDX_OCTAVE = ( + 0, 1, 2, 3, 4, 5) # , IDX_CLASSID not used +IDX_a, IDX_b, IDX_c = (6, 7, 8) +# NOTE the row-major colon-major adaptation here +IDX_A0, IDX_A2, IDX_A1, IDX_A3 = (9, 10, 11, 12) +# # IDX_CLASSID for KAZE +# IDX_CLASSID = 13 + +KP_LIST_LEN = 13 + +# Note that the element in the IDX_SIZE field will be scale, not the opencv +# keypoint's size. As open cv uses diameters, we need to multiply this with two +# to get the opencv size. + +# ------------------------------------------------------------------------ +# Functions for Swapping between Opencv Kp and Our Kp List +# ------------------------------------------------------------------------ + + +def kp_list_2_opencv_kp_list(kp_list): + + opencv_kp_list = [] + for kp in kp_list: + opencv_kp = cv2.KeyPoint(x=kp[IDX_X], + y=kp[IDX_Y], + _size=kp[IDX_SIZE] * 2.0, + _angle=kp[IDX_ANGLE], + _response=kp[IDX_RESPONSE], + _octave=np.int32(kp[IDX_OCTAVE]), + # _class_id=np.int32(kp[IDX_CLASSID]) + ) + opencv_kp_list += [opencv_kp] + + return opencv_kp_list + + +def opencv_kp_list_2_kp_list(opencv_kp_list): + + # IMPORTANT: make sure this part corresponds to the one in + # loadKpListFromTxt + + kp_list = [] + for opencv_kp in opencv_kp_list: + kp = np.zeros((KP_LIST_LEN, )) + kp[IDX_X] = opencv_kp.pt[0] + kp[IDX_Y] = opencv_kp.pt[1] + kp[IDX_SIZE] = opencv_kp.size * 0.5 + kp[IDX_ANGLE] = opencv_kp.angle + kp[IDX_RESPONSE] = opencv_kp.response + kp[IDX_OCTAVE] = opencv_kp.octave + # Compute a,b,c for vgg affine + kp[IDX_a] = 1. / (kp[IDX_SIZE]**2) + kp[IDX_b] = 0. + kp[IDX_c] = 1. / (kp[IDX_SIZE]**2) + # Compute A0, A1, A2, A3 and update + kp = update_affine(kp) + # kp[IDX_CLASSID] = opencv_kp.class_id + + kp_list += [kp] + + return kp_list + + +def rescale_circ_kp(kp, fRescale): + + kp[IDX_SIZE] *= fRescale + + return update_affine(update_abc_from_scale(kp)) + + +def update_abc_from_scale(kp): + + kp[IDX_a] = 1. / (kp[IDX_SIZE]**2) + kp[IDX_b] = 0. + kp[IDX_c] = 1. / (kp[IDX_SIZE]**2) + + return kp + + +def update_affine(kp): + """Returns an updated version of the keypoint. + + Note + ---- + This function should be applied only to individual keypoints, not a list. + + """ + + # Compute A0, A1, A2, A3 + S = np.asarray([[kp[IDX_a], kp[IDX_b]], [kp[IDX_b], kp[IDX_c]]]) + invS = np.linalg.inv(S) + a = np.sqrt(invS[0, 0]) + b = invS[0, 1] / max(a, 1e-18) + A = np.asarray([[a, 0], [b, np.sqrt(max(invS[1, 1] - b**2, 0))]]) + + # We need to rotate first! 
+ cos_val = np.cos(np.deg2rad(kp[IDX_ANGLE])) + sin_val = np.sin(np.deg2rad(kp[IDX_ANGLE])) + R = np.asarray([[cos_val, -sin_val], [sin_val, cos_val]]) + + A = np.dot(A, R) + + kp[IDX_A0] = A[0, 0] + kp[IDX_A1] = A[0, 1] + kp[IDX_A2] = A[1, 0] + kp[IDX_A3] = A[1, 1] + + return kp + + +# ------------------------------------------------------------------------ +# Keypoint I/O operations +# ------------------------------------------------------------------------ + +def writeFeature2OxFile(file_name, kp_list, desc_list): + + # kp_list : len(kp_list) == number of keypoints, each element is a + # opencv Keypoint object + # desc_list : kp_list.shape == [number of keypoints, dimension], + # descriptors for each keypoint + + # pt, scale, orientation, desc): + # write the features in the vlfeat format + + assert len(kp_list) == desc_list.shape[0] + + fp = open(file_name, 'w') # open the file object + + # write the headings + # dimension of the descriptor (SIFT is 128) + fp.write('%d\n' % (desc_list.shape[1])) + # number of keypoints to be evaluated + fp.write('%d\n' % (desc_list.shape[0])) + + # write the keypoint information + for idxKp in xrange(len(kp_list)): + u = kp_list[idxKp][IDX_X] + v = kp_list[idxKp][IDX_Y] + a = kp_list[idxKp][IDX_a] + b = kp_list[idxKp][IDX_b] + c = kp_list[idxKp][IDX_c] + desc = desc_list[idxKp, :] + + # turn each kp into vlfeat format + writestr = '%f %f %f %f %f' % (u, v, a, b, + c) + " " + " ".join(desc.astype(str)) + + fp.write(writestr + '\n') + + fp.close() # close the file object + + +def loadKpListFromTxt(kp_file_name): + + # Open keypoint file for read + kp_file = open(kp_file_name, 'rb') + + # skip the first two lines + kp_line = kp_file.readline() + kp_line = kp_file.readline() + + kp_list = [] + num_elem = -1 + while True: + # read a line from file + kp_line = kp_file.readline() + # check EOF + if not kp_line: + break + # split read information + kp_info = kp_line.split() + parsed_kp_info = [] + for idx in xrange(len(kp_info)): + parsed_kp_info += [float(kp_info[idx])] + parsed_kp_info = np.asarray(parsed_kp_info) + + if num_elem == -1: + num_elem = len(parsed_kp_info) + else: + assert num_elem == len(parsed_kp_info) + + # IMPORTANT: make sure this part corresponds to the one in + # opencv_kp_list_2_kp_list + + # check if we have all the kp list info + if len(parsed_kp_info) == 6: # if we only have opencv info + # Compute a,b,c for vgg affine + a = 1. / (parsed_kp_info[IDX_SIZE]**2) + b = 0. + c = 1. / (parsed_kp_info[IDX_SIZE]**2) + parsed_kp_info = np.concatenate((parsed_kp_info, [a, b, c])) + + if len(parsed_kp_info) == 9: # if we don't have the Affine warp + parsed_kp_info = np.concatenate((parsed_kp_info, np.zeros((4, )))) + parsed_kp_info = update_affine(parsed_kp_info) + + # if len(parsed_kp_info) == 13: + # # add dummy class id + # parsed_kp_info = np.concatenate((parsed_kp_info, [0])) + + # make sure we have everything! 
+ assert len(parsed_kp_info) == KP_LIST_LEN + + kp_list += [parsed_kp_info] + + # Close keypoint file + kp_file.close() + + return kp_list + + +def saveKpListToTxt(kp_list, orig_kp_file_name, kp_file_name): + + # first line KP_LIST_LEN to indicate we have the full + kp_line = str(KP_LIST_LEN) + '\n' + + # Open keypoint file for write + kp_file = open(kp_file_name, 'w') + + # write the first line + kp_file.write(kp_line) + + # write the number of kp in second line + kp_file.write('{}\n'.format(len(kp_list))) + + for kp in kp_list: + + # Make sure we have all info for kp + assert len(kp) == KP_LIST_LEN + + # Form the string to write + write_string = "" + for kp_elem, _i in zip(kp, range(len(kp))): + # if _i == IDX_OCTAVE or _i == IDX_CLASSID: + if _i == IDX_OCTAVE: # in case of the octave + write_string += str(np.int32(kp_elem)) + " " + else: + write_string += str(kp_elem) + " " + write_string += "\n" + + # Write the string + kp_file.write(write_string) + + # Close keypoint file + kp_file.close() + + +def XYZS2kpList(XYZS): + + kp_list = [None] * XYZS.shape[0] + for idxKp in xrange(XYZS.shape[0]): + + kp = np.zeros((KP_LIST_LEN, )) + kp[IDX_X] = XYZS[idxKp, 0] + kp[IDX_Y] = XYZS[idxKp, 1] + kp[IDX_SIZE] = XYZS[idxKp, 2] + kp[IDX_ANGLE] = 0 + kp[IDX_RESPONSE] = XYZS[idxKp, 3] + + # computing the octave should be dealt with caution. We compute in the + # SIFT way. The SIFT code of openCV computes scale in the following + # way. + # >>> scale = 1.6 * 2**((layer+xi) / 3) * 2**octave + # where octave is packed by octave = layer << 8 + octv + layer_n_octv = np.log2(kp[IDX_SIZE] / 1.6) + layer_n_octv = max(0, layer_n_octv) # TODO: FOR NOW LET'S JUST DO THIS + octv = int(np.floor(layer_n_octv)) + layer_n_xi = (layer_n_octv - np.floor(layer_n_octv)) * 3.0 + layer = int(np.floor(layer_n_xi)) + xi = layer_n_xi - np.floor(layer_n_xi) + # make sure we have the layer correctly by checking that it is smaller + # than 3 + assert layer < 3 + # pack octave in the same way as openCV + # kpt.octave = octv + (layer << 8) + (cvRound((xi + 0.5)*255) << 16); + # also remember + # octave = octave < 128 ? octave : (-128 | octave); + # which means + # kpt.octave = (kpt.octave & ~255) | ((kpt.octave + firstOctave) & 255) + # so if octave is negative & 255 will give otcv >= 128 later... + octv = octv & 255 + octave = octv + (layer << 8) + (int(np.round((xi + 0.5) * 255.)) << 16) + kp[IDX_OCTAVE] = octave + + # Compute a,b,c for vgg affine + kp[IDX_a] = 1. / (kp[IDX_SIZE]**2) + kp[IDX_b] = 0. + kp[IDX_c] = 1. 
/ (kp[IDX_SIZE]**2) + # Compute A0, A1, A2, A3 and update + kp = update_affine(kp) + # kp[IDX_CLASSID] = 0 + + kp_list[idxKp] = kp + + return kp_list + + +# ------------------------------------------------------------------------ +# NonMaxSuppresion related +# ------------------------------------------------------------------------ + + +def get_XYZS_from_res_list(res_list, resize_to_test, scales_to_test, nearby=1, + edge_th=0, scl_intv=2, nms_intv=1, + do_interpolation=False, fScaleEdgeness=0.0): + # NMS + nms_res = nonMaxSuppression(res_list, nearby=nearby, + scl_intv=scl_intv, nms_intv=nms_intv) + + XYZS = get_subpixel_XYZS(res_list, nms_res, resize_to_test, + scales_to_test, edge_th, do_interpolation, + fScaleEdgeness) + # sort by score + XYZS = XYZS[np.argsort(XYZS[:, 3])[::-1]] + + return XYZS + + +def get_subpixel_XYZS(score_list, nms_list, resize_to_test, + scales_to_test, edge_th, do_interpolation, + fScaleEdgeness): + + log_scales = np.log2(scales_to_test) + log_scale_step = ((np.max(log_scales) - np.min(log_scales)) / + (len(scales_to_test) - 1.0)) + + X = [()] * len(nms_list) + Y = [()] * len(nms_list) + Z = [()] * len(nms_list) + S = [()] * len(nms_list) + for idxScale in xrange(len(nms_list)): + nms = nms_list[idxScale] + + pts = np.where(nms) + if len(pts[0]) == 0: + continue + + assert idxScale > 0 and idxScale < len(nms_list) - 1 + + # compute ratio for coordinate conversion + fRatioUp = ( + (np.asarray(score_list[idxScale + 1].shape, dtype='float') - 1.0) / + (np.asarray(score_list[idxScale].shape, dtype='float') - 1.0) + ).reshape([2, -1]) + fRatioDown = ( + (np.asarray(score_list[idxScale - 1].shape, dtype='float') - 1.0) / + (np.asarray(score_list[idxScale].shape, dtype='float') - 1.0) + ).reshape([2, -1]) + + # the conversion function + def at(dx, dy, ds): + if not isinstance(dx, np.ndarray): + dx = np.ones(len(pts[0]),) * dx + if not isinstance(dy, np.ndarray): + dy = np.ones(len(pts[0]),) * dy + if not isinstance(ds, np.ndarray): + ds = np.ones(len(pts[0]),) * ds + new_pts = (pts[0] + dy, pts[1] + dx) + ds = np.round(ds).astype(int) + fRatio = ((ds == 0).reshape([1, -1]) * 1.0 + + (ds == -1).reshape([1, -1]) * fRatioDown + + (ds == 1).reshape([1, -1]) * fRatioUp) + assert np.max(ds) <= 1 and np.min(ds) >= -1 + new_pts = tuple([np.round(v * r).astype(int) + for v, r in zip(new_pts, fRatio)]) + scores_to_return = np.asarray([ + score_list[idxScale + _ds][_y, _x] + for _ds, _x, _y in zip( + ds, new_pts[1], new_pts[0] + ) + ]) + return scores_to_return + + # compute the gradient + Dx = 0.5 * (at(+1, 0, 0) - at(-1, 0, 0)) + Dy = 0.5 * (at(0, +1, 0) - at(0, -1, 0)) + Ds = 0.5 * (at(0, 0, +1) - at(0, 0, -1)) + + # compute the Hessian + Dxx = (at(+1, 0, 0) + at(-1, 0, 0) - 2.0 * at(0, 0, 0)) + Dyy = (at(0, +1, 0) + at(0, -1, 0) - 2.0 * at(0, 0, 0)) + Dss = (at(0, 0, +1) + at(0, 0, -1) - 2.0 * at(0, 0, 0)) + + Dxy = 0.25 * (at(+1, +1, 0) + at(-1, -1, 0) - + at(-1, +1, 0) - at(+1, -1, 0)) + Dxs = 0.25 * (at(+1, 0, +1) + at(-1, 0, -1) - + at(-1, 0, +1) - at(+1, 0, -1)) + Dys = 0.25 * (at(0, +1, +1) + at(0, -1, -1) - + at(0, -1, +1) - at(0, +1, -1)) + + # filter out all keypoints which we have inf + is_good = (np.isfinite(Dx) * np.isfinite(Dy) * np.isfinite(Ds) * + np.isfinite(Dxx) * np.isfinite(Dyy) * np.isfinite(Dss) * + np.isfinite(Dxy) * np.isfinite(Dxs) * np.isfinite(Dys)) + Dx = Dx[is_good] + Dy = Dy[is_good] + Ds = Ds[is_good] + Dxx = Dxx[is_good] + Dyy = Dyy[is_good] + Dss = Dss[is_good] + Dxy = Dxy[is_good] + Dxs = Dxs[is_good] + Dys = Dys[is_good] + pts = 
tuple([v[is_good] for v in pts]) + # check if empty + if len(pts[0]) == 0: + continue + + # filter out all keypoints which are on edges + if edge_th > 0: + + # # re-compute the Hessian + # Dxx = (at(b[:, 0] + 1, b[:, 1], b[:, 2]) + + # at(b[:, 0] - 1, b[:, 1], b[:, 2]) - + # 2.0 * at(b[:, 0], b[:, 1], b[:, 2])) + # Dyy = (at(b[:, 0], b[:, 1] + 1, b[:, 2]) + + # at(b[:, 0], b[:, 1] - 1, b[:, 2]) - + # 2.0 * at(b[:, 0], b[:, 1], b[:, 2])) + + # Dxy = 0.25 * (at(b[:, 0] + 1, b[:, 1] + 1, b[:, 2]) + + # at(b[:, 0] - 1, b[:, 1] - 1, b[:, 2]) - + # at(b[:, 0] - 1, b[:, 1] + 1, b[:, 2]) - + # at(b[:, 0] + 1, b[:, 1] - 1, b[:, 2])) + + # H = np.asarray([[Dxx, Dxy, Dxs], + # [Dxy, Dyy, Dys], + # [Dxs, Dys, Dss]]).transpose([2, 0, 1]) + + edge_score = (Dxx + Dyy) * (Dxx + Dyy) / (Dxx * Dyy - Dxy * Dxy) + is_good = ((edge_score >= 0) * + (edge_score < (edge_th + 1.0)**2 / edge_th)) + + if fScaleEdgeness > 0: + is_good = is_good * ( + abs(Dss) > fScaleEdgeness + ) + + Dx = Dx[is_good] + Dy = Dy[is_good] + Ds = Ds[is_good] + Dxx = Dxx[is_good] + Dyy = Dyy[is_good] + Dss = Dss[is_good] + Dxy = Dxy[is_good] + Dxs = Dxs[is_good] + Dys = Dys[is_good] + pts = tuple([v[is_good] for v in pts]) + # check if empty + if len(pts[0]) == 0: + continue + + b = np.zeros((len(pts[0]), 3)) + if do_interpolation: + # from VLFEAT + + # solve linear system + A = np.asarray([[Dxx, Dxy, Dxs], + [Dxy, Dyy, Dys], + [Dxs, Dys, Dss]]).transpose([2, 0, 1]) + + b = np.asarray([-Dx, -Dy, -Ds]).transpose([1, 0]) + + b_solved = np.zeros_like(b) + for idxPt in xrange(len(A)): + b_solved[idxPt] = lu_solve(lu_factor(A[idxPt]), b[idxPt]) + + b = b_solved + + # throw away the ones with bad subpixel localizatino + is_good = ((abs(b[:, 0]) < 1.5) * (abs(b[:, 1]) < 1.5) * + (abs(b[:, 2]) < 1.5)) + b = b[is_good] + pts = tuple([v[is_good] for v in pts]) + # check if empty + if len(pts[0]) == 0: + continue + + x = pts[1] + b[:, 0] + y = pts[0] + b[:, 1] + log_ds = b[:, 2] + + S[idxScale] = at(b[:, 0], b[:, 1], b[:, 2]) + X[idxScale] = x / resize_to_test[idxScale] + Y[idxScale] = y / resize_to_test[idxScale] + Z[idxScale] = scales_to_test[idxScale] * 2.0**(log_ds * log_scale_step) + + X = np.concatenate(X) + Y = np.concatenate(Y) + Z = np.concatenate(Z) + S = np.concatenate(S) + + XYZS = np.concatenate([X.reshape([-1, 1]), + Y.reshape([-1, 1]), + Z.reshape([-1, 1]), + S.reshape([-1, 1])], + axis=1) + + return XYZS + + +def nonMaxSuppression(score_img_or_list, nearby=1, scl_intv=2, nms_intv=1): + """ Performs Non Maximum Suppression. + + Parameters + ---------- + score_img_or_list: nparray or list + WRITEME + + nearby: int + Size of the neighborhood. + + scl_intv: int + How many levels we have between half scale. + + nms_intv: int + How many levels we use for scale space nms. 
+ + """ + + filter_size = (nearby * 2 + 1,) * 2 + + if isinstance(score_img_or_list, list): + bis2d = False + else: + bis2d = True + + if bis2d: + score = score_img_or_list + # max score in region + max_score = scipy.ndimage.filters.maximum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + # second score in region + second_score = scipy.ndimage.filters.rank_filter( + score, -2, filter_size, mode='constant', cval=-np.inf + ) + # min score in region to check infs + min_score = scipy.ndimage.filters.minimum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + nonmax_mask_or_list = ((score == max_score) * + (max_score > second_score) * + np.isfinite(min_score)) + + else: + + max2d_list = [ + scipy.ndimage.filters.maximum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + for score in score_img_or_list + ] + + second2d_list = [ + scipy.ndimage.filters.rank_filter( + score, -2, filter_size, mode='constant', cval=-np.inf + ) + for score in score_img_or_list + ] + + min2d_list = [ + scipy.ndimage.filters.minimum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + for score in score_img_or_list + ] + + nonmax2d_list = [(score == max_score) * (max_score > second_score) * + np.isfinite(min_score) + for score, max_score, second_score, min_score in + zip(score_img_or_list, + max2d_list, + second2d_list, + min2d_list) + ] + + nonmax_mask_or_list = [None] * len(nonmax2d_list) + for idxScale in xrange(len(nonmax2d_list)): + + nonmax2d = nonmax2d_list[idxScale] + max2d = max2d_list[idxScale] + + # prep output + nonmax_mask = np.zeros_like(nonmax2d) + + # get coordinates of the local max positions of nonmax2d + coord2d_max = np.where(nonmax2d) + # print("2d nonmax = {} kp".format(len(coord2d_max[0]))) + + # range of other scales to look at + # scl_diffs = np.arange(-scl_intv, scl_intv + 1) + # scl_diffs = np.arange(-1, 1 + 1) + scl_diffs = np.arange(-nms_intv, nms_intv + 1) + scl_diffs = scl_diffs[scl_diffs != 0] + + # skip if we don't have the complete set + if (idxScale + np.min(scl_diffs) < 0 or + idxScale + np.max(scl_diffs) > len(nonmax2d_list) - 1): + continue + + # Test on multiple scales to see if it is scalespace local max + for scl_diff in scl_diffs: + + scl_to_compare = idxScale + scl_diff + + # look at the other scales max + max2d_other = max2d_list[scl_to_compare] + # compute ratio for coordinate conversion + fRatio \ + = (np.asarray(max2d_other.shape, dtype='float') - 1.0) \ + / (np.asarray(nonmax2d.shape, dtype='float') - 1.0) + # get indices for lower layer + coord_other = tuple([np.round(v * r).astype(int) + for v, r in zip(coord2d_max, fRatio)]) + # find good points which should survive + idxGood = np.where((max2d[coord2d_max] > + max2d_other[coord_other]) * + np.isfinite(max2d_other[coord_other]) + ) + + # copy only the ones that are good + coord2d_max = tuple([v[idxGood] for v in coord2d_max]) + + # mark surviving points + nonmax_mask[coord2d_max] = 1.0 + + # special case when we are asked with single item in list + if len(nonmax2d_list) == 1: + # get coordinates of the local max positions of nonmax2d + coord2d_max = np.where(nonmax2d) + # mark surviving points + nonmax_mask[coord2d_max] = 1.0 + + # add to list + nonmax_mask_or_list[idxScale] = nonmax_mask + + return nonmax_mask_or_list + + +# +# kp_tools.py ends here diff --git a/utils/layer.py b/utils/layer.py new file mode 100644 index 0000000..b864308 --- /dev/null +++ b/utils/layer.py @@ -0,0 +1,85 @@ +# layer.py --- +# +# Filename: layer.py +# Description: +# Author: Kwang 
Moo Yi +# Maintainer: +# Created: Thu Jul 6 15:37:55 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import numpy as np +import tensorflow as tf + + +def get_W_b_conv2d(ksize, fanin, fanout): + W = tf.get_variable( + name="weights", + shape=[ksize, ksize, fanin, fanout], + # initializer=tf.random_normal_initializer(2e-2), + initializer=tf.truncated_normal_initializer( + stddev=2.0 / (ksize * ksize * fanin)), + ) + b = tf.get_variable( + name="biases", + shape=[fanout], + initializer=tf.constant_initializer(0.0), + ) + + return W, b + + +def get_W_b_fc(fanin, fanout): + W = tf.get_variable( + name="weights", + shape=[fanin, fanout], + # initializer=tf.random_normal_initializer(2e-2), + initializer=tf.truncated_normal_initializer(stddev=2.0 / fanin), + ) + b = tf.get_variable( + name="biases", + shape=[fanout], + initializer=tf.constant_initializer(0.0), + ) + return W, b + + +def softmax(val, axis, softmax_strength): + ''' Soft max function used for cost function ''' + + softmax_strength = np.cast["float32"](softmax_strength) + + if softmax_strength < 0: + res_after_max = tf.reduce_max(val, axis=axis) + else: + res_after_max = np.cast["float32"](1.0) / softmax_strength \ + * tf.log(tf.reduce_mean(tf.exp( + softmax_strength * ( + val - tf.reduce_max(val, axis=axis, keep_dims=True) + )), axis=axis)) + tf.reduce_max(val, axis=axis) + + return res_after_max + + +# +# layer.py ends here diff --git a/utils/legacy.py b/utils/legacy.py new file mode 100644 index 0000000..17f4651 --- /dev/null +++ b/utils/legacy.py @@ -0,0 +1,183 @@ +# legacy.py --- +# +# Filename: legacy.py +# Description: Functions related to using the old framework +# Author: Kwang Moo Yi +# Maintainer: +# Created: Tue Jul 11 14:52:25 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import os + +import tensorflow as tf + + +def build_legacy(network): + """Builds the tensorflow ops related to loading legacy weights. + + Note that implementation for this function is **intentionally** left out + from the original class as it is only a hack to allow old weights to be + loaded into tensorflow. 
+ + """ + + # Lazy import to prevent import issues + from utils.tf import get_tensor_shape + + # Check the current subtask + subtask = network.config.subtask + + # Mapping from the old framework variable name to the new tensorflow + # variable names + name_map = {} + if subtask == "kp" or subtask == "joint": + # Load the keypoint only when necessary + name_map["kp-c0"] = "network/lift/kp/conv-ghh-1" + if subtask == "ori" or subtask == "joint": + # Load the orientation only when necessary + name_map["ori-c0"] = "network/lift/ori/conv-act-pool-1" + name_map["ori-c1"] = "network/lift/ori/conv-act-pool-2" + name_map["ori-c2"] = "network/lift/ori/conv-act-pool-3" + name_map["ori-f3"] = "network/lift/ori/fc-ghh-drop-4" + name_map["ori-f4"] = "network/lift/ori/fc-ghh-5" + if subtask != "kp": + # Load the descriptor only when necessary + name_map["desc-1-conv"] = "network/lift/desc/conv-act-pool-norm-1" + name_map["desc-5-conv"] = "network/lift/desc/conv-act-pool-norm-2" + name_map["desc-9-conv"] = "network/lift/desc/conv-act-pool-3" + + # # Save name_map + # network.legacy_name_map = name_map + + # Build placeholders from the original variable shapes and create assign + # ops related to it + network.legacy_weights_in = {} + network.legacy_assign_op = {} + # _old: variable scope name for the old framework + # _new: variable scope name for the new framework + for _old in name_map: + _new = name_map[_old] + for _param_name in ["/weights", "/biases"]: + with tf.variable_scope("", reuse=True): + cur_param = tf.get_variable(_new + _param_name) + network.legacy_weights_in[_new + _param_name] = tf.placeholder( + tf.float32, + get_tensor_shape(cur_param), + ) + network.legacy_assign_op[_new + _param_name] = tf.assign( + cur_param, + network.legacy_weights_in[_new + _param_name], + ) + + # Create function attribute that actually runs the loader with the session + if not hasattr(network, "legacy_load_func"): + network.legacy_load_func = {} + + def legacy_load_func(sess, model): + # Create a feed_dict + feed_dict = create_feed_dict( + model, network.legacy_weights_in, name_map) + # Run all assign ops within the session + sess.run(network.legacy_assign_op, feed_dict=feed_dict) + + network.legacy_load_func[subtask] = legacy_load_func + + +def create_feed_dict(model, placeholders, name_map): + """Creates a feed dict to use for the assignment op. 
+ + model: dictionary containing the old weights + + placeholders: placeholders to feed to + + name_map: mapping for the old name to the new variable name + + """ + + feed_dict = {} + + # For each variables we want to assign + for _old in name_map: + + # Get the new name + _new = name_map[_old] + + # For weights + cur_weights = model[_old][_old + ".W"] + # If it is 4D+, shrink dimension, as the last dimension was there for + # legacy dev purposes + cur_weights = cur_weights.reshape(cur_weights.shape[:4]) + # Convert theano weights to tensorflow + if len(cur_weights.shape) == 4: + cur_weights = cur_weights.transpose((2, 3, 1, 0)) + + # For biases + cur_biases = model[_old][_old + ".b"] + + # Add to feed_dict after + feed_dict[placeholders[_new + "/weights"]] = cur_weights + feed_dict[placeholders[_new + "/biases"]] = cur_biases + + return feed_dict + + +def load_legacy_network(supervisor, subtask, load_dir): + """Load function for our old framework""" + + # Lazy loading to prevent import issues + from utils import loadh5 + + print("[{}] Checking if old pre-trained weights exists in {}" + "".format(subtask, load_dir)) + model_file = os.path.join(load_dir, "model.h5") + norm_file = os.path.join(load_dir, "norm.h5") + base_file = os.path.join(load_dir, "base.h5") + + if os.path.exists(model_file) and os.path.exists(norm_file) and \ + os.path.exists(base_file): + model = loadh5(model_file) + norm = loadh5(norm_file) + base = loadh5(base_file) + # Load the input normalization parameters + supervisor.network.mean["kp"] = float(norm["mean_x"]) + supervisor.network.mean["ori"] = float(norm["mean_x"]) + supervisor.network.mean["desc"] = float(base["patch-mean"]) + supervisor.network.std["kp"] = float(norm["std_x"]) + supervisor.network.std["ori"] = float(norm["std_x"]) + supervisor.network.std["desc"] = float(base["patch-std"]) + # Load weights for the component + supervisor.network.legacy_load_func[subtask](supervisor.sess, model) + print("[{}] Loaded previously trained weights".format(subtask)) + + return True + + else: + print("[{}] No pretrained weights from the old framework" + "".format(subtask)) + + return False + + +# +# legacy.py ends here diff --git a/utils/network.py b/utils/network.py new file mode 100644 index 0000000..c6e528d --- /dev/null +++ b/utils/network.py @@ -0,0 +1,64 @@ +# network.py --- +# +# Filename: network.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 15:38:49 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import tensorflow as tf + + +def make_theta(xyz, cs=None, scale=None, rr=0.5): + """Make the theta to be used for the spatial transformer + + If cs is None, simply just do the translation only. 
+ + """ + + # get dx, dy, dz + dx = xyz[:, 0] + dy = xyz[:, 1] + dz = xyz[:, 1] + # compute the resize from the largest scale image + reduce_ratio = rr + dr = (reduce_ratio) * (2.0)**dz + + if cs is None: + c = tf.ones_like(dx) + s = tf.zeros_like(dx) + else: + c = cs[:, 0] + s = cs[:, 1] + + theta = tf.stack( + [dr * c, -dr * s, dx, + dr * s, dr * c, dy], axis=1) + + return theta + + +# +# network.py ends here diff --git a/utils/nms.py b/utils/nms.py new file mode 100644 index 0000000..2779f83 --- /dev/null +++ b/utils/nms.py @@ -0,0 +1,375 @@ +# nms.py --- +# +# Filename: nms.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 16:26:38 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import numpy as np +import scipy +from scipy.linalg import lu_factor, lu_solve + +from six.moves import xrange + + +def get_XYZS_from_res_list(res_list, resize_to_test, scales_to_test, nearby=1, + edge_th=0, scl_intv=2, nms_intv=1, + do_interpolation=False, fScaleEdgeness=0.0): + # NMS + nms_res = nonMaxSuppression(res_list, nearby=nearby, + scl_intv=scl_intv, nms_intv=nms_intv) + + XYZS = get_subpixel_XYZS(res_list, nms_res, resize_to_test, + scales_to_test, edge_th, do_interpolation, + fScaleEdgeness) + # sort by score + XYZS = XYZS[np.argsort(XYZS[:, 3])[::-1]] + + return XYZS + + +def get_subpixel_XYZS(score_list, nms_list, resize_to_test, + scales_to_test, edge_th, do_interpolation, + fScaleEdgeness): + + log_scales = np.log2(scales_to_test) + log_scale_step = ((np.max(log_scales) - np.min(log_scales)) / + (len(scales_to_test) - 1.0)) + + X = [()] * len(nms_list) + Y = [()] * len(nms_list) + Z = [()] * len(nms_list) + S = [()] * len(nms_list) + for idxScale in xrange(len(nms_list)): + nms = nms_list[idxScale] + + pts = np.where(nms) + if len(pts[0]) == 0: + continue + + assert idxScale > 0 and idxScale < len(nms_list) - 1 + + # compute ratio for coordinate conversion + fRatioUp = ( + (np.asarray(score_list[idxScale + 1].shape, dtype='float') - 1.0) / + (np.asarray(score_list[idxScale].shape, dtype='float') - 1.0) + ).reshape([2, -1]) + fRatioDown = ( + (np.asarray(score_list[idxScale - 1].shape, dtype='float') - 1.0) / + (np.asarray(score_list[idxScale].shape, dtype='float') - 1.0) + ).reshape([2, -1]) + + # the conversion function + def at(dx, dy, ds): + if not isinstance(dx, np.ndarray): + dx = np.ones(len(pts[0]),) * dx + if not isinstance(dy, np.ndarray): + dy = np.ones(len(pts[0]),) * dy + if not isinstance(ds, np.ndarray): + ds = np.ones(len(pts[0]),) * ds + new_pts = (pts[0] + dy, pts[1] + dx) + ds = np.round(ds).astype(int) + fRatio = ((ds == 0).reshape([1, -1]) * 1.0 + + (ds == -1).reshape([1, -1]) * fRatioDown + + (ds == 1).reshape([1, -1]) * fRatioUp) + assert np.max(ds) <= 1 and np.min(ds) >= -1 + new_pts = tuple([np.round(v * r).astype(int) + for v, r in zip(new_pts, fRatio)]) + scores_to_return = np.asarray([ + score_list[idxScale + _ds][_y, _x] + for _ds, _x, _y in zip( + ds, new_pts[1], new_pts[0] + ) + ]) + return scores_to_return + + # compute the gradient + Dx = 0.5 * (at(+1, 0, 0) - at(-1, 0, 0)) + Dy = 0.5 * (at(0, +1, 0) - at(0, -1, 0)) + Ds = 0.5 * (at(0, 0, +1) - at(0, 0, -1)) + + # compute the Hessian + Dxx = (at(+1, 0, 0) + at(-1, 0, 0) - 2.0 * at(0, 0, 0)) + Dyy = (at(0, +1, 0) + at(0, -1, 0) - 2.0 * at(0, 0, 0)) + Dss = (at(0, 0, +1) + at(0, 0, -1) - 2.0 * at(0, 0, 0)) + + Dxy = 
0.25 * (at(+1, +1, 0) + at(-1, -1, 0) - + at(-1, +1, 0) - at(+1, -1, 0)) + Dxs = 0.25 * (at(+1, 0, +1) + at(-1, 0, -1) - + at(-1, 0, +1) - at(+1, 0, -1)) + Dys = 0.25 * (at(0, +1, +1) + at(0, -1, -1) - + at(0, -1, +1) - at(0, +1, -1)) + + # filter out all keypoints which we have inf + is_good = (np.isfinite(Dx) * np.isfinite(Dy) * np.isfinite(Ds) * + np.isfinite(Dxx) * np.isfinite(Dyy) * np.isfinite(Dss) * + np.isfinite(Dxy) * np.isfinite(Dxs) * np.isfinite(Dys)) + Dx = Dx[is_good] + Dy = Dy[is_good] + Ds = Ds[is_good] + Dxx = Dxx[is_good] + Dyy = Dyy[is_good] + Dss = Dss[is_good] + Dxy = Dxy[is_good] + Dxs = Dxs[is_good] + Dys = Dys[is_good] + pts = tuple([v[is_good] for v in pts]) + # check if empty + if len(pts[0]) == 0: + continue + + # filter out all keypoints which are on edges + if edge_th > 0: + + # # re-compute the Hessian + # Dxx = (at(b[:, 0] + 1, b[:, 1], b[:, 2]) + + # at(b[:, 0] - 1, b[:, 1], b[:, 2]) - + # 2.0 * at(b[:, 0], b[:, 1], b[:, 2])) + # Dyy = (at(b[:, 0], b[:, 1] + 1, b[:, 2]) + + # at(b[:, 0], b[:, 1] - 1, b[:, 2]) - + # 2.0 * at(b[:, 0], b[:, 1], b[:, 2])) + + # Dxy = 0.25 * (at(b[:, 0] + 1, b[:, 1] + 1, b[:, 2]) + + # at(b[:, 0] - 1, b[:, 1] - 1, b[:, 2]) - + # at(b[:, 0] - 1, b[:, 1] + 1, b[:, 2]) - + # at(b[:, 0] + 1, b[:, 1] - 1, b[:, 2])) + + # H = np.asarray([[Dxx, Dxy, Dxs], + # [Dxy, Dyy, Dys], + # [Dxs, Dys, Dss]]).transpose([2, 0, 1]) + + edge_score = (Dxx + Dyy) * (Dxx + Dyy) / (Dxx * Dyy - Dxy * Dxy) + is_good = ((edge_score >= 0) * + (edge_score < (edge_th + 1.0)**2 / edge_th)) + + if fScaleEdgeness > 0: + is_good = is_good * ( + abs(Dss) > fScaleEdgeness + ) + + Dx = Dx[is_good] + Dy = Dy[is_good] + Ds = Ds[is_good] + Dxx = Dxx[is_good] + Dyy = Dyy[is_good] + Dss = Dss[is_good] + Dxy = Dxy[is_good] + Dxs = Dxs[is_good] + Dys = Dys[is_good] + pts = tuple([v[is_good] for v in pts]) + # check if empty + if len(pts[0]) == 0: + continue + + b = np.zeros((len(pts[0]), 3)) + if do_interpolation: + # from VLFEAT + + # solve linear system + A = np.asarray([[Dxx, Dxy, Dxs], + [Dxy, Dyy, Dys], + [Dxs, Dys, Dss]]).transpose([2, 0, 1]) + + b = np.asarray([-Dx, -Dy, -Ds]).transpose([1, 0]) + + b_solved = np.zeros_like(b) + for idxPt in xrange(len(A)): + b_solved[idxPt] = lu_solve(lu_factor(A[idxPt]), b[idxPt]) + + b = b_solved + + # throw away the ones with bad subpixel localizatino + is_good = ((abs(b[:, 0]) < 1.5) * (abs(b[:, 1]) < 1.5) * + (abs(b[:, 2]) < 1.5)) + b = b[is_good] + pts = tuple([v[is_good] for v in pts]) + # check if empty + if len(pts[0]) == 0: + continue + + x = pts[1] + b[:, 0] + y = pts[0] + b[:, 1] + log_ds = b[:, 2] + + S[idxScale] = at(b[:, 0], b[:, 1], b[:, 2]) + X[idxScale] = x / resize_to_test[idxScale] + Y[idxScale] = y / resize_to_test[idxScale] + Z[idxScale] = scales_to_test[idxScale] * 2.0**(log_ds * log_scale_step) + + X = np.concatenate(X) + Y = np.concatenate(Y) + Z = np.concatenate(Z) + S = np.concatenate(S) + + XYZS = np.concatenate([X.reshape([-1, 1]), + Y.reshape([-1, 1]), + Z.reshape([-1, 1]), + S.reshape([-1, 1])], + axis=1) + + return XYZS + + +def nonMaxSuppression(score_img_or_list, nearby=1, scl_intv=2, nms_intv=1): + """ Performs Non Maximum Suppression. + + Parameters + ---------- + score_img_or_list: nparray or list + WRITEME + + nearby: int + Size of the neighborhood. + + scl_intv: int + How many levels we have between half scale. + + nms_intv: int + How many levels we use for scale space nms. 
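+
+    Notes
+    -----
+    A rough usage sketch (the variable names here are illustrative only):
+    given a list of per-scale score maps, get_XYZS_from_res_list() above
+    calls this function first and then refines the surviving maxima with
+    get_subpixel_XYZS(), e.g.
+
+    >>> masks = nonMaxSuppression(score_maps, nearby=1, scl_intv=2,
+    ...                           nms_intv=1)
+    >>> # masks[i] marks the scale-space local maxima of score_maps[i]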
+ + """ + + filter_size = (nearby * 2 + 1,) * 2 + + if isinstance(score_img_or_list, list): + bis2d = False + else: + bis2d = True + + if bis2d: + score = score_img_or_list + # max score in region + max_score = scipy.ndimage.filters.maximum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + # second score in region + second_score = scipy.ndimage.filters.rank_filter( + score, -2, filter_size, mode='constant', cval=-np.inf + ) + # min score in region to check infs + min_score = scipy.ndimage.filters.minimum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + nonmax_mask_or_list = ((score == max_score) * + (max_score > second_score) * + np.isfinite(min_score)) + + else: + + max2d_list = [ + scipy.ndimage.filters.maximum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + for score in score_img_or_list + ] + + second2d_list = [ + scipy.ndimage.filters.rank_filter( + score, -2, filter_size, mode='constant', cval=-np.inf + ) + for score in score_img_or_list + ] + + min2d_list = [ + scipy.ndimage.filters.minimum_filter( + score, filter_size, mode='constant', cval=-np.inf + ) + for score in score_img_or_list + ] + + nonmax2d_list = [(score == max_score) * (max_score > second_score) * + np.isfinite(min_score) + for score, max_score, second_score, min_score in + zip(score_img_or_list, + max2d_list, + second2d_list, + min2d_list) + ] + + nonmax_mask_or_list = [None] * len(nonmax2d_list) + for idxScale in xrange(len(nonmax2d_list)): + + nonmax2d = nonmax2d_list[idxScale] + max2d = max2d_list[idxScale] + + # prep output + nonmax_mask = np.zeros_like(nonmax2d) + + # get coordinates of the local max positions of nonmax2d + coord2d_max = np.where(nonmax2d) + # print("2d nonmax = {} kp".format(len(coord2d_max[0]))) + + # range of other scales to look at + # scl_diffs = np.arange(-scl_intv, scl_intv + 1) + # scl_diffs = np.arange(-1, 1 + 1) + scl_diffs = np.arange(-nms_intv, nms_intv + 1) + scl_diffs = scl_diffs[scl_diffs != 0] + + # skip if we don't have the complete set + if (idxScale + np.min(scl_diffs) < 0 or + idxScale + np.max(scl_diffs) > len(nonmax2d_list) - 1): + continue + + # Test on multiple scales to see if it is scalespace local max + for scl_diff in scl_diffs: + + scl_to_compare = idxScale + scl_diff + + # look at the other scales max + max2d_other = max2d_list[scl_to_compare] + # compute ratio for coordinate conversion + fRatio \ + = (np.asarray(max2d_other.shape, dtype='float') - 1.0) \ + / (np.asarray(nonmax2d.shape, dtype='float') - 1.0) + # get indices for lower layer + coord_other = tuple([np.round(v * r).astype(int) + for v, r in zip(coord2d_max, fRatio)]) + # find good points which should survive + idxGood = np.where((max2d[coord2d_max] > + max2d_other[coord_other]) * + np.isfinite(max2d_other[coord_other]) + ) + + # copy only the ones that are good + coord2d_max = tuple([v[idxGood] for v in coord2d_max]) + + # mark surviving points + nonmax_mask[coord2d_max] = 1.0 + + # special case when we are asked with single item in list + if len(nonmax2d_list) == 1: + # get coordinates of the local max positions of nonmax2d + coord2d_max = np.where(nonmax2d) + # mark surviving points + nonmax_mask[coord2d_max] = 1.0 + + # add to list + nonmax_mask_or_list[idxScale] = nonmax_mask + + return nonmax_mask_or_list + +# +# nms.py ends here diff --git a/utils/test.py b/utils/test.py new file mode 100644 index 0000000..168067e --- /dev/null +++ b/utils/test.py @@ -0,0 +1,386 @@ +# test.py --- +# +# Filename: test.py +# Description: +# Author: Kwang Moo Yi +# 
Maintainer: +# Created: Thu Jul 6 16:24:22 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import cv2 +import numpy as np +import tensorflow as tf + +from six.moves import xrange + + +def compute_auc(x, y): + """Compute AUCs given curve (x, y) + + Parameters + ---------- + x : np.ndarray, float, 1d + The values in x axis + + y : np.ndarray, float, 1d + The values in y axis + """ + # AUC for precision-recall + dx = x[1:] - x[:-1] # bin width + y_avg = (y[1:] + y[:-1]) / 2.0 # avg y in bin + # Below expression but without loops + id_dx_valid = np.cast[float](dx > 0) + auc = np.sum(y_avg * dx * id_dx_valid) + # auc = 0 + # for _dx, _y_avg in zip(dx, y_avg): + # if _dx > 0: + # auc += _dx * _y_avg + + return auc + + +def eval_descs(d1, d2, d3, neg_weight): + + # Compute descriptor distances + pair_dists = np.sqrt(np.sum((d1 - d2)**2, axis=1)).flatten() + nonpair_dists = np.sqrt(np.sum((d1 - d3)**2, axis=1)).flatten() + + # Make dists and labels + dists = np.concatenate([pair_dists, nonpair_dists], axis=0) + labels = np.concatenate([ + np.ones_like(pair_dists), np.zeros_like(nonpair_dists)], axis=0) + + # create curves by hand + num_th = 1000 + + # LATER: Check if neg_per_pos is working + + # ------------------------------------------------- + # Do some smart trick to only sort and compare once + # ------------------------------------------------- + # sort labels according to data indices + sorted_labels = labels[np.argsort(dists)] + # get thresholds using histogram function + num_in_bin, thr = np.histogram(dists, num_th - 1) + num_in_bin = np.concatenate([np.zeros((1,)), num_in_bin]) + # alocate + prec = np.zeros((num_th,)) + rec = np.zeros((num_th,)) + tnr = np.zeros((num_th,)) + # for each bin + idx_s = 0 + retrieved_pos = 0 + retrieved_neg = 0 + total_pos = np.sum(sorted_labels == 1).astype(float) + total_neg = np.sum(sorted_labels == 0).astype(float) * neg_weight + for i in xrange(num_th): + # The number of elements we need to look at + idx_e = int(idx_s + num_in_bin[i]) + # look at the labels to see how many are right/wrong and accumulate + retrieved_pos += np.sum(sorted_labels[idx_s:idx_e] == 1).astype(float) + retrieved_neg += np.sum( + sorted_labels[idx_s:idx_e] == 0).astype(float) * neg_weight + # move start point + idx_s = idx_e + + retrieved = retrieved_pos + retrieved_neg + if retrieved > 0: + prec[i] = retrieved_pos / retrieved + else: + # precision is one, is this correct? + prec[i] = 1.0 + rec[i] = retrieved_pos / total_pos + tnr[i] = 1 - retrieved_neg / total_neg + + tpr = rec + + return compute_auc(rec, prec) + + +def eval_kps(xyzs, poss, scores, r_base): + """Evaluate keypoints based on their locations and scores. + + + Parameters + ---------- + + xyzs: the coordinates of the keypoints for P1, P2, P3, and P4 + + poss: the ground-truth location of the SfM points for each patch + + scores: scores for each patch + + neg_weight: Weight of the negative sample + + r_base: float + Base multiplier to apply to z to get rectangle support regions' half + width. Typical computation would be: + + >>> r_base = (float(config.desc_input_size) / + float(get_patch_size(config))) / 6.0 + + """ + + # Let's convert everything to numpy arrays just to make our lives easier + xyzs = np.asarray(xyzs) + poss = np.asarray(poss) + scores = np.asarray(scores) + + # Check for each keypoint if it is repeated. 
Note that we only compute the + # overlap for *corresponding* pairs. + # + # th = 0.4 + # is_repeated = (get_kp_overlap(xyzs[0], poss[0], + # xyzs[1], poss[1], r_base) >= 0.4) + kp_overlap = get_kp_overlap(xyzs[0], poss[0], xyzs[1], poss[1], + r_base, mode='rectangle').flatten() + + # set number of thresholds we want to test + num_th = 1000 + max_score = np.max(scores) + min_score = np.min(scores) + eps = 1e-10 + ths = np.linspace(max_score, min_score - 2 * eps, num_th) + eps + + # alocate + prec = np.zeros((num_th,)) + rec = np.zeros((num_th,)) + tnr = np.zeros((num_th,)) + # Number of all positive pairs + total_pos_pair = float(len(xyzs[0])) + # Number of all non-SfM keypoints + total_non_sfm_kp = float(len(xyzs[3])) + for i in xrange(len(ths)): + # set threshold + th = ths[i] + # find all that are selected as positives + retv = [None] * 4 + for idx_branch in xrange(4): + retv[idx_branch] = np.where(scores[idx_branch] >= th)[0] + # number of retrieved keypoins. No balancing is needed since we don't + # have repetitions any more + retrieved_kp = sum([float(len(retv[_i])) for _i in xrange(4)]) + # Number of retrieved SfM keypoints. Again, no balancing + retrieved_sfm_kp = sum([float(len(retv[_i])) for _i in xrange(3)]) + # number of retrieved non SfM keypoints + retrieved_non_sfm_kp = float(len(retv[3])) + # find positive pairs that are recovered (ones that are repeated) + # check if both of the pairs are retrieved + is_pair_retrieved = scores[1][retv[0]] >= th + # get cumulative overlap + retrieved_pos_pair = float( + np.sum(is_pair_retrieved * kp_overlap[retv[0]])) + + if retrieved_kp > 0: + prec[i] = retrieved_sfm_kp / retrieved_kp + else: + # precision is one, is this correct? + prec[i] = 1.0 + rec[i] = retrieved_pos_pair / total_pos_pair + tnr[i] = 1 - retrieved_non_sfm_kp / total_non_sfm_kp + + # if i == 500: + # import IPython + # IPython.embed() + + tpr = rec + + #, computeAUC(tnr[::-1], tpr[::-1]), prec, rec + return compute_auc(rec, prec) + + +def rebase_xyz(xyz, base): + """Recomputes 'xyz' with center being 'base'. + + Parameters + ---------- + xyz: theano.tensor.matrix + 'xyz' to be modified to have center at 'base'. + + base: theano.tensor.matrix + The new center position. + + Notes + ----- + This function is required as xy is affected by which z you are having. + """ + + # Move to the same z. Notice how we set the scaler here. For + # example, if base == 1 then it means that we are moving to a + # lower scale, so we need to multiply x and y by two. + new_z = xyz[:, 2] - base[:, 2] + scaler = 2.0**base[:, 2] + new_x = xyz[:, 0] * scaler + new_y = xyz[:, 1] * scaler + + # Move x and y. The new scaler here will be based on where x and y + # are after moving, i.e. new_z. For example, if new_z == 1, then + # we need to move x and y in half the value of the base x and y. + scaler = 2.0**(-new_z) + new_x -= base[:, 0] * scaler + new_y -= base[:, 1] * scaler + + if isinstance(new_x, np.ndarray): + new_xyz = np.concatenate([ + v.reshape([-1, 1]) for v in [new_x, new_y, new_z] + ], axis=1) + else: + new_xyz = tf.concat([ + tf.reshape(v, [-1, 1]) for v in [new_x, new_y, new_z] + ], axis=1) + + return new_xyz + + +def get_rect_inter(x1, y1, r1, x2, y2, r2): + """Computes Intersection of Rectangles + + Parameters + ---------- + x1: ndarray or tensor + x coordinates of the first rectangle. + + y1: ndarray or tensor + y coordinates of the first rectangle. + + r1: ndarray or tensor + Half width of the first rectangle (just to be consistent with + circle impl). 
+
+    x2: ndarray or tensor
+        x coordinates of the second rectangle.
+
+    y2: ndarray or tensor
+        y coordinates of the second rectangle.
+
+    r2: ndarray or tensor
+        Half width of the second rectangle (just to be consistent with
+        circle impl).
+    """
+
+    # use theano or numpy depending on the variable
+    if isinstance(x1, np.ndarray):
+        # Checking if ndarray will prevent us from loading theano
+        # unnecessarily.
+        cm = np
+    else:
+        # import tensorflow (Note that the code works for theano if you just
+        # uncomment below)
+        # import theano.tensor as T
+        # cm = T
+        import tensorflow as tf
+        cm = tf
+
+    # intersection computation
+    inter_w = cm.minimum(x1 + r1, x2 + r2) - cm.maximum(x1 - r1, x2 - r2)
+    inter_w = cm.maximum(0.0, inter_w)
+    inter_h = cm.minimum(y1 + r1, y2 + r2) - cm.maximum(y1 - r1, y2 - r2)
+    inter_h = cm.maximum(0.0, inter_h)
+
+    return inter_w * inter_h
+
+
+def get_kp_overlap(xyz1, pos1, xyz2, pos2, r_base, mode='circle'):
+    """Determines if a keypoint is repeated
+
+    Parameters
+    ----------
+    xyz1: ndarray, float, 3d
+        Coordinates and scale of the first keypoint.
+
+    pos1: ndarray, float, 3d
+        Coordinates and scale of the ground truth position of the first
+        keypoint.
+
+    xyz2: ndarray, float, 3d
+        Coordinates and scale of the second keypoint.
+
+    pos2: ndarray, float, 3d
+        Coordinates and scale of the ground truth position of the second
+        keypoint.
+
+    r_base: float
+        Base multiplier to apply to z to get rectangle support regions' half
+        width. Typical computation would be:
+
+        >>> r_base = (float(myNet.config.nDescInputSize) /
+                      float(myNet.config.nPatchSize)) / 6.0
+
+        We use 1/6 of the support region to compute the overlap.
+
+    Note
+    ----
+    Unlike the cost function, here we use the intersection of circles to be
+    maximally compatible with the other benchmarks. However, this can be
+    easily changed. Also, r_base is computed to use the original keypoint
+    scale, whereas the cost function uses the descriptor support region.
+
+    """
+
+    # Rebase the two xyzs so that they can be compared.
+    xyz1 = rebase_xyz(xyz1, pos1)
+    xyz2 = rebase_xyz(xyz2, pos2)
+
+    # Retrieve the keypoint support region based on r_base.
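+    # A small worked example of the rectangle-overlap measure computed below
+    # (the numbers are illustrative, not taken from any dataset): the support
+    # half-widths scale as r_base * 2**z, and for two identical keypoints
+    # with half-width r we get inter = (2r)^2 and
+    # union = (2r)^2 + (2r)^2 - inter = (2r)^2, so the returned overlap is
+    # 1.0; for disjoint rectangles inter = 0 and the overlap is 0.
+    # eval_kps() above calls this function with mode='rectangle'.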
+ x1 = xyz1[:, 0] + y1 = xyz1[:, 1] + r1 = r_base * 2.0**xyz1[:, 2] + + x2 = xyz2[:, 0] + y2 = xyz2[:, 1] + r2 = r_base * 2.0**xyz2[:, 2] + + if mode == 'circle': + raise NotImplementedError( + "LATER: after the release, when somebody wants to do it") + # inter = get_circle_inter(x1, y1, r1, x2, y2, r2) + # union = r1**2.0 + r2**2.0 - inter + elif mode == 'rectangle': + inter = get_rect_inter(x1, y1, r1, x2, y2, r2) + union = (2.0 * r1)**2.0 + (2.0 * r2)**2.0 - inter + else: + raise ValueError('Unsupported mode ' + mode) + + return (inter / union).astype("float32").reshape([-1, 1]) + + +def draw_XYZS_to_img(XYZS, image_color, out_file_name): + """ Drawing functino for displaying """ + + # draw onto the original image + if cv2.__version__[0] == '3': + linetype = cv2.LINE_AA + else: + linetype = cv2.CV_AA + + [cv2.circle(image_color, tuple(np.round(pos).astype(int)), + np.round(rad * 6.0).astype(int), (0, 255, 0), 2, + lineType=linetype) + for pos, rad in zip(XYZS[:, :2], XYZS[:, 2])] + + cv2.imwrite(out_file_name, image_color) + + +# +# test.py ends here diff --git a/utils/tf.py b/utils/tf.py new file mode 100644 index 0000000..d52ddfe --- /dev/null +++ b/utils/tf.py @@ -0,0 +1,62 @@ +# tf.py --- +# +# Filename: tf.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 15:35:36 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import tensorflow as tf +import tensorflow.contrib.slim as slim + + +def show_all_variables(): + # Adapted from original code at + # https://github.com/carpedm20/simulated-unsupervised-tensorflow + model_vars = tf.trainable_variables() + slim.model_analyzer.analyze_vars(model_vars, print_info=True) + + +def image_summary_nhwc(name, img, max_outputs=1): + """Image summary function for NHWC format""" + + return tf.summary.image(name, img, max_outputs) + + +def image_summary_nchw(name, img, max_outputs=1): + """Image summary function for NCHW format""" + + return tf.summary.image( + name, tf.transpose(img, (0, 2, 3, 1)), max_outputs) + + +def get_tensor_shape(tensor): + + return [_s if _s is not None else -1 for + _s in tensor.get_shape().as_list()] + + +# +# tf.py ends here diff --git a/utils/train.py b/utils/train.py new file mode 100644 index 0000000..682f435 --- /dev/null +++ b/utils/train.py @@ -0,0 +1,90 @@ +# train.py --- +# +# Filename: train.py +# Description: +# Author: Kwang Moo Yi +# Maintainer: +# Created: Thu Jul 6 16:23:31 2017 (+0200) +# Version: +# Package-Requires: () +# URL: +# Doc URL: +# Keywords: +# Compatibility: +# +# + +# Commentary: +# +# +# +# + +# Change Log: +# +# +# +# Copyright (C), EPFL Computer Vision Lab. + +# Code: + +import numpy as np + + +def get_hard_batch(loss_list, data_list): + """Returns a batch with the hardest samples. + + Given a list of losses and data, merges them together to form a batch with + is the hardest samples. 
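+
+    As an informal sketch of the expected nesting (inferred from the NOTE
+    and the code below, so the variable names are illustrative): each element
+    of data_list is a nested dict holding one mini-batch, e.g.
+    {"patch": {"P1": ndarray, ...}, ...}, and each element of loss_list is
+    the per-sample loss array for that same mini-batch. The returned dict
+    keeps the same nesting but contains only the batch_size hardest samples:
+
+    >>> hard_batch = get_hard_batch(loss_list, data_list)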
+
+    NOTE
+    ----
+
+    A data item in data_list is accessible as data_list[i]["patch"]["P1"][j]
+
+    A loss item in loss_list is accessible as loss_list[i][j]
+
+    """
+
+    # Concatenate all data
+    all_data = {}
+    for _type in data_list[0]:
+        all_data[_type] = {}
+        for _name in data_list[0][_type]:
+            # "aug_rot" is a dictionary with two keys
+            if isinstance(data_list[0][_type][_name], dict):
+                all_data[_type][_name] = {}
+                for _key in data_list[0][_type][_name]:
+                    all_data[_type][_name][_key] = np.concatenate(
+                        (lambda _t=_type, _n=_name, _k=_key, _d=data_list: [
+                            data[_t][_n][_k] for data in _d])(), axis=0)
+            else:
+                all_data[_type][_name] = np.concatenate(
+                    (lambda _t=_type, _n=_name, _d=data_list: [
+                        data[_t][_n] for data in _d])(), axis=0)
+
+    # Extract batch size
+    batch_size = len(loss_list[0])
+
+    # Concatenate all losses
+    all_loss = np.concatenate(loss_list, axis=0)
+
+    # Sort by loss and get indices for the hardest ones
+    ind = np.argsort(all_loss)[::-1]
+    ind = ind[:batch_size]
+
+    # Select the hardest examples
+    for _type in all_data:
+        for _name in all_data[_type]:
+            if isinstance(all_data[_type][_name], dict):
+                all_data[_type][_name] = {
+                    _key: all_data[_type][_name][_key][ind]
+                    for _key in all_data[_type][_name]}
+            else:
+                all_data[_type][_name] = all_data[_type][_name][ind]
+
+    return all_data
+
+
+#
+# train.py ends here