From e3172b69f919dfd301567724f3cbbf6327d88ec0 Mon Sep 17 00:00:00 2001 From: PatWie Date: Tue, 18 Jul 2017 09:27:15 +0200 Subject: [PATCH] init --- .gitignore | 136 +++++++ README.md | 81 ++++ additional_scripts/convrnn.py | 165 ++++++++ additional_scripts/fba.py | 47 +++ ...ning_blind_motion_deblurring_multiscale.py | 379 ++++++++++++++++++ data_provider.py | 160 ++++++++ data_sampler.py | 336 ++++++++++++++++ download_videos.sh | 40 ++ learning_blind_motion_deblurring.py | 246 ++++++++++++ ...ing_blind_motion_deblurring_singlescale.py | 298 ++++++++++++++ psf.py | 155 +++++++ synthblur/.clang_complete | 1 + synthblur/CMakeLists.txt | 14 + synthblur/include/blur.h | 16 + synthblur/include/flow.h | 33 ++ synthblur/include/meta.h | 55 +++ synthblur/include/video.h | 41 ++ synthblur/src/blur.cpp | 46 +++ synthblur/src/convert.cpp | 73 ++++ synthblur/src/flow.cpp | 239 +++++++++++ synthblur/src/meta.cpp | 11 + synthblur/src/video.cpp | 46 +++ video.py | 110 +++++ 23 files changed, 2728 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 additional_scripts/convrnn.py create mode 100644 additional_scripts/fba.py create mode 100644 additional_scripts/learning_blind_motion_deblurring_multiscale.py create mode 100644 data_provider.py create mode 100644 data_sampler.py create mode 100644 download_videos.sh create mode 100644 learning_blind_motion_deblurring.py create mode 100644 learning_blind_motion_deblurring_singlescale.py create mode 100644 psf.py create mode 100644 synthblur/.clang_complete create mode 100644 synthblur/CMakeLists.txt create mode 100644 synthblur/include/blur.h create mode 100644 synthblur/include/flow.h create mode 100644 synthblur/include/meta.h create mode 100644 synthblur/include/video.h create mode 100644 synthblur/src/blur.cpp create mode 100644 synthblur/src/convert.cpp create mode 100644 synthblur/src/flow.cpp create mode 100644 synthblur/src/meta.cpp create mode 100644 synthblur/src/video.cpp create mode 100644 video.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0766353 --- /dev/null +++ b/.gitignore @@ -0,0 +1,136 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +local/ +train_log*/ +results/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..17a096e --- /dev/null +++ b/README.md @@ -0,0 +1,81 @@ +# Learning Blind Motion Deblurring + +TensorFlow implementation of multi-frame blind deconvolution: + +**Learning Blind Motion Deblurring**
+Patrick Wieschollek, Michael Hirsch, Bernhard Schölkopf, Hendrik P.A. Lensch
+*ICCV 2017*
+
+![results](https://user-images.githubusercontent.com/6756603/28306964-93f64ce2-6ba1-11e7-8cdc-4f112d9d6059.jpg)
+
+
+## Prerequisites
+### 1. Get YouTube videos
+
+The first step is to gather videos from arbitrary sources. We use YouTube to obtain videos with diverse content and recording equipment. To download these videos, we use the Python tool `youtube-dl`.
+
+```bash
+pip install youtube-dl --user
+```
+
+Some examples are given in `download_videos.sh`. Note that you can use any mp4 video for this task. In fact, for this re-implementation we used some other videos, which also work well.
+
+### 2. Generate Synthetic Motion Blur
+
+Next, we use optical flow to synthetically add motion blur. We used the simplest optical-flow method, which provides reasonable results (we average frames anyway):
+
+```bash
+cd synthblur
+mkdir build && cd build
+cmake ..
+make all
+```
+
+To convert a video `input.mp4` into a blurry version, run
+
+```bash
+./synthblur/build/convert "input.mp4"
+```
+
+This gives you multiple outputs:
+- 'input.mp4_blurry.mp4'
+- 'input.mp4_sharp.mp4'
+- 'input.mp4_flow.mp4'
+
+Adding blur from synthetic camera shake is done on-the-fly (see `psf.py`).
+
+### 3. Building a Database
+For performance reasons we randomly sample frames from all videos beforehand and store 5+5 consecutive frames (sharp+blurry) into an LMDB file (for training/validation/testing).
+
+I use
+
+```bash
+#!/bin/bash
+for i in `seq 1 30`; do
+    python data_sampler.py --pattern '/graphics/scratch/wieschol/YouTubeDataset/train/*_blurry.mp4' --lmdb /graphics/scratch/wieschol/YouTubeDataset/train$i.lmdb --num 5000
+done
+
+for i in `seq 1 10`; do
+    python data_sampler.py --pattern '/graphics/scratch/wieschol/YouTubeDataset/val/*_blurry.mp4' --lmdb /graphics/scratch/wieschol/YouTubeDataset/val$i.lmdb --num 5000
+done
+
+```
+
+To visualize the training examples, just run
+
+```bash
+python data_provider.py --lmdb /graphics/scratch/wieschol/YouTubeDataset/train1.lmdb --show --num 5000
+```
+
+
+## Training
+
+This re-implementation uses [TensorPack](https://github.com/ppwwyyxx/tensorpack) instead of a custom library. Training is started by
+
+```bash
+python learning_blind_motion_deblurring_singlescale.py --gpu 0,1 --data path/to/lmdb-files/
+```
+
+
+## Further experiments
+We further tried a convLSTM/convGRU and a multi-scale approach (instead of the simple approach from the paper). These scripts are available in `additional_scripts`.
\ No newline at end of file
diff --git a/additional_scripts/convrnn.py b/additional_scripts/convrnn.py
new file mode 100644
index 0000000..b520228
--- /dev/null
+++ b/additional_scripts/convrnn.py
@@ -0,0 +1,165 @@
+from abc import ABCMeta, abstractmethod, abstractproperty
+import tensorflow as tf
+from tensorpack import *
+from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope
+
+"""
+References:
+
+http://colah.github.io/posts/2015-08-Understanding-LSTMs/
+http://www.wildml.com/2015/10/recurrent-neural-network-tutorial-part-4-implementing-a-grulstm-rnn-with-python-and-theano/
+
+"""
+
+
+class ConvRNNCell(object):
+    __metaclass__ = ABCMeta
+
+    def __init__(self, tensor_shape, out_channel, kernel_shape, nl=tf.nn.tanh, normalize_fn=None):
+        """Abstract representation for 2D recurrent cells.
+
+        Args:
+            tensor_shape: shape of inputs (must be fully specified)
+            out_channel: number of output channels
+            kernel_shape: size of filters
+            nl (TYPE, optional): non-linearity (default: tf.nn.tanh)
+            normalize_fn (None, optional): normalization steps (e.g. tf.contrib.layers.layer_norm)
+        """
+        super(ConvRNNCell, self).__init__()
+        self.state_tensor = None
+
+        assert len(tensor_shape) == 4, "tensor_shape should have 4 dims [BHWC]"
+
+        self.input_shape = tensor_shape
+        self.out_channel = out_channel
+        self.kernel_shape = kernel_shape
+
+        self.nl = nl
+        self.normalize_fn = normalize_fn
+
+    @abstractproperty
+    def default_state(self):
+        pass
+
+    def state(self):
+        if self.state_tensor is None:
+            self.state_tensor = self.default_state()
+        return self.state_tensor
+
+    @abstractmethod
+    def _calc(self, tensor):
+        pass
+
+    def __call__(self, tensor):
+        return self._calc(tensor)
+
+
+class ConvLSTMCell(ConvRNNCell):
+    """Represent LSTM-layer using convolutions.
+
+    conv_gates:
+        i = sigma(x*U1 + s*W1)      input gate
+        f = sigma(x*U2 + s*W2)      forget gate
+        o = sigma(x*U3 + s*W3)      output gate
+        g = tanh(x*U4 + s*W4)       candidate hidden state
+
+    memory update:
+        c = c * f + g * i           internal memory
+        s = tanh(c) * o             output hidden state
+
+    """
+    def default_state(self):
+        b, h, w, c = self.input_shape
+        return (tf.zeros([b, h, w, self.out_channel]), tf.zeros([b, h, w, self.out_channel]))
+
+    @auto_reuse_variable_scope
+    def _calc(self, x):
+        c, s = self.state()
+
+        xs = tf.concat(axis=3, values=[x, s])
+        igfo = Conv2D('conv_gates', xs, 4 * self.out_channel, self.kernel_shape,
+                      nl=tf.identity, use_bias=(self.normalize_fn is None))
+        # i = input_gate, g = candidate hidden state, f = forget_gate, o = output_gate
+        i, g, f, o = tf.split(axis=3, num_or_size_splits=4, value=igfo)
+
+        if self.normalize_fn is not None:
+            i, g = self.normalize_fn(i), self.normalize_fn(g)
+            f, o = self.normalize_fn(f), self.normalize_fn(o)
+
+        i, g = tf.nn.sigmoid(i), self.nl(g)
+        f, o = tf.nn.sigmoid(f), tf.nn.sigmoid(o)
+
+        # memory update
+        c = c * f + g * i
+        if self.normalize_fn is not None:
+            c = self.normalize_fn(c)
+
+        # output (o has already been passed through the sigmoid above)
+        s = self.nl(c) * o
+        self.state_tensor = (c, s)
+
+        return s
+
+
+class ConvGRUCell(ConvRNNCell):
+    """Represent GRU-layer using convolutions.
+
+    z = sigma(x*U1 + s*W1)          update gate
+    r = sigma(x*U2 + s*W2)          reset gate
+    h = tanh(x*U3 + (s*r)*W3)
+    s = (1-z)*h + z*s
+    """
+    def default_state(self):
+        """GRU just uses the output as the state for the next computation.
+ """ + b, h, w, c = self.input_shape + return tf.zeros([b, h, w, self.out_channel]) + + @auto_reuse_variable_scope + def _calc(self, x): + s = self.state() + + # we concat x and s to reduce the number of conv-calls + xs = tf.concat(axis=3, values=[x, s]) + zr = Conv2D('conv_zr', xs, 2 * self.out_channel, self.kernel_shape, + nl=tf.identity, use_bias=(self.normalize_fn is None)) + + # z (update gate), r (reset gate) + z, r = tf.split(axis=3, num_or_size_splits=2, value=zr) + + if self.normalize_fn is not None: + r, z = self.normalize_fn(r), self.normalize_fn(z) + + r, z = tf.sigmoid(r), tf.sigmoid(z) + + h = tf.concat(axis=3, values=[x, s * r]) + h = Conv2D('conv_h', h, self.out_channel, self.kernel_shape, + nl=tf.identity, use_bias=(self.normalize_fn is None)) + + if self.normalize_fn is not None: + h = self.normalize_fn(h) + + h = self.nl(h) + s = (1 - z) * h + z * s + + self.state_tensor = s + + return s + + +@layer_register() +def ConvRNN(x, cell): + assert len(x.get_shape().as_list()) == 4, "input in ConvRNN should be B,H,W,C" + return cell(x) + + +@layer_register() +def ConvRNN_unroll(x, cell): + assert len(x.get_shape().as_list()) == 5, "input in ConvRNN should be B,T,H,W,C" + time_dim = x.get_shape().as_list()[1] + + outputs = [] + for t in range(time_dim): + outputs.append(cell(x[:, t, :, :, :])) + + return tf.stack(outputs, axis=1) diff --git a/additional_scripts/fba.py b/additional_scripts/fba.py new file mode 100644 index 0000000..ddf5a7d --- /dev/null +++ b/additional_scripts/fba.py @@ -0,0 +1,47 @@ +import argparse +import cv2 +import numpy as np +import glob as glob + +""" +re-implementation: +Removing Camera Shake via Weighted Fourier Burst Accumulation +""" + + +def rgb2gray(rgb): + return np.dot(rgb[..., :3], [0.299, 0.587, 0.114]) + + +def fba(stack, p=17): + m, n, = stack[0].shape + c = 1 + hu_p = np.zeros((m, n)) + 0j + w = np.zeros((m, n)) + + for img in stack: + hv_i = np.zeros((m, n, c)) + 0j + hv_i = np.fft.fft2(np.array(img).astype(float), axes=[0, 1]) + w_i = abs(hv_i) + hu_p = hu_p + (w_i**p + 0j) * hv_i + w = w + w_i**p + + u_p = np.zeros((m, n, c)) + 0j + u_p = np.fft.ifft2(hu_p[:, :] / (w), axes=[0, 1]) + return np.clip(u_p.real, 0, 1) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--pattern', help='images used for fba (in glob)', required=True) + parser.add_argument('--out', help='output of fba', default='fba_result.png') + parser.add_argument('--p', action='parameter of fba', type=int, default=17) + args = parser.parse_args() + + images = glob.glob(args.pattern) + batch = [] * len(images) + for i in range(len(images)): + batch[i] = rgb2gray(cv2.imread(images[i]) / 255.) 
+ + sharp = fba(batch, args.p) + cv2.imwrite(args.out, sharp.astype(float)) diff --git a/additional_scripts/learning_blind_motion_deblurring_multiscale.py b/additional_scripts/learning_blind_motion_deblurring_multiscale.py new file mode 100644 index 0000000..0874af1 --- /dev/null +++ b/additional_scripts/learning_blind_motion_deblurring_multiscale.py @@ -0,0 +1,379 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Author: Meenal Baheti, Patrick Wieschollek + +import argparse +import tensorflow as tf +from data_provider import get_data as YoutubeData # noqa +from tensorpack import * +from tensorpack.tfutils.summary import add_moving_summary +from tensorpack.utils import logger +from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope +import tensorpack.tfutils.symbolic_functions as symbf +import glob + +""" +Learning Blind Motion Deblurring (Multi -Scale version) +""" + +SEQ_LEN = 5 +BATCH_SIZE = 8 +SHAPE = 128 +LEVELS = 3 + + +def ReluConv2D(name, x, out_channels, use_relu=True, kernel_shape=3, stride=1): + if use_relu: + x = tf.nn.relu(x, name='%s_relu' % name) + x = Conv2D('%s_conv' % name, x, out_channels, kernel_shape=kernel_shape, stride=stride) + return x + + +def ReluDeconv2D(name, x, out_channels, kernel_shape=3, stride=1): + x = tf.nn.relu(x, name='%s_relu' % name) + x = Deconv2D('%s_deconv' % name, x, out_channels, kernel_shape=kernel_shape, stride=stride) + return x + + +def resize(small, name): + """resize "small" to shape of "large" + """ + # # just resize to reduce parameters from 7043997 to ... TODOOOOOOOOO + # return resize_by_factor(x, 2) + with tf.variable_scope('resize'): + out_channels = small.get_shape().as_list()[3] + # need to add name as argument in function. + small = Deconv2D('spatial_skip_deconv_%s' % name, small, out_channels, kernel_shape=4, stride=2) + small = tf.nn.relu(small, name='spatial_skip_relu_%s' % name) + return small + + +def resize_by_factor(x, f): + with tf.name_scope('resize'): + """resize "small" to shape of "large" + """ + height, width = x.get_shape().as_list()[1:3] + return tf.image.resize_images(x, [int(height * f), int(width * f)]) + + +def Merge(incoming_skip, ID, tensor, name): + with tf.name_scope('Merge_%s' % name): + if incoming_skip is None: + # we gonna fake the skip, to allow TF reuse variable and construct + # for this block a senseless conv + incoming_skip_internal = tensor + else: + # we really want to merge both layers + incoming_skip_internal = incoming_skip[ID] + hs, ws = incoming_skip_internal.get_shape().as_list()[1:3] + hl, wl = tensor.get_shape().as_list()[1:3] + + tmp_name = resize(incoming_skip_internal, name) + if (hs != hl) or (ws != wl): + incoming_skip_internal = tmp_name + channels = tensor.get_shape().as_list()[3] + tensor_internal = tf.concat([tensor, incoming_skip_internal], axis=3) + tensor_internal = ReluConv2D(name, tensor_internal, channels, kernel_shape=1) + + if incoming_skip is None: + # we have constructed the operation but just return the unmodified tensor itself + # workaround for '@auto_reuse_variable_scope' + # be aware this gives warnings "not gradient w.r.t. ..." 
+ return tensor + else: + # we return the modified tensor + return tensor_internal + + +class Model(ModelDesc): + + def _get_inputs(self): + return [InputDesc(tf.float32, (None, SEQ_LEN, SHAPE, SHAPE, 3), 'blurry'), + InputDesc(tf.float32, (None, SEQ_LEN, SHAPE, SHAPE, 3), 'sharp')] + + @auto_reuse_variable_scope + def deblur_block(self, observation, estimate, + skip_temporal_in=None, + skip_spatial_in=None, + name=None): + """Apply one deblur step. + + Args: + observation: new unseen observation + estimate: latest estimate + skip_temporal_in (None, optional): list of skip_connections + skip_spatial_in(None, optional): list of connections between multi-scaled layers + skip_unet_out(None, optional): lsit of skip connections between deblurring blocks within the network . + """ + + skip_temporal_out = [] # green + skip_spatial_out = [] # from resized + skip_unet_out = [] # grey + + with tf.name_scope("deblur_block_%s" % name): + # be aware use_local_stat=True gives warnings + with argscope(BatchNorm, use_local_stat=True), \ + argscope([Conv2D, Deconv2D], nl=lambda x, name: BatchNorm(name, x)): + inputs = tf.concat([observation, estimate], 3) + + block = ReluConv2D('d0', inputs, 32, stride=1, kernel_shape=3) + + # H x W -> H/2 x W/2 + # --------------------------------------------------------------------- + block = ReluConv2D('d1_0', block, 64, stride=2) + block = Merge(skip_temporal_in, 2, block, 'd11_s') + block_start = block + block = ReluConv2D('d1_1', block, 64) + block = ReluConv2D('d1_2', block, 64) + block = ReluConv2D('d1_3', block, 64, kernel_shape=1) + block = tf.add(block_start, block, name='block_skip_A') + skip_spatial_out.append(block) + block = Merge(skip_spatial_in, 0, block, 'd14_s') + + # H/2 x W/2 -> H/2 x W/2 + # --------------------------------------------------------------------- + block = ReluConv2D('d2_0', block, 64) + block_start = block + block = ReluConv2D('d2_1', block, 64) + block = ReluConv2D('d2_2', block, 64) + block = ReluConv2D('d2_3', block, 64, kernel_shape=1) + block = tf.add(block_start, block, name='block_skip_B') + skip_spatial_out.append(block) + skip_unet_out.append(block) + block = Merge(skip_spatial_in, 1, block, 'd24_s') + + # H/2 x W/2 -> H/4 x W/4 + # --------------------------------------------------------------------- + block = ReluConv2D('d3_0', block, 128, stride=2) + block = Merge(skip_temporal_in, 1, block, 'd31_s') + block_start = block + block = ReluConv2D('d3_1', block, 128) + block = ReluConv2D('d3_2', block, 128) + block = ReluConv2D('d3_3', block, 128, kernel_shape=1) + block = tf.add(block_start, block, name='block_skip_C') + skip_spatial_out.append(block) + skip_unet_out.append(block) + block = Merge(skip_spatial_in, 2, block, 'd34_s') + + # H/4 x W/4 -> H/8 x W/8 + # --------------------------------------------------------------------- + block = ReluConv2D('d4_0', block, 256, stride=2) + block = Merge(skip_temporal_in, 0, block, 'd41_s') + block_start = block + block = ReluConv2D('d4_1', block, 256) + block = ReluConv2D('d4_2', block, 256) + block = ReluConv2D('d4_3', block, 256, kernel_shape=1) + block = tf.add(block_start, block, name='block_skip_D') + skip_temporal_out.append(block) + skip_spatial_out.append(block) + block = Merge(skip_spatial_in, 3, block, 'd44_s') + + # H/8 x W/8 -> H/4 x W/4 + # --------------------------------------------------------------------- + block = ReluDeconv2D('u1_0', block, 128, stride=2, kernel_shape=4) + block = tf.add(block, skip_unet_out[1], name='skip01') + block_start = block + block = 
ReluConv2D('u1_1', block, 128) + block = ReluConv2D('u1_2', block, 128) + block = ReluConv2D('u1_3', block, 128) + block = tf.add(block, block_start, name='block_skip_E') + skip_temporal_out.append(block) + skip_spatial_out.append(block) + block = Merge(skip_spatial_in, 4, block, 'u14_s') + + # H/4 x W/4 -> H/2 x W/2 + # --------------------------------------------------------------------- + block = ReluDeconv2D('u2_0', block, 64, stride=2, kernel_shape=4) + block = tf.add(block, skip_unet_out[0], name='skip02') + block_start = block + block = ReluConv2D('u2_1', block, 64) + block = ReluConv2D('u2_2', block, 64) + block = ReluConv2D('u2_3', block, 64) + block = tf.add(block, block_start, name='block_skip_F') + skip_temporal_out.append(block) + skip_spatial_out.append(block) + block = Merge(skip_spatial_in, 5, block, 'u24_s') + + # H/2 x W/2 -> H x W + # --------------------------------------------------------------------- + block = ReluDeconv2D('u3_0', block, 64, stride=2, kernel_shape=4) + block = ReluConv2D('u3_1', block, 64) + block = ReluConv2D('u3_2', block, 64) + block = ReluConv2D('u3_3', block, 6) + block = ReluConv2D('u3_4', block, 3) + estimate = tf.add(estimate, block, name='skip03') + # skip_spatial_out.append(estimate) + + return estimate, skip_spatial_out, skip_temporal_out + + def _build_graph(self, input_vars): + + # some loss functions and metrics to track performance + def l2_loss(x, y, name): + return tf.reduce_mean(tf.squared_difference(x, y), name=name) + + def l1_loss(x, y, name): + return tf.reduce_mean(tf.abs(x - y), name=name) + + def scaled_psnr(x, y, name): + return symbf.psnr(128. * (x + 1.0), 128. * (y + 1.), 255, name=name) + + def image_pyramid(img, levels=LEVELS): + """Resizing image to different shapes + + Args: + img: image with original size + levels (int, optional): number of resize steps + + Returns: + images from small to original + """ + with tf.name_scope('image_pyramid'): + pyramid = [img] + for i in range(levels - 1): + pyramid.append(resize_by_factor(img, 1. 
/ (2**(i + 1)))) + return pyramid[::-1] + + # centered inputs [B, T, H, W, C] + blurry, sharp = input_vars + blurry = blurry / 128.0 - 1 + sharp = sharp / 128.0 - 1 + + # take last as target + expected_pyramid = image_pyramid(sharp[:, -1, :, :, :], levels=LEVELS) + estimate_pyramid = image_pyramid(blurry[:, -1, :, :, :], levels=LEVELS) + + # track some performance metrics + # never do dummy = [[]] * LEVELS + l2err_list, l1err_list, psnr_list, psnr_impro_list = [], [], [], [] + for l in range(LEVELS): + l2err_list.append([]) + l1err_list.append([]) + psnr_list.append([]) + psnr_impro_list.append([]) + # track the total costs for this model + cost_list = [] + + skip_spatial_out = [None] * LEVELS + skip_temporal_out = [None] * LEVELS + + estimate_viz = [] + + baseline_pyramid = image_pyramid(blurry[:, SEQ_LEN - 1, :, :, :], levels=LEVELS) + psnr_base = [scaled_psnr(x, y, name="PSNR_base") for x, y in zip(baseline_pyramid, expected_pyramid)] + + for t in range(1, SEQ_LEN): + logger.info("build time step: %i" % t) + # get observation at all scales in time step 't' + observation_pyramid = image_pyramid(blurry[:, SEQ_LEN - t - 1, :, :, :], levels=LEVELS) + + for l in range(LEVELS): + ll = LEVELS - l - 1 + logger.info("level: {} with input shape {}".format(ll, observation_pyramid[l].get_shape())) + # start with observation of smallest spatial size (l == 0) + skip_spatial_in = None if (l == 0) else skip_spatial_out[l - 1] + + estimate_pyramid[l], skip_spatial_out[l], skip_temporal_out[l] = \ + self.deblur_block(observation_pyramid[l], + estimate_pyramid[l], + skip_temporal_in=skip_temporal_out[l], + skip_spatial_in=skip_spatial_in, + name='level_%i_step_%i' % (ll, t)) + + l2err_list[l].append(l2_loss(estimate_pyramid[l], expected_pyramid[l], + name="L2loss_t%i_l%i" % (t, ll))) + l1err_list[l].append(l1_loss(estimate_pyramid[l], expected_pyramid[l], + name="L1loss_t%i_l%i" % (t, ll))) + psnr_list[l].append(scaled_psnr(estimate_pyramid[l], expected_pyramid[l], + name="PSNR_t%i_l%i" % (t, ll))) + pi = tf.divide(psnr_list[l][-1], psnr_base[l], name="PSNR_IMPRO_t%i_l%i" % (t, ll)) + psnr_impro_list[l].append(pi) + + # we just optimize the costs on level 0 + # (otherwise we get artifacts as the CNN tends to focus on optimizing level > 0 only) + if ll == 0: + cost_list.append(l2err_list[l][-1]) + + # naming estimates for grabbing during deployment + tf.identity((estimate_pyramid[l] + 1.0) * 128., name='estimate_t%i_l%i' % (t, ll)) + + if(l == LEVELS - 1): + estimate_viz.append(estimate_pyramid[l]) + + # just visualize original images + with tf.name_scope('visualization'): + + expected = sharp[:, -1, :, :, :] + estimate_viz = tf.concat(estimate_viz, axis=2) + observed = tf.concat([blurry[:, i, :, :, :] for i in range(SEQ_LEN)], axis=2) + + viz = tf.concat([observed, estimate_viz, expected], axis=2, name='estimates') + viz = 128.0 * (viz + 1.0) + viz = tf.cast(tf.clip_by_value(viz, 0, 255), tf.uint8, name='viz') + tf.summary.image('blurry5_estimates5_expected', viz, max_outputs=max(30, BATCH_SIZE)) + + # total cost is sum of all individual losses + self.cost = tf.add_n(cost_list, name="total_cost") + add_moving_summary(self.cost) + + for l in range(LEVELS): + add_moving_summary(l2err_list[l] + l1err_list[l] + psnr_list[l] + psnr_impro_list[l]) + + def _get_optimizer(self): + lr = symbolic_functions.get_scalar_var('learning_rate', 0.005, summary=True) + return tf.train.AdamOptimizer(lr) + + +def get_config(batch_size): + logger.auto_set_dir('n') + lmdbs = 
glob.glob('/graphics/projects/scratch/wieschol/YouTubeDataset/train*.lmdb') + ds_train = [YoutubeData(lmdb, shape=(128, 128), ego_motion_size=17) for lmdb in lmdbs] + ds_train = RandomMixData(ds_train) + ds_train = BatchData(ds_train, BATCH_SIZE) + ds_train = PrefetchDataZMQ(ds_train, 8) + + lmdbs = glob.glob('/graphics/projects/scratch/wieschol/YouTubeDataset/val*.lmdb') + ds_val = [YoutubeData(lmdb, shape=(128, 128), ego_motion_size=17) for lmdb in lmdbs] + ds_val = RandomMixData(ds_val) + ds_val = BatchData(ds_val, BATCH_SIZE) + ds_val = FixedSizeData(ds_val, 100) + ds_val = PrefetchDataZMQ(ds_val, 8) + + steps_per_epoch = 1000 + + return TrainConfig(dataflow=ds_train, + callbacks=[ + ModelSaver(), + InferenceRunner(ds_val, [ScalarStats('total_cost'), + ScalarStats('PSNR_IMPRO_t%i_l0' % (SEQ_LEN - 1))]) + ], + extra_callbacks=[ + MovingAverageSummary(), + ProgressBar(['tower0/PSNR_base', + 'tower0/PSNR_IMPRO_t%i_l0' % (SEQ_LEN - 1), + 'tower0/PSNR_IMPRO_t%i_l1' % (SEQ_LEN - 1), + 'tower0/PSNR_IMPRO_t%i_l2' % (SEQ_LEN - 1), + ]), + MergeAllSummaries(), + RunUpdateOps() + ], + model=Model(), + steps_per_epoch=steps_per_epoch, + max_epoch=400) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') + parser.add_argument('--batch', help='batch-size', type=int, default=32) + parser.add_argument('--load', help='load model') + + args = parser.parse_args() + + NR_GPU = len(args.gpu.split(',')) + with change_gpu(args.gpu): + config = get_config(args.batch) + if args.load: + config.session_init = SaverRestore(args.load) + config.nr_tower = NR_GPU + SyncMultiGPUTrainer(config).train() diff --git a/data_provider.py b/data_provider.py new file mode 100644 index 0000000..a797782 --- /dev/null +++ b/data_provider.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Author: Patrick Wieschollek + +from tensorpack import * +import tensorpack as tp +import argparse +import cv2 +import numpy as np +import psf +from scipy import ndimage + +""" +Usage: + +python data_provider.py --lmdb mydb2.lmdb +""" + + +class PSF(tp.dataflow.DataFlow): + """TensorPack dataflow proxy for PSF-sampler + + Attributes: + kernel_shape (int): size of PSF kernel + multiple (int): number of psf for one step (could be re-written by tp.BatchData) + psf_gen (python-generator): generator producing PSF samples + """ + def __init__(self, kernel_shape=7, multiple=5): + self.kernel_shape = kernel_shape + self.multiple = multiple + self.psf_gen = psf.PSF(kernel_size=kernel_shape) + + def reset_state(self): + pass + + def size(self): + return 100000000 + + def get_data(self): + sampler = self.psf_gen.sample() + while True: + k = [] + for _ in range(self.multiple): + k.append(next(sampler)) + yield k + + +class Blur(tp.dataflow.DataFlow): + """Apply blur from SPF kernels to incoming images. + + This yields [blurry1, blurry2, ... blurry5, sharp1, sharp2, ..., sharp5] + + Attributes: + ds_images: dataflow producing image-bursts (should already contain motion blur). 
+ ds_psf: dataflow producing psf kernels + """ + def __init__(self, ds_images, ds_psf): + self.ds_images = ds_images + self.ds_psf = ds_psf + + def reset_state(self): + self.ds_images.reset_state() + self.ds_psf.reset_state() + + def size(self): + return self.ds_images.size() + + def get_data(self): + + image_iter = self.ds_images.get_data() + psf_iter = self.ds_psf.get_data() + + for dp_image in image_iter: + + # sample camera shake kernel + dp_psf = next(psf_iter) + + # synthesize ego-motion + for t, k in enumerate(dp_psf): + blurry = dp_image[t] + for c in range(3): + blurry[:, :, c] = ndimage.convolve(blurry[:, :, c], k, mode='constant', cval=0.0) + dp_image[t] = blurry + + yield dp_image + + +def get_lmdb_data(lmdb_file): + + class Decoder(MapData): + """compress images into JPEG format""" + def __init__(self, df): + def func(dp): + return [cv2.imdecode(np.asarray(bytearray(i), dtype=np.uint8), cv2.IMREAD_COLOR) for i in dp] + super(Decoder, self).__init__(df, func) + + ds = LMDBDataPoint(lmdb_file, shuffle=True) + ds = Decoder(ds) + return ds + + +def get_data(lmdb_file, shape=(256, 256), ego_motion_size=[17, 25, 35, 71]): + + # s = (shape[0] + 2 * max(ego_motion_size), shape[1] + 2 * max(ego_motion_size)) + s = (306, 306) + + ds_img = get_lmdb_data(lmdb_file) + # to remove hints from border-handling we crop a slightly larger regions ... + ds_img = AugmentImageComponents(ds_img, [imgaug.RandomCrop(s)], index=range(10), copy=True) + # .. and then apply the PSF kernel .... + + ds_psf = [PSF(kernel_shape=m) for m in ego_motion_size] + ds_psf = RandomChooseData(ds_psf) + + ds = Blur(ds_img, ds_psf) + # ... before the final crop + ds = AugmentImageComponents(ds, [imgaug.CenterCrop(shape)], index=range(10), copy=True) + + def combine(x): + nr = len(x) + blurry = np.array(x[:nr // 2]) + sharp = np.array(x[nr // 2:]) + return [blurry, sharp] + ds = MapData(ds, combine) + return ds + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--lmdb', type=str, help='path to lmdb', required='True') + parser.add_argument('--num', type=int, help='display window', default=5) + parser.add_argument('--show', help='display window instead of writing', action='store_true') + + args = parser.parse_args() + + ds = get_data(args.lmdb, shape=(256, 256), ego_motion_size=[17, 25, 35, 71]) + ds.reset_state() + + for counter, dp in enumerate(ds.get_data()): + # from IPython import embed + # embed() + + blurry = dp[0] + blurry = [blurry[i, ...] for i in range(5)] + blurry = np.concatenate(blurry, axis=1) + + sharp = dp[1] + sharp = [sharp[i, ...] for i in range(5)] + sharp = np.concatenate(sharp, axis=1) + + out = np.concatenate([blurry, sharp], axis=1)[:, :, ::-1] + + if args.show: + cv2.imshow('stacked_blurry', out) + cv2.waitKey(0) + else: + cv2.imwrite('/tmp/stacked_data_%i.jpg' % counter, out) + + if counter > args.num: + break diff --git a/data_sampler.py b/data_sampler.py new file mode 100644 index 0000000..a67c334 --- /dev/null +++ b/data_sampler.py @@ -0,0 +1,336 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Author: Patrick Wieschollek + +import numpy as np + +import tensorpack as tp +from tensorpack import * +import cv2 +import video +import argparse +import os +from glob import glob + +""" +Sampling a burst of consecutive frames for motion blur. 
+ +Example: + + python data_sampler.py --pattern '/graphics/scratch/wieschol/YouTubeDataset/train/*_blurry.mp4' \ + --lmdb /tmp/train.lmdb +""" + + +def get_video(pattern, passes=10, rng=None): + + # they cause a 'moov atom not found' warning + # ignores = ['PsrPTpg6mNo', 'SAyOr2hTRkM', 'wxZO2UjBw'] + + video_list = glob(pattern) + if rng: + rng.shuffle(video_list) + + for _ in range(passes): + for fn in video_list: + + # # ignore some videos + # for i in ignores: + # if i in fn: + # continue + + sharp_video = fn.replace("_blurry.mp4", "_sharp.mp4") + if os.path.isfile(sharp_video): + yield fn + + +def get_random_sharp_frames(fn_seq, window_size=5, avg_frames=1, dark_tresh=8, + number_of_picked_frames=100, max_attemps=100, rng=None): + """Find good frames from videos (poor mans) + + Args: + fn (str): path to video (should end with __blurry.mp4) + window_size (int, optional): number of subsequent frame + avg_frames (int, optional): number subframes to average (param "L" in the paper) + dark_tresh (int, optional): some magic treshold (first guess) + number_of_picked_frames (int, optional): number of frames we wafrom this video + max_attemps (int, optional): a hyperparameter describing our patience + + Yields: + TYPE: tuple (blurry, sharp) + """ + + try: + for fn in fn_seq: + assert avg_frames % 2 == 1 + + if not os.path.isfile(fn): + raise video.ReadFrameException("video %s does not exists" % fn) + + fn_blurry = fn.replace("_blurry.mp4", "_sharp.mp4") + if not os.path.isfile(fn_blurry): + raise video.ReadFrameException("video %s does not exists" % fn_blurry) + + print fn + + blurry_video = video.Reader(fn) + sharp_video = video.Reader(fn_blurry) + + frameIdxs = (blurry_video.frames - window_size * avg_frames * 2) // (2 * avg_frames) + frameIdxs = np.arange(frameIdxs) * (2 * avg_frames) + window_size * avg_frames + if rng: + rng.shuffle(frameIdxs) + + used_frames = 0 + attemps = 0 + + # guess a frame by random + for frameId in frameIdxs: + + # get to that location + blurry_video.jump(frameId) + sharp_video.jump(frameId) + + # start collection from current position + sharp_frames = [] + blurry_frames = [] + + # assume it is a nice one + valid_frame = True + attemps += 1 + + if number_of_picked_frames < 0: + break + + if attemps > max_attemps: + yield None + + # now collect subsequent frames + for _ in xrange(window_size): + reference_sharp = None + subframes = [] + + current_sharp = None + last_sharp = None + + # subframes averaging + for c, a in enumerate(xrange(-avg_frames + 1, avg_frames)): + + # do we already have a sharp one + if current_sharp: + last_sharp = current_sharp + + current_sharp = sharp_video.read()[:, :, [2, 1, 0]] + curent_blurry = blurry_video.read()[:, :, [2, 1, 0]] + + if curent_blurry.shape[2] is not 3: + raise ReadFrameException('blurry frame has no 3 channels {}'.format(curent_blurry.shape)) + if current_sharp.shape[2] is not 3: + raise ReadFrameException('blurry frame has no 3 channels {}'.format(current_sharp.shape)) + if curent_blurry.shape[0] < 100 or curent_blurry.shape[1] < 100: + raise ReadFrameException('blurry frame is to small {}'.format(curent_blurry.shape)) + if current_sharp.shape[0] < 100 or current_sharp.shape[1] < 100: + raise ReadFrameException('sharp frame is to small {}'.format(curent_blurry.shape)) + + # test if too dark + if current_sharp.astype("float32").mean() < dark_tresh: + print "too dark", current_sharp.mean(), "vs", dark_tresh + valid_frame = False + break + + # test too similar + if a > 0: + if np.mean((last_sharp.astype("float32") - 
current_sharp.astype("float32"))**2) < 0.2: + print "too similar" + valid_frame = False + break + + # middle of average + if a == 0: + reference_sharp = current_sharp.astype(float) + subframes.append(curent_blurry.astype(float)) + + if not valid_frame: + break + + used_frames += 1 + if valid_frame: + blurry_frames.append(np.array(subframes).mean(axis=0)) + sharp_frames.append(reference_sharp) + + if len(blurry_frames) == window_size: + number_of_picked_frames -= 1 + yield [np.array(blurry_frames).astype('uint8'), np.array(sharp_frames).astype('uint8')] + except video.ReadFrameException as e: + print(e) + except Exception as e: + print e + + +def get_good_patches(frame_gen, patch_size=512, tresh=0.4, number_of_picked_patches=10, rng=None): + for frames in frame_gen: + for _ in range(number_of_picked_patches): + blurry, sharp = frames + crop_shape = (patch_size, patch_size) + orig_shape = blurry.shape[1:3] + + # randomly sample some patches + diffh = orig_shape[0] - crop_shape[0] + diffw = orig_shape[1] - crop_shape[1] + if diffh < 1: + print "diffh is too small {} in shapes {} / {}".format(diffh, orig_shape, crop_shape) + break + if diffw < 1: + print "diffw is too small {} in shapes {} / {}".format(diffw, orig_shape, crop_shape) + break + + r_h = np.random.randint(diffh) if not rng else rng.randint(diffh) + h0 = 0 if diffh == 0 else r_h + + r_w = np.random.randint(diffw) if not rng else rng.randint(diffw) + w0 = 0 if diffw == 0 else r_w + + blurry_patches = blurry[:, h0:h0 + crop_shape[0], w0:w0 + crop_shape[1], :] + sharp_patches = sharp[:, h0:h0 + crop_shape[0], w0:w0 + crop_shape[1], :] + # now some tests + + # # patch is too similar + # mse = np.mean((blurry_patches - sharp_patches)**2) + # if mse < 1: + # pass + # print "mse", mse + + dark_mean = np.mean((blurry_patches)) + if dark_mean < 20: + print "--> too dark {} vs. {}".format(dark_mean, 20) + continue + # print "dark_mean", dark_mean + + # reject gradients (content test) + # -------------------------------------------------- + def image_gradient(x): + if len(x.shape) == 2: + gx, gy = np.gradient(x) + return gx, gy + else: + gx, gy, gz = np.gradient(x) + return gy, gz + + # print (sharp_patches[0]) # It reads the first patch from a sequence of 5 patches? Matrix was output + + dx, dy = image_gradient(sharp_patches[0]) + + dx = np.sum((np.sign(np.abs(dx) - 0.05) + 1.) / 2.) + dy = np.sum((np.sign(np.abs(dy) - 0.05) + 1.) / 2.) + ps = sharp.shape[0] * sharp.shape[1] + + if (dx < tresh * ps) or (dy < tresh * ps): + print "--> grad dx {} vs. {}".format(dx, tresh * ps) + print "--> grad dy {} vs. {}".format(dy, tresh * ps) + continue + else: + pass + # print "dx, dy, ps", dx, dy, ps + + # reject psnr input if not blurry enought + # -------------------------------------------------- + def psnr(prediction, ground_truth, maxp=None): + def log10(x): + numerator = np.log(x) + denominator = np.log(10.) + return numerator / denominator + + mse = np.mean((prediction - ground_truth) ** 2) + try: + psnr = -10 * log10(mse) + if maxp: + psnr += 20.0 * log10(maxp) + return psnr + except Exception: + return 100000000 + + current_psnr = psnr(blurry_patches[0, ...], sharp_patches[0, ...], 255.) + if current_psnr < 40.: + yield [blurry_patches, sharp_patches] + else: + print "PSNR to hight {} vs. {}".format(current_psnr, 40.) 
+ + +class VideoPatchesFlow(tp.dataflow.RNGDataFlow): + """Create a burst of""" + def __init__(self, pattern, window_size=5, nr_examples=10): + super(VideoPatchesFlow, self).__init__() + + self.pattern = pattern + self.window_size = window_size + self.nr_examples = nr_examples + from tensorpack.utils import get_rng + self.rng = get_rng(self) + + def reset_state(self): + """ Reset the RNG """ + self.rng = get_rng(self) + + def get_data(self): + def encoder(img): + return np.asarray(bytearray(cv2.imencode('.jpg', img)[1].tostring()), dtype=np.uint8) + + video_list = get_video(self.pattern, passes=10000, rng=self.rng) + frame_list = get_random_sharp_frames(video_list, window_size=self.window_size, + number_of_picked_frames=30, rng=self.rng) + for b, s in get_good_patches(frame_list, number_of_picked_patches=10, rng=self.rng): + values = [] + for i in range(self.window_size): + b_enc = encoder(b[i]) + values.append(b_enc) + + for i in range(self.window_size): + s_enc = encoder(s[i]) + values.append(s_enc) + yield values + self.nr_examples -= 1 + if self.nr_examples == 0: + break + + def size(self): + return self.nr_examples + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--lmdb', type=str, help='path to lmdb') + parser.add_argument('--action', type=str, help='path to lmdb', default='create') + # '/graphics/scratch/wieschol/YouTubeDataset/train/*_blurry.mp4' + parser.add_argument('--pattern', type=str, help='pattern for blurry videos', required='True') + parser.add_argument('--num', type=int, help='number of bursts', required='True') + args = parser.parse_args() + + df = VideoPatchesFlow(args.pattern, nr_examples=args.num) + + if args.action == 'create': + assert args.lmdb is not None + df = PrefetchDataZMQ(df, nr_proc=32) + dftools.dump_dataflow_to_lmdb(df, args.lmdb) + + if args.action == 'debug': + class Decoder(MapData): + """compress images into JPEG format""" + def __init__(self, df): + def func(dp): + return [cv2.imdecode(np.asarray(bytearray(i), dtype=np.uint8), cv2.IMREAD_COLOR) for i in dp] + super(Decoder, self).__init__(df, func) + df = Decoder(df) + df.reset_state() + + for dp in df.get_data(): + nr = len(dp) + blurry = dp[:nr // 2] + sharp = dp[nr // 2:] + + stacked_blurry = np.hstack(blurry) + stacked_sharp = np.hstack(sharp) + + cv2.imshow('blurry', stacked_blurry) + cv2.imshow('sharp', stacked_sharp) + cv2.waitKey(0) diff --git a/download_videos.sh b/download_videos.sh new file mode 100644 index 0000000..1637cfa --- /dev/null +++ b/download_videos.sh @@ -0,0 +1,40 @@ +# these are some videos, we will add the entire collection later + +TARGET_DIR=/tmp/videos + +cd ${TARGET_DIR} +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=nZQQUShfdEs' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=a1D6xizxLXg' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=TN-pwblNxU4' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=tO01J-M3g0U' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=hVzL48VgYZQ' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=SMwxZO2UjBw' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=QPdWJeybMo8' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=a1D6xizxLXg' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=nZQQUShfdEs' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=4Epudzs5GYo' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=sLprVF6d7Ug' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=8ZEriMtypas' +youtube-dl -f 
bestvideo 'https://www.youtube.com/watch?v=SsbYwtLLgwM' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=A8s7dh_SJHQ' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=mkggXE5e2yk' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=o_24LPjOIHI' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=Z-BaOvkkIfc' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=o_24LPjOIHI' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=hvj4GePMEnU' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=l_-gNiiKeIU' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=9KBwSA6tgeo' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=mvfdGJVjLto' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=SYNjDs1PR9c' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=w2H07DRv2_M' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=IlXhywbggFY' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=fbumPrvwWmo' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=nCWqPa20kmY' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=I7ZRseJlZS4' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=Jjhyboay6y0' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=Z6lYqgWAmrg&t=4s' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=WA3JT3nRGU8' +youtube-dl -f bestvideo 'https://www.youtube.com/watch?v=9ge2gx69ftk' +youtube-dl -f bestvideo 'https://www.youtube.com/playlist?list=PLD33E5618740295DF' +youtube-dl -f bestvideo 'https://www.youtube.com/playlist?list=PL-NzkgWzAqPzErfdIWhmMV5l7vkG2nfOz' +youtube-dl -f bestvideo 'https://www.youtube.com/playlist?list=PL-NzkgWzAqPyR6LO8oR-JBdebcOwva_7h' \ No newline at end of file diff --git a/learning_blind_motion_deblurring.py b/learning_blind_motion_deblurring.py new file mode 100644 index 0000000..6bc51ca --- /dev/null +++ b/learning_blind_motion_deblurring.py @@ -0,0 +1,246 @@ + +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Author: Patrick Wieschollek + +import argparse +import tensorflow as tf +import numpy as np +from data_provider import get_data as YoutubeData +from tensorpack import * +from tensorpack.tfutils.summary import add_moving_summary +from tensorpack.utils import logger +from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope +import tensorpack.tfutils.symbolic_functions as symbf +import glob + +""" +Learning Blind Motion Deblurring +""" + +SEQ_LEN = 5 +BATCH_SIZE = 32 + + +def ReluConv2D(name, x, out_channels, use_relu=True, kernel_shape=3, stride=1): + if use_relu: + x = tf.nn.relu(x, name='%s_relu' % name) + x = Conv2D('%s_conv' % name, x, out_channels, kernel_shape=kernel_shape, stride=stride) + return x + + +def ReluDeconv2D(name, x, out_channels, kernel_shape=3, stride=1): + x = tf.nn.relu(x, name='%s_relu' % name) + x = Deconv2D('%s_deconv' % name, x, out_channels, kernel_shape=kernel_shape, stride=stride) + return x + + +class Model(ModelDesc): + + def _get_inputs(self): + return [InputDesc(tf.float32, (None, SEQ_LEN, None, None, 3), 'blurry'), + InputDesc(tf.float32, (None, SEQ_LEN, None, None, 3), 'sharp')] + + @auto_reuse_variable_scope + def deblur_block(self, observation, estimate, block_id=0, skip=None): + """apply one deblur step + Args: + observation: new unseen observation + estimate: latest estimate + block_id (int, optional): running id in recurrent structure + skip (None, optional): list of skip_connections + """ + logger.info("create deblur_block %i" % block_id) + with tf.name_scope("deblur_block"): + with 
argscope(BatchNorm, use_local_stat=True), \ + argscope([Conv2D, Deconv2D], nl=lambda x, name: BatchNorm(name, x)): + inputs = tf.concat([observation, estimate], 3) + d02 = ReluConv2D('d0', inputs, 32, stride=1, kernel_shape=3) + + # H x W -> H/2 x W/2 + d11 = ReluConv2D('d1_0', d02, 64, stride=2) + d12 = ReluConv2D('d1_1', d11, 64) + d13 = ReluConv2D('d1_2', d12, 64) + d14 = ReluConv2D('d1_3', d13, 64, kernel_shape=1) + d14 = tf.add(d11, d14, name='block_skip_A') + + # H/2 x W/2 -> H/2 x W/2 (dilated here?) + d21 = ReluConv2D('d2_0', d14, 64) + d22 = ReluConv2D('d2_1', d21, 64) + d23 = ReluConv2D('d2_2', d22, 64) + d24 = ReluConv2D('d2_3', d23, 64, kernel_shape=1) + d24 = tf.add(d21, d24, name='block_skip_B') + + # H/2 x W/2 -> H/4 x W/4 + d31 = ReluConv2D('d3_0', d21, 128, stride=2) + d32 = ReluConv2D('d3_1', d31, 128) + d33 = ReluConv2D('d3_2', d32, 128) + d34 = ReluConv2D('d3_3', d33, 128, kernel_shape=1) + d34 = tf.add(d31, d34, name='block_skip_C') + + # H/4 x W/4 -> H/8 x W/8 + d41 = ReluConv2D('d4_0', d31, 256, stride=2) + if len(skip) == 0: + skip.append(tf.zeros_like(d41)) + # -- begin temporal skip ----------- + skip[0] = tf.concat([d41, skip[0]], axis=3) + skip[0] = ReluConv2D('s1', skip[0], 256, kernel_shape=1) + if block_id > 1: + d41 = skip[0] + # -- end temporal skip ----------- + d42 = ReluConv2D('d4_1', d41, 256) + d43 = ReluConv2D('d4_2', d42, 256) + d44 = ReluConv2D('d4_3', d43, 256, kernel_shape=1) + skip[0] = ReluConv2D('so1', d44, 256) + d44 = tf.add(d41, d44, name='block_skip_D') + + # H/8 x W/8 -> H/4 x W/4 + u11 = ReluDeconv2D('u1_0', d44, 128, stride=2, kernel_shape=4) + + print 'u11', u11.get_shape() + print 'd34', d44.get_shape() + u11 = tf.add(u11, d34, name='skip01') + u12 = ReluConv2D('u1_1', u11, 128) + if len(skip) == 1: + skip.append(tf.zeros_like(u12)) + # -- begin temporal skip ----------- + skip[1] = tf.concat([u12, skip[1]], axis=3) + skip[1] = ReluConv2D('s2', skip[1], 128, kernel_shape=1) + if block_id > 1: + u12 = skip[1] + # -- end temporal skip ----------- + u13 = ReluConv2D('u1_2', u12, 128) + skip[1] = ReluConv2D('so2', u13, 128) + u14 = ReluConv2D('u1_3', u13, 128) + u14 = tf.add(u14, u11, name='block_skip_E') + + # H/4 x W/4 -> H/2 x W/2 + u21 = ReluDeconv2D('u2_0', u14, 64, stride=2, kernel_shape=4) + u21 = tf.add(u21, d24, name='skip02') + u22 = ReluConv2D('u2_1', u21, 64) + if len(skip) == 2: + skip.append(tf.zeros_like(u22)) + # -- begin temporal skip ----------- + skip[2] = tf.concat([u22, skip[2]], axis=3) + skip[2] = ReluConv2D('s3', skip[2], 64, kernel_shape=1) + if block_id > 1: + u22 = skip[2] + # -- end temporal skip ----------- + u23 = ReluConv2D('u2_2', u22, 64) + skip[2] = ReluConv2D('so32', u23, 64) + u24 = ReluConv2D('u2_3', u23, 64) + u24 = tf.add(u24, u21, name='block_skip_F') + + # H/2 x W/2 -> H x W + u31 = ReluDeconv2D('u3_0', u24, 64, stride=2, kernel_shape=4) + u32 = ReluConv2D('u3_1', u31, 64) + u33 = ReluConv2D('u3_2', u32, 64) + u34 = ReluConv2D('u3_3', u33, 6) + u35 = ReluConv2D('u3_4', u34, 3) + estimate = tf.add(estimate, u35, name='skip03') + + return estimate, skip + + def _build_graph(self, input_vars): + + # centered inputs [B, T, H, W, C] + blurry, sharp = input_vars + blurry = blurry / 128.0 - 1 + sharp = sharp / 128.0 - 1 + + # take last as target + expected = sharp[:, -1, :, :, :] + estimate = blurry[:, -1, :, :, :] + + l2err_list, l1err_list, psnr_list, psnr_impro_list = [], [], [], [] + + skip = [] + # skip.append(tf.constant(np.zeros((BATCH_SIZE, 16, 16, 256), dtype=np.float32))) + # 
skip.append(tf.constant(np.zeros((BATCH_SIZE, 32, 32, 128), dtype=np.float32))) + # skip.append(tf.constant(np.zeros((BATCH_SIZE, 64, 64, 64), dtype=np.float32))) + + psnr_i = symbf.psnr(128. * (estimate + 1.0), 128. * (expected + 1.), 255, name="psnr_0") + psnr_list.append(psnr_i) + + estimates = [] + for t in range(1, SEQ_LEN): + observation = blurry[:, -1 - t, :, :, :] + estimate, skip = self.deblur_block(observation, estimate, t, skip=skip) + estimates.append(estimate) + + # tracking losses + l2err_list.append(tf.reduce_mean(tf.squared_difference(estimate, expected), name="L2loss_block_%i" % t)) + l1err_list.append(tf.reduce_mean(tf.abs(estimate - expected), name='L1loss_block%i' % t)) + + # tracking psnr + psnr = symbf.psnr(128. * (estimate + 1.0), 128. * (expected + 1.), 255, name="psnr_%i" % t) + psnr_list.append(psnr) + psnr_impro_list.append(tf.divide(psnr, psnr_i, name="psnr_improv_%i" % t)) + + # naming estimates for grabbing during deployment + tf.identity((estimate + 1.0) * 128., name='estimate_%i' % t) + + with tf.name_scope('visualization'): + viz = tf.unstack(blurry, num=SEQ_LEN, axis=1) + estimates + [expected] + viz = 128.0 * (tf.concat(viz, axis=2, name='estimates') + 1.0) + viz = tf.cast(tf.clip_by_value(viz, 0, 255), tf.uint8, name='viz') + tf.summary.image('blurry5_estimates5_expected', viz, max_outputs=max(30, BATCH_SIZE)) + + # total cost is sum of all individual losses + self.cost = tf.add_n(l2err_list, name="total_cost") + add_moving_summary([self.costdil01] + l2err_list + l1err_list + psnr_list + psnr_impro_list) + + with tf.name_scope('histograms'): + tf.summary.histogram('l2err_list', tf.stack([tf.expand_dims(d, -1) for d in l2err_list], axis=1)) + tf.summary.histogram('l1err_list', tf.stack([tf.expand_dims(d, -1) for d in l1err_list], axis=1)) + tf.summary.histogram('psnr_list', tf.stack([tf.expand_dims(d, -1) for d in psnr_list], axis=1)) + tf.summary.histogram('psnr_impro_list', tf.stack([tf.expand_dims(d, -1) for d in psnr_impro_list], axis=1)) + + def _get_optimizer(self): + lr = symbolic_functions.get_scalar_var('learning_rate', 0.005, summary=True) + return tf.train.AdamOptimizer(lr) + + +def get_config(batch_size): + logger.auto_set_dir() + lmdbs = glob.glob('/graphics/projects/scratch/wieschol/YouTubeDataset/train*.lmdb') + ds = [YoutubeData(lmdb, shape=(128, 128), ego_motion_size=[17, 25, 35, 71]) for lmdb in lmdbs] + dataset_train = RandomMixData(ds) + dataset_train = BatchData(dataset_train, BATCH_SIZE) + dataset_train = PrefetchDataZMQ(dataset_train, 8) + + steps_per_epoch = 1000 + + return TrainConfig( + dataflow=dataset_train, + callbacks=[ + ModelSaver(), + ], + extra_callbacks=[ + MovingAverageSummary(), + ProgressBar(['tower0/psnr_%i' % (SEQ_LEN - 1), 'tower0/psnr_0', + 'tower0/psnr_improv_%i' % (SEQ_LEN - 1)]), + MergeAllSummaries(), + RunUpdateOps() + ], + model=Model(), + steps_per_epoch=steps_per_epoch, + max_epoch=400, + ) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') + parser.add_argument('--batch', help='batch-size', type=int, default=32) + parser.add_argument('--load', help='load model') + + args = parser.parse_args() + + NR_GPU = len(args.gpu.split(',')) + with change_gpu(args.gpu): + config = get_config(args.batch) + if args.load: + config.session_init = SaverRestore(args.load) + config.nr_tower = NR_GPU + SyncMultiGPUTrainer(config).train() diff --git a/learning_blind_motion_deblurring_singlescale.py 
b/learning_blind_motion_deblurring_singlescale.py new file mode 100644 index 0000000..f92094b --- /dev/null +++ b/learning_blind_motion_deblurring_singlescale.py @@ -0,0 +1,298 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Author: Patrick Wieschollek + +import argparse +import tensorflow as tf +from data_provider import get_data as YoutubeData # noqa +from tensorpack import * +from tensorpack.tfutils.summary import add_moving_summary +from tensorpack.utils import logger +from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope +import tensorpack.tfutils.symbolic_functions as symbf +import glob + +""" +Learning Blind Motion Deblurring +""" + +SEQ_LEN = 5 +BATCH_SIZE = 8 +SHAPE = 128 +LEVELS = 3 + + +def ReluConv2D(name, x, out_channels, use_relu=True, kernel_shape=3, stride=1): + if use_relu: + x = tf.nn.relu(x, name='%s_relu' % name) + x = Conv2D('%s_conv' % name, x, out_channels, kernel_shape=kernel_shape, stride=stride) + return x + + +def ReluDeconv2D(name, x, out_channels, kernel_shape=3, stride=1): + x = tf.nn.relu(x, name='%s_relu' % name) + x = Deconv2D('%s_deconv' % name, x, out_channels, kernel_shape=kernel_shape, stride=stride) + return x + + +def Merge(incoming_skip, ID, tensor, name): + with tf.name_scope('Merge_%s' % name): + if incoming_skip is None: + # we gonna fake the skip, to allow TF to reuse variable and construct + # for this block a senseless conv-layer + incoming_skip_internal = tensor + else: + # we really want to merge both layers + incoming_skip_internal = incoming_skip[ID] + hs, ws = incoming_skip_internal.get_shape().as_list()[1:3] + hl, wl = tensor.get_shape().as_list()[1:3] + + tmp_name = resize(incoming_skip_internal, name) + if (hs != hl) or (ws != wl): + incoming_skip_internal = tmp_name + channels = tensor.get_shape().as_list()[3] + tensor_internal = tf.concat([tensor, incoming_skip_internal], axis=3) + tensor_internal = ReluConv2D(name, tensor_internal, channels, kernel_shape=1) + + if incoming_skip is None: + # we have constructed the operation but just return the unmodified tensor itself + # workaround for '@auto_reuse_variable_scope' + # be aware this gives warnings "not gradient w.r.t. ..." + return tensor + else: + # we return the modified tensor + return tensor_internal + + +class Model(ModelDesc): + + def _get_inputs(self): + return [InputDesc(tf.float32, (None, SEQ_LEN, SHAPE, SHAPE, 3), 'blurry'), + InputDesc(tf.float32, (None, SEQ_LEN, SHAPE, SHAPE, 3), 'sharp')] + + @auto_reuse_variable_scope + def deblur_block(self, observation, estimate, + skip_temporal_in=None, + name=None): + """Apply one deblur step. + + Args: + observation: new unseen observation + estimate: latest estimate (the image which should be improved) + skip_temporal_in (None, optional): list of skip_connections + skip_unet_out(None, optional): lsit of skip connections between deblurring blocks within the network . 
+ """ + + skip_temporal_out = [] # green + skip_unet_out = [] # grey + + with tf.name_scope("deblur_block_%s" % name): + # be aware use_local_stat=True gives warnings + with argscope(BatchNorm, use_local_stat=True), \ + argscope([Conv2D, Deconv2D], nl=lambda x, name: BatchNorm(name, x)): + inputs = tf.concat([observation, estimate], 3) + + block = ReluConv2D('d0', inputs, 32, stride=1, kernel_shape=3) + + # H x W -> H/2 x W/2 + # --------------------------------------------------------------------- + block = ReluConv2D('d1_0', block, 64, stride=2) + block = Merge(skip_temporal_in, 2, block, 'd11_s') + block_start = block + block = ReluConv2D('d1_1', block, 64) + block = ReluConv2D('d1_2', block, 64) + block = ReluConv2D('d1_3', block, 64, kernel_shape=1) + block = tf.add(block_start, block, name='block_skip_A') + + # H/2 x W/2 -> H/2 x W/2 + # --------------------------------------------------------------------- + block = ReluConv2D('d2_0', block, 64) + block_start = block + block = ReluConv2D('d2_1', block, 64) + block = ReluConv2D('d2_2', block, 64) + block = ReluConv2D('d2_3', block, 64, kernel_shape=1) + block = tf.add(block_start, block, name='block_skip_B') + skip_unet_out.append(block) + + # H/2 x W/2 -> H/4 x W/4 + # --------------------------------------------------------------------- + block = ReluConv2D('d3_0', block, 128, stride=2) + block = Merge(skip_temporal_in, 1, block, 'd31_s') + block_start = block + block = ReluConv2D('d3_1', block, 128) + block = ReluConv2D('d3_2', block, 128) + block = ReluConv2D('d3_3', block, 128, kernel_shape=1) + block = tf.add(block_start, block, name='block_skip_C') + skip_unet_out.append(block) + + # H/4 x W/4 -> H/8 x W/8 + # --------------------------------------------------------------------- + block = ReluConv2D('d4_0', block, 256, stride=2) + block = Merge(skip_temporal_in, 0, block, 'd41_s') + block_start = block + block = ReluConv2D('d4_1', block, 256) + block = ReluConv2D('d4_2', block, 256) + block = ReluConv2D('d4_3', block, 256, kernel_shape=1) + block = tf.add(block_start, block, name='block_skip_D') + skip_temporal_out.append(block) + + # H/8 x W/8 -> H/4 x W/4 + # --------------------------------------------------------------------- + block = ReluDeconv2D('u1_0', block, 128, stride=2, kernel_shape=4) + block = tf.add(block, skip_unet_out[1], name='skip01') + block_start = block + block = ReluConv2D('u1_1', block, 128) + block = ReluConv2D('u1_2', block, 128) + block = ReluConv2D('u1_3', block, 128) + block = tf.add(block, block_start, name='block_skip_E') + skip_temporal_out.append(block) + + # H/4 x W/4 -> H/2 x W/2 + # --------------------------------------------------------------------- + block = ReluDeconv2D('u2_0', block, 64, stride=2, kernel_shape=4) + block = tf.add(block, skip_unet_out[0], name='skip02') + block_start = block + block = ReluConv2D('u2_1', block, 64) + block = ReluConv2D('u2_2', block, 64) + block = ReluConv2D('u2_3', block, 64) + block = tf.add(block, block_start, name='block_skip_F') + skip_temporal_out.append(block) + + # H/2 x W/2 -> H x W + # --------------------------------------------------------------------- + block = ReluDeconv2D('u3_0', block, 64, stride=2, kernel_shape=4) + block = ReluConv2D('u3_1', block, 64) + block = ReluConv2D('u3_2', block, 64) + block = ReluConv2D('u3_3', block, 6) + block = ReluConv2D('u3_4', block, 3) + estimate = tf.add(estimate, block, name='skip03') + + return estimate, skip_temporal_out + + def _build_graph(self, input_vars): + + # some loss functions and metrics to 
track performance
+        def l2_loss(x, y, name):
+            return tf.reduce_mean(tf.squared_difference(x, y), name=name)
+
+        def l1_loss(x, y, name):
+            return tf.reduce_mean(tf.abs(x - y), name=name)
+
+        def scaled_psnr(x, y, name):
+            return symbf.psnr(128. * (x + 1.0), 128. * (y + 1.), 255, name=name)
+
+        # centered inputs [B, T, H, W, C]
+        blurry, sharp = input_vars
+        blurry = blurry / 128.0 - 1
+        sharp = sharp / 128.0 - 1
+
+        l2err_list, l1err_list, psnr_list, psnr_impro_list = [], [], [], []
+
+        skip_temporal_out = None
+        estimate_viz = []
+
+        # the most recent blurry frame serves as the initial estimate,
+        # its sharp counterpart is the target
+        estimate = blurry[:, SEQ_LEN - 1, :, :, :]
+        expected = sharp[:, SEQ_LEN - 1, :, :, :]
+
+        psnr_base = scaled_psnr(blurry[:, SEQ_LEN - 1, :, :, :], sharp[:, -1, :, :, :], name="PSNR_base")
+
+        for t in range(1, SEQ_LEN):
+            logger.info("build time step: %i" % t)
+
+            # get the observation for time step 't'
+            observation = blurry[:, SEQ_LEN - t - 1, :, :, :]
+            logger.info("input shape {}".format(observation.get_shape()))
+
+            estimate, skip_temporal_out = \
+                self.deblur_block(observation,
+                                  estimate,
+                                  skip_temporal_in=skip_temporal_out,
+                                  name='step_%i' % t)
+
+            l2err_list.append(l2_loss(estimate, expected, name="L2loss_t%i" % (t)))
+            l1err_list.append(l1_loss(estimate, expected, name="L1loss_t%i" % (t)))
+            psnr_list.append(scaled_psnr(estimate, expected, name="PSNR_t%i" % (t)))
+            pi = tf.divide(psnr_list[-1], psnr_base, name="PSNR_IMPRO_t%i" % (t))
+            psnr_impro_list.append(pi)
+
+            # naming estimates for grabbing during deployment
+            tf.identity((estimate + 1.0) * 128., name='estimate_t%i' % t)
+
+            estimate_viz.append(estimate)
+
+        # just visualize original images
+        with tf.name_scope('visualization'):
+            estimate_viz = tf.concat(estimate_viz, axis=2)
+            observed = tf.concat([blurry[:, i, :, :, :] for i in range(SEQ_LEN)], axis=2)
+
+            viz = tf.concat([observed, estimate_viz, expected], axis=2, name='estimates')
+            viz = 128.0 * (viz + 1.0)
+            viz = tf.cast(tf.clip_by_value(viz, 0, 255), tf.uint8, name='viz')
+            tf.summary.image('blurry5_estimates5_expected', viz, max_outputs=max(30, BATCH_SIZE))
+
+        # total cost is the sum of all individual losses
+        self.cost = tf.add_n(l2err_list, name="total_cost")
+        add_moving_summary(self.cost)
+        add_moving_summary(l2err_list + l1err_list + psnr_list + psnr_impro_list)
+
+    def _get_optimizer(self):
+        lr = symbf.get_scalar_var('learning_rate', 0.005, summary=True)
+        return tf.train.AdamOptimizer(lr)
+
+
+def get_config(datadir, batch_size):
+    logger.auto_set_dir('n')
+    lmdbs = glob.glob(os.path.join(datadir, 'train*.lmdb'))
+    ds_train = [YoutubeData(lmdb, shape=(128, 128), ego_motion_size=17) for lmdb in lmdbs]
+    ds_train = RandomMixData(ds_train)
+    ds_train = BatchData(ds_train, batch_size)
+    ds_train = PrefetchDataZMQ(ds_train, 8)
+
+    lmdbs = glob.glob(os.path.join(datadir, 'val*.lmdb'))
+    ds_val = [YoutubeData(lmdb, shape=(128, 128), ego_motion_size=17) for lmdb in lmdbs]
+    ds_val = RandomMixData(ds_val)
+    ds_val = BatchData(ds_val, batch_size)
+    ds_val = FixedSizeData(ds_val, 100)
+    ds_val = PrefetchDataZMQ(ds_val, 8)
+
+    steps_per_epoch = 1000
+
+    return TrainConfig(dataflow=ds_train,
+                       callbacks=[
+                           ModelSaver(),
+                           InferenceRunner(ds_val, [ScalarStats('total_cost'),
+                                                    ScalarStats('PSNR_IMPRO_t%i' % (SEQ_LEN - 1))])
+                       ],
+                       extra_callbacks=[
+                           MovingAverageSummary(),
+                           ProgressBar(['tower0/PSNR_base',
+                                        'tower0/PSNR_IMPRO_t%i' % (SEQ_LEN - 1)]),
+                           MergeAllSummaries(),
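+                           # runs the tf.GraphKeys.UPDATE_OPS collection every step
+                           # (keeps the BatchNorm moving averages up to date)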
RunUpdateOps() + ], + model=Model(), + steps_per_epoch=steps_per_epoch, + max_epoch=400) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') + parser.add_argument('--batch', help='batch-size', type=int, default=32) + d = '/graphics/projects/scratch/wieschol/YouTubeDataset/' + parser.add_argument('--data', help='batch-size', type=str, default=d) + parser.add_argument('--load', help='load model') + + args = parser.parse_args() + + NR_GPU = len(args.gpu.split(',')) + with change_gpu(args.gpu): + config = get_config(args.data, args.batch) + if args.load: + config.session_init = SaverRestore(args.load) + config.nr_tower = NR_GPU + SyncMultiGPUTrainer(config).train() diff --git a/psf.py b/psf.py new file mode 100644 index 0000000..f1a474a --- /dev/null +++ b/psf.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Author: Patrick Wieschollek + +""" +Sampling PSF kernels for synthetic camera shake. + +see "A machine learning approach for non-blind image deconvolution" (CVPR2013) +for details. +""" + +import numpy as np +import cv2 + + +class PSF(object): + """Generator for synthetic PSF kernels. + + Usage: + p = PSF(kernel_size=17) + for psf in p.sample(): + print(psf) + """ + @staticmethod + def cov_materniso(x, z, hyp=np.log([0.3, 1 / 4.]), d=3, i=None): + ell = np.exp(hyp[0]) + sf2 = np.exp(2. * hyp[1]) + + def f(t): + return 1. + t + + def df(t): + return t + + def m(t, f): + return f(t) * np.exp(-t) + + def dm(t, f): + return df(t) * np.exp(-t) * t + + def sq_dist(a, b): + if len(a.shape) == 1: + a = np.reshape(a, (-1, 1)) + if len(b.shape) == 1: + b = np.reshape(b, (-1, 1)) + return (a * a) + (b * b).T - 2 * np.matmul(a, b.T) + # return np.sum(a*a, 0).T[:, None] + np.sum(b*b, 0) - 2 * np.matmul(a.T, b) + + a = (np.sqrt(d) / ell) * x + b = (np.sqrt(d) / ell) * z + k = sq_dist(a, b) + k = sf2 * m(np.sqrt(k), f) + return k + + def __init__(self, kernel_size=7, trajectory_dim=2, num_interpolation=1000): + """Init a new PSF generator + + Args: + kernel_size (int, optional): size of psf kernel + trajectory_dim (int, optional): dimension of trajectory space + num_interpolation (int, optional): number of samples on trajectory + """ + super(PSF, self).__init__() + self.kernel_size = kernel_size + self.trajectory_dim = trajectory_dim + self.num_interpolation = num_interpolation + + self.means = .5 * np.ones((self.num_interpolation, 1)) + + t = np.linspace(0, 1, num_interpolation) + covs = PSF.cov_materniso(t, t) + self.C = np.linalg.cholesky(covs).T + + def sample(self): + """Yield a generated PSF + """ + max_length = self.kernel_size + sf = self.kernel_size + + def centerofmass(f): + sf = f.shape[0] + f = np.abs(f) + f = f / float(f.sum()) + i = np.sum(np.matmul(np.arange(1, sf + 1), f)) + j = np.sum(np.matmul(np.arange(1, sf + 1), f.T)) + return np.array([i, j]) + + def circshift(a, shift): + a = np.roll(a, shift[0] % a.shape[0], axis=0) + a = np.roll(a, shift[1] % a.shape[1], axis=1) + return a + + sample = np.zeros((self.num_interpolation, self.trajectory_dim)) + + while True: + for i in range(self.trajectory_dim): + sample[:, i] = (np.matmul(self.C.T, np.random.randn(self.C.shape[0], 1)) + self.means).flatten() + + max_length = sf + scaled = np.round((max_length - 1) * sample + 1 + 0.1) + + if ((scaled < 1).any() or (scaled > max_length).any()): + # out of bounds + continue + + scaled = scaled.astype(int) + + # Converts trajectory to blur kernel. 
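+            # Each interpolated 2-D trajectory point was rounded to a pixel position
+            # in 'scaled'; accumulating those hits in an empty max_length x max_length
+            # grid makes the kernel value proportional to how long the simulated
+            # camera path dwells at each pixel.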
+ f = np.zeros((max_length, max_length)) + for i in scaled: + f[i[0] - 1, i[1] - 1] += 1 + + # Centers kernel up to one pixel precision, could also cause out of bounds. + shift = np.array([max_length + 1, max_length + 1]) / 2.0 + shift -= np.round(centerofmass(f) + 0.1) + shift = shift.astype(int) + f = circshift(f, shift) + + # Test if shift actually caused out of bounds. + test_center = np.round(centerofmass(f) + 0.1) + if np.sum(test_center - [(max_length + 1) // 2, (max_length + 1) // 2]) == 0: + # smooth kernel slightly + # fhh = centerofmass(f) + # print fhh + f = cv2.GaussianBlur(f, (3, 3), sigmaX=0.3, sigmaY=0.3) + f /= f.sum() + # subpixel shift + for k in range(3): + ff = np.copy(f) + shift = np.array([max_length + 1., max_length + 1.]) / 2.0 - centerofmass(ff) + affine_warp = np.array([[1., 0., shift[1]], + [0., 1., shift[0]]]).astype("float32") + + ff = cv2.warpAffine(ff.astype("float32"), affine_warp, (max_length, max_length)) + # print k, centerofmass(ff), ff.sum(), (self.kernel_size + 1) / 2. + if abs(ff.sum() - 1) > 1e-8: + continue + + ff /= ff.max() + ff /= ff.sum() + yield ff + else: + continue + +if __name__ == '__main__': + # usage is like this + p = PSF(kernel_size=35) + for k in p.sample(): + # for visualization --> rescale + viz = np.copy(k) + viz -= viz.min() + viz /= viz.max() + viz = cv2.resize(viz, (0, 0), fx=4, fy=4) + cv2.imshow('PSF', viz) + cv2.waitKey(0) diff --git a/synthblur/.clang_complete b/synthblur/.clang_complete new file mode 100644 index 0000000..ec5adba --- /dev/null +++ b/synthblur/.clang_complete @@ -0,0 +1 @@ +-Iinclude \ No newline at end of file diff --git a/synthblur/CMakeLists.txt b/synthblur/CMakeLists.txt new file mode 100644 index 0000000..fe1cf48 --- /dev/null +++ b/synthblur/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 2.8) +project( convert ) + +if(NOT CMAKE_CXX_STANDARD) # don't override + set(CMAKE_CXX_STANDARD 11) +endif() + +find_package( OpenCV REQUIRED ) +# find_package (glog) + +include_directories(include) + +add_executable( convert src/convert.cpp src/flow.cpp src/video.cpp src/meta.cpp src/blur.cpp) +target_link_libraries( convert ${OpenCV_LIBS} glog) diff --git a/synthblur/include/blur.h b/synthblur/include/blur.h new file mode 100644 index 0000000..cef6364 --- /dev/null +++ b/synthblur/include/blur.h @@ -0,0 +1,16 @@ +// Author: Patrick Wieschollek +#ifndef BLUR_H +#define BLUR_H + +#include + +#include "flow.h" + +class Blur +{ +public: + Blur(); + cv::Mat shift(const cv::Mat &anchor_, const Flow &flow_, float ratio); +}; + +#endif \ No newline at end of file diff --git a/synthblur/include/flow.h b/synthblur/include/flow.h new file mode 100644 index 0000000..a275fff --- /dev/null +++ b/synthblur/include/flow.h @@ -0,0 +1,33 @@ +// Author: Patrick Wieschollek +#ifndef FLOW_H +#define FLOW_H + +#include +#include +#include +#include + + +class Flow { + cv::Mat anchor; + cv::cuda::GpuMat motion; + cv::Ptr flow_estimator; + + cv::Vec3i colorWheel[55]; + + public: + Flow(); + void compute(cv::Mat a, cv::Mat b); + cv::Mat visualize(); + + const cv::cuda::GpuMat& get() const; + + cv::Mat shift(const cv::Mat &img, float ratio); + + void initColorWheel(); + bool isFlowCorrect(cv::Point2f u); + cv::Vec3b computeColor(float fx, float fy); + cv::Mat blur(const cv::Mat &img, std::vector ratios); + +}; +#endif \ No newline at end of file diff --git a/synthblur/include/meta.h b/synthblur/include/meta.h new file mode 100644 index 0000000..d7b84cf --- /dev/null +++ b/synthblur/include/meta.h @@ -0,0 +1,55 @@ +// Author: 
Patrick Wieschollek +#ifndef META_H +#define META_H + +#include +#include + +template +std::vector linspace(T start_in, T end_in, const int num_in) { + float start = static_cast(start_in); + float end = static_cast(end_in); + float num = static_cast(num_in); + float delta = (end - start) / (num - 1); + + std::vector linspaced; + for (int i = 0; i < num; ++i) { + linspaced.push_back(start + delta * i); + } + // linspaced.push_back(end); + return linspaced; +} + + +template +T clip(T val, T lower, T upper) { + val = (val < upper) ? val : upper - 1; + val = (val < lower) ? lower : val; + return val; +} + + +template +T getMean(const std::vector& images) { + if (images.empty()) return T(); + + cv::Mat accumulator(images[0].rows, images[0].cols, CV_32FC3, float(0.)); + cv::Mat temp; + for (int i = 0; i < images.size(); ++i) { + images[i].convertTo(temp, CV_32FC3); + accumulator += temp; + } + + accumulator.convertTo(accumulator, CV_8U, 1. / images.size()); + return accumulator; +} + +template +void pop_front(std::vector& vec) { + assert(!vec.empty()); + vec.erase(vec.begin()); +} + +cv::Mat scale(const cv::Mat input, float scale); + +#endif \ No newline at end of file diff --git a/synthblur/include/video.h b/synthblur/include/video.h new file mode 100644 index 0000000..ff60a6b --- /dev/null +++ b/synthblur/include/video.h @@ -0,0 +1,41 @@ +// Author: Patrick Wieschollek +#ifndef VIDEO_H +#define VIDEO_H + +#include + +#include +#include +#include + + +class VideoReader { + std::string path; + cv::VideoCapture hnd; + public: + VideoReader(std::string fn); + + void jump(unsigned int idx); + + VideoReader& operator >> (cv::Mat& matrix); + + double fps() const; + unsigned int frames() const; + unsigned int frame() const; + unsigned int height() const; + unsigned int width() const; + +}; + + +class VideoWriter { + std::string path; + cv::VideoWriter hnd; + int codec; + public: + VideoWriter(std::string fn, const int width, const int height, const float fps); + VideoWriter& operator << (const cv::Mat& matrix); +}; + + +#endif \ No newline at end of file diff --git a/synthblur/src/blur.cpp b/synthblur/src/blur.cpp new file mode 100644 index 0000000..dc3417d --- /dev/null +++ b/synthblur/src/blur.cpp @@ -0,0 +1,46 @@ +// Author: Patrick Wieschollek +#include "blur.h" +#include "meta.h" + +Blur::Blur() {} +cv::Mat Blur::shift(const cv::Mat &img, const Flow &d_flow, float ratio) { + cv::cuda::GpuMat planes[2]; + cv::cuda::split(d_flow.get(), planes); + + // optical flow + cv::Mat flowx(planes[0]); + cv::Mat flowy(planes[1]); + + // resulting image is original + shifted values + cv::Mat shifted_img; + img.convertTo(shifted_img, CV_32FC3); + + const int width = shifted_img.size().width; + const int height = shifted_img.size().height; + const int channels = shifted_img.channels(); + + // shifted versions of input image + for (int row = 0; row < height; ++row) { + for (int col = 0; col < width; ++col) { + // get offset for shifting in each direction + int shift_x = flowx.at(row, col); + int shift_y = flowy.at(row, col); + + // compute new coordinates + int col_ = ratio * (col + shift_x) + (1.0 - ratio) * col; + int row_ = ratio * (row + shift_y) + (1.0 - ratio) * row; + + // make sure, we are still within the image + col_ = clip(col_, 0, width); + row_ = clip(row_, 0, height); + + // add to previous image + for (int z = 0; z < channels; ++z) { + // TODO: this might also overlap + shifted_img.at(row_, col_)[z] = float(img.at(row, col)[z]); + + } + } + } + return shifted_img; +} \ No newline at end of file 
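For orientation, the same idea as `Blur::shift` above — synthesizing a blurry frame by averaging several fractionally flow-shifted copies of a sharp frame — can be sketched in a few lines of Python/OpenCV. This is only an illustration, not part of the repository: the function name `synth_blur` is mine, it uses the CPU Farneback flow instead of the CUDA estimator, and it warps backwards with `cv2.remap` rather than scattering pixels forward as the C++ code does.

```python
import cv2
import numpy as np


def synth_blur(prev_frame, next_frame, num_steps=10):
    """Average flow-shifted copies of prev_frame to approximate motion blur."""
    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
    next_gray = cv2.cvtColor(next_frame, cv2.COLOR_BGR2GRAY)
    # dense optical flow between the two consecutive frames (CPU Farneback)
    flow = cv2.calcOpticalFlowFarneback(prev_gray, next_gray, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)
    h, w = prev_gray.shape
    grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
    acc = np.zeros(prev_frame.shape, dtype=np.float32)
    for ratio in np.linspace(0.0, 1.0, num_steps):
        # sample prev_frame at positions displaced by a fraction of the flow
        map_x = (grid_x + ratio * flow[..., 0]).astype(np.float32)
        map_y = (grid_y + ratio * flow[..., 1]).astype(np.float32)
        shifted = cv2.remap(prev_frame, map_x, map_y, cv2.INTER_LINEAR)
        acc += shifted
    return (acc / num_steps).astype(np.uint8)


if __name__ == '__main__':
    cap = cv2.VideoCapture('input.mp4')   # any sharp, high-frame-rate clip
    ok_a, a = cap.read()
    ok_b, b = cap.read()
    if ok_a and ok_b:
        cv2.imwrite('blurry.png', synth_blur(a, b, num_steps=10))
```

More intermediate ratios give smoother blur streaks at the cost of one extra warp per step; the C++ tool follows the same trade-off when it averages its shifted frames.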
diff --git a/synthblur/src/convert.cpp b/synthblur/src/convert.cpp new file mode 100644 index 0000000..675df73 --- /dev/null +++ b/synthblur/src/convert.cpp @@ -0,0 +1,73 @@ +// Author: Patrick Wieschollek + +#include +#include +#include "meta.h" +#include "flow.h" +#include "blur.h" +#include "video.h" + + +int main(int argc, char const *argv[]) +{ + if (argc < 2) { + std::cerr << "Usage : " << argv[0] << "