diff --git a/COPYING b/COPYING index 1f3e6505..782c7bb6 100644 --- a/COPYING +++ b/COPYING @@ -1,14 +1,14 @@ -Copyright (c) 2014 The MatConvNet team. +Copyright (c) 2014-16 The MatConvNet Team. All rights reserved. Redistribution and use in source and binary forms are permitted provided that the above copyright notice and this paragraph are -duplicated in all such forms and that any documentation, -advertising materials, and other materials related to such -distribution and use acknowledge that the software was developed -by the . The name of the - may not be used to endorse or promote products derived -from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. \ No newline at end of file +duplicated in all such forms and that any documentation, advertising +materials, and other materials related to such distribution and use +acknowledge that the software was developed by the MatConvNet +Team. The name of the MatConvNet Team may not be used to endorse or +promote products derived from this software without specific prior +written permission. THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. \ No newline at end of file diff --git a/Makefile b/Makefile index 165729a7..246d8bda 100644 --- a/Makefile +++ b/Makefile @@ -20,10 +20,10 @@ DEBUG ?= ARCH ?= maci64 # Configure MATLAB -MATLABROOT ?= /Applications/MATLAB_R2015a.app +MATLABROOT ?= /Applications/MATLAB_R2017a.app # Configure CUDA and CuDNN. CUDAMETHOD can be either 'nvcc' or 'mex'. 
-CUDAROOT ?= /Developer/NVIDIA/CUDA-6.5 +CUDAROOT ?= /Developer/NVIDIA/CUDA-8.0 CUDNNROOT ?= $(CURDIR)/local/ CUDAMETHOD ?= $(if $(ENABLE_CUDNN),nvcc,mex) @@ -38,7 +38,7 @@ CUDAMETHOD ?= $(if $(ENABLE_CUDNN),nvcc,mex) # Maintenance NAME = matconvnet -VER = 1.0-beta22 +VER = 1.0-beta24 DIST = $(NAME)-$(VER) LATEST = $(NAME)-latest RSYNC = rsync @@ -82,7 +82,7 @@ LDFLAGS = LDOPTIMFLAGS = LINKLIBS = -lmwblas -NVCCFLAGS_PASS = -gencode=arch=compute_30,code=\"sm_30,compute_30\" +NVCCFLAGS_PASS = -D_FORCE_INLINES -gencode=arch=compute_30,code=\"sm_30,compute_30\" NVCCVER = $(shell $(NVCC) --version | \ sed -n 's/.*V\([0-9]*\).\([0-9]*\).\([0-9]*\).*/\1 \2 \3/p' | \ xargs printf '%02d%02d%02d') @@ -159,12 +159,14 @@ cpp_src+=matlab/src/bits/nnpooling.$(ext) cpp_src+=matlab/src/bits/nnnormalize.$(ext) cpp_src+=matlab/src/bits/nnbnorm.$(ext) cpp_src+=matlab/src/bits/nnbilinearsampler.$(ext) +cpp_src+=matlab/src/bits/nnroipooling.$(ext) mex_src+=matlab/src/vl_nnconv.$(ext) mex_src+=matlab/src/vl_nnconvt.$(ext) mex_src+=matlab/src/vl_nnpool.$(ext) mex_src+=matlab/src/vl_nnnormalize.$(ext) mex_src+=matlab/src/vl_nnbnorm.$(ext) mex_src+=matlab/src/vl_nnbilinearsampler.$(ext) +mex_src+=matlab/src/vl_nnroipool.$(ext) mex_src+=matlab/src/vl_taccummex.$(ext) mex_src+=matlab/src/vl_tmove.$(ext) ifdef ENABLE_IMREADJPEG @@ -180,6 +182,7 @@ cpp_src+=matlab/src/bits/impl/pooling_cpu.cpp cpp_src+=matlab/src/bits/impl/normalize_cpu.cpp cpp_src+=matlab/src/bits/impl/bnorm_cpu.cpp cpp_src+=matlab/src/bits/impl/bilinearsampler_cpu.cpp +cpp_src+=matlab/src/bits/impl/roipooling_cpu.cpp cpp_src+=matlab/src/bits/impl/tinythread.cpp ifdef ENABLE_IMREADJPEG cpp_src+=matlab/src/bits/impl/imread_$(IMAGELIB).cpp @@ -195,6 +198,7 @@ cpp_src+=matlab/src/bits/impl/pooling_gpu.cu cpp_src+=matlab/src/bits/impl/normalize_gpu.cu cpp_src+=matlab/src/bits/impl/bnorm_gpu.cu cpp_src+=matlab/src/bits/impl/bilinearsampler_gpu.cu +cpp_src+=matlab/src/bits/impl/roipooling_gpu.cu cpp_src+=matlab/src/bits/datacu.cu mex_src+=matlab/src/vl_cudatool.cu ifdef ENABLE_CUDNN @@ -255,7 +259,7 @@ CXXOPTIMFLAGS='$$CXXOPTIMFLAGS $(call nvcc-quote,$(CXXOPTIMFLAGS))' MEXFLAGS_LD := $(MEXFLAGS) \ LDFLAGS='$$LDFLAGS $(LDFLAGS)' \ LDOPTIMFLAGS='$$LDOPTIMFLAGS $(LDOPTIMFLAGS)' \ -LINKLIBS='$$LINKLIBS $(LINKLIBS)' \ +LINKLIBS='$(LINKLIBS) $$LINKLIBS' \ NVCCFLAGS = $(CXXFLAGS) $(NVCCFLAGS_PASS) \ -I"$(MATLABROOT)/extern/include" \ diff --git a/doc/Makefile b/doc/Makefile index 06a48506..0382579b 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -28,6 +28,7 @@ vl_nnnormalizelp.m \ vl_nnpdist.m \ vl_nnpool.m \ vl_nnrelu.m \ +vl_nnroipool.m \ vl_nnsigmoid.m \ vl_nnsoftmax.m \ vl_nnsoftmaxloss.m \ @@ -38,6 +39,7 @@ vl_imreadjpeg.m \ vl_imreadjpeg.m \ vl_taccum.m \ vl_tmove.m \ +vl_tshow.m \ simplenn/vl_simplenn.m \ simplenn/vl_simplenn_diagnose.m \ simplenn/vl_simplenn_tidy.m \ diff --git a/doc/blocks.tex b/doc/blocks.tex index 72ed12a5..ee27bdb2 100644 --- a/doc/blocks.tex +++ b/doc/blocks.tex @@ -214,6 +214,46 @@ \section{Spatial bilinear resampling}\label{s:spatial-sampler} See \cref{s:impl-sampler} for implementation details. +% ------------------------------------------------------------------ +\section{Region of interest pooling}\label{s:roi-pooling} +% ------------------------------------------------------------------ + +The \emph{region of interest (ROI) pooling} block applies max or average pooling to specified subwindows of a tensor. A region is a rectangle $R = (u_-,v_-,u_+,v_+)$.
The region itself is partitioned into $(H',W')$ tiles along the vertical and horizontal directions. The edges of the tiles have coordinates +\begin{align*} + v_{i'} &= v_- + \frac{v_+ - v_- + 1}{H'} (i' - 1), \quad i' = 1,\dots,H'+1,\\ + u_{j'} &= u_- + \frac{u_+ - u_- + 1}{W'} (j' - 1), \quad j' = 1,\dots,W'+1. +\end{align*} +Following the implementation of~\cite{girshick15fast}, the $H'\times W'$ pooling tiles are given by +\[ + \Omega_{i'j'} = + \{\lfloor v_{i'} \rfloor + 1, \dots, \lceil v_{i'+1} \rceil\} + \times + \{\lfloor u_{j'} \rfloor + 1, \dots, \lceil u_{j'+1} \rceil\}. +\] +Then the input and output tensors are as follows: +\[ + \bx \in \mathbb{R}^{H \times W \times C}, + \qquad + \by \in \mathbb{R}^{H' \times W' \times C}, +\] +where +\[ + y_{i'j'c} = \operatornamewithlimits{max}_{(i,j) \in \Omega_{i'j'}} x_{ijc}. +\] +Alternatively, $\max$ can be replaced by the averaging operator. + +The extent of each region is defined by four coordinates as specified above; however, differently from tensor indexes, these use $(0,0)$ as the coordinate of the top-left pixel. In fact, if there is a single tile ($H'=W'=1$), then the region $(0,0,W-1,H-1)$ covers the whole input image: +\[ + \Omega_{11} = + \{1, \dots, H\} + \times + \{1, \dots, W\}. +\] + +In more detail, the input to the block is a sequence of $K$ regions. Each region pools one of the $T$ images in the batch stored in $\bx \in \mathbb{R}^{H\times W\times C\times T}$. Regions are therefore specified as a tensor $R \in \mathbb{R}^{5 \times K}$, where the first coordinate is the index of the pooled image in the batch. The output is a $\by \in \mathbb{R}^{H' \times W' \times C \times K}$ tensor. + +Furthermore, for compatibility with~\cite{girshick15fast}, the region coordinates are rounded to the nearest integer before the definitions above are used. Note also that, due to the discretization details, 1) tiles always contain at least one pixel, 2) adjacent tiles can overlap by exactly one pixel, and 3) the discretization has a slight bias towards the top-left pixels. +
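A short worked instance makes the tiling concrete (the numbers here are chosen purely for illustration and are not part of the original section): take a region with $v_-=2$, $v_+=8$ and $H'=3$ vertical subdivisions, so that $v_{i'} = 2 + \frac{7}{3}(i'-1)$.

```latex
% Worked example of the tiling formulas above (illustrative numbers only).
\[
  v_1 = 2, \qquad
  v_2 = \tfrac{13}{3} \approx 4.33, \qquad
  v_3 = \tfrac{20}{3} \approx 6.67, \qquad
  v_4 = 9,
\]
\[
  \{\lfloor v_{i'} \rfloor + 1, \dots, \lceil v_{i'+1} \rceil\}
  \;=\;
  \{3,4,5\}, \qquad \{5,6,7\}, \qquad \{7,8,9\}.
\]
```

Every tile contains at least one pixel, and consecutive tiles share exactly one pixel (rows 5 and 7), illustrating the discretization remarks above.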
% ------------------------------------------------------------------ \section{Normalization}\label{s:normalization} % ------------------------------------------------------------------ diff --git a/doc/impl.tex b/doc/impl.tex index 65dc9217..93179e43 100644 --- a/doc/impl.tex +++ b/doc/impl.tex @@ -167,7 +167,7 @@ \section{Spatial pooling}\label{s:impl-pooling} \frac{d z}{d (\vv \by)^\top} S(\bx), $ -for all but a null set of points, where the operator is not differentiable (this usually does not pose problems in optimization by stochastic gradient). For max-pooling, similar relations exists with two differences: $S$ does not depend on the input $\bx$ and it is not binary, in order to account for the normalization factors. +for all but a null set of points, where the operator is not differentiable (this usually does not pose problems in optimization by stochastic gradient). For average pooling, similar relations exist with two differences: $S$ does not depend on the input $\bx$ and it is not binary, in order to account for the normalization factors. In summary, we have the expressions: \begin{equation}\label{e:max-mat} \boxed{ \vv\by = S(\bx) \vv \bx, @@ -429,12 +429,12 @@ \subsection{Spatial normalization}\label{s:impl-spnorm} The derivative of spatial normalization can be obtained as follows: \begin{align*} \frac{dz}{dx_{ijd}} -&= \sum_{i''j''d} +&= \sum_{i''j''} \frac{dz}{d y_{i''j''d}} \frac{d y_{i''j''d}}{d x_{ijd}} \\ &= -\sum_{i''j''d} +\sum_{i''j''} \frac{dz}{d y_{i''j''d}} (1 + \alpha n_{i''j''d}^2)^{-\beta} \frac{dx_{i''j''d}}{d x_{ijd}} @@ -450,7 +450,7 @@ \subsection{Spatial normalization}\label{s:impl-spnorm} (1 + \alpha n_{ijd}^2)^{-\beta} -2\alpha\beta x_{ijd} \left[ -\sum_{i''j''d} +\sum_{i''j''} \frac{dz}{d y_{i''j''d}} (1 + \alpha n_{i''j''d}^2)^{-\beta-1} x_{i''j''d} @@ -462,7 +462,7 @@ \subsection{Spatial normalization}\label{s:impl-spnorm} (1 + \alpha n_{ijd}^2)^{-\beta} -2\alpha\beta x_{ijd} \left[ -\sum_{i''j''d} +\sum_{i''j''} \eta_{i''j''d} \frac{dn_{i''j''d}^2}{d (x_{ijd}^2)} \right], diff --git a/doc/intro.tex b/doc/intro.tex index 1d017aa4..729d3e63 100644 --- a/doc/intro.tex +++ b/doc/intro.tex @@ -25,8 +25,8 @@ \section{Getting started}\label{s:getting-statrted} \begin{lstlisting}[escapechar=!] % install and compile MatConvNet (run once) untar(['http://www.vlfeat.org/matconvnet/download/' ... - 'matconvnet-1.0-beta12.tar.gz']) ; -cd matconvnet-1.0-beta12 + 'matconvnet-1.0-beta24.tar.gz']) ; +cd matconvnet-1.0-beta24 run matlab/vl_compilenn % download a pre-trained CNN from the web (run once) diff --git a/doc/matdocparser.py b/doc/matdocparser.py index 41ee51b7..8e49ed71 100644 --- a/doc/matdocparser.py +++ b/doc/matdocparser.py @@ -51,7 +51,7 @@ import re __mpname__ = 'MatDocParser' -__version__ = '1.0-beta15' +__version__ = '1.0-beta24' __date__ = '2015-09-20' __description__ = 'MatDoc MATLAB inline function description interpreter.' __long_description__ = __doc__ diff --git a/doc/site/docs/about.md b/doc/site/docs/about.md index 4a1caef9..26c05cdc 100644 --- a/doc/site/docs/about.md +++ b/doc/site/docs/about.md @@ -23,6 +23,28 @@ here. # Changes +- 1.0-beta24 (March 2017). + + **New features** + + * New toy example `cnn_toy_data.m` demonstrating the use of a + customized `imdb`. + * `vl_argparse.m` now supports dot paths and ignoring missing + defaults. + * Support for different example solvers (AdaGrad, Adam, AdaDelta, + RMSProp) and the ability to add new ones. + * A new function `vl_tshow.m` to glance at tensors. + * Bugfixes. + +- 1.0-beta23 (September 2016). + + **New features** + + * A new function `vl_nnroipool.m` for region of interest pooling, + supporting networks such as Fast-RCNN. + * Imported Fast-RCNN models from Caffe. + * An example Fast-RCNN implementation, training and testing. + - 1.0-beta22 (September 2016). * Bugfixes. diff --git a/doc/site/docs/css/fixes.css b/doc/site/docs/css/fixes.css index fb8e4c27..0a0090c2 100644 --- a/doc/site/docs/css/fixes.css +++ b/doc/site/docs/css/fixes.css @@ -66,7 +66,7 @@ a { color: #00438E ; } #Functions .dropdown-menu { color: #000; - max-height: 400px; + max-height: 800px; width: 342px; } diff --git a/doc/site/docs/functions.md b/doc/site/docs/functions.md index 261e09ce..83eb4198 100644 --- a/doc/site/docs/functions.md +++ b/doc/site/docs/functions.md @@ -33,6 +33,7 @@ showing how to train CNNs. - [`vl_nnpdist`](mfiles/vl_nnpdist.md) Pairwise distances. - [`vl_nnpool`](mfiles/vl_nnpool.md) Max and sum pooling. - [`vl_nnrelu`](mfiles/vl_nnrelu.md) Rectified Linear Unit. +- [`vl_nnroipool`](mfiles/vl_nnroipool.md) Region of interest pooling.
- [`vl_nnsigmoid`](mfiles/vl_nnsigmoid.md) Sigmoid. - [`vl_nnsoftmax`](mfiles/vl_nnsoftmax.md) Channel soft-max. - [`vl_nnsoftmaxloss`](mfiles/vl_nnsoftmaxloss.md) *Deprecated* @@ -70,3 +71,4 @@ showing how to train CNNs. - [`vl_imreadjpeg`](mfiles/vl_imreadjpeg.md) Quickly load a batch of JPEG images. - [`vl_taccum`](mfiles/vl_taccum.md) Accumulate tensors operating in-place when possible. - [`vl_tmove`](mfiles/vl_tmove.md) Exchange tensors between MATLAB processes and GPUs. +- [`vl_tshow`](mfiles/vl_tshow.md) Show a tensor on screen.
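The two functions just added to this list can be exercised together. The following is a hypothetical sketch rather than repository code: the ROI layout (image index first, followed by the $(u_-,v_-,u_+,v_+)$ coordinates) mirrors the new `blocks.tex` section, and the `'subdivisions'` option name should be verified against `help vl_nnroipool`.

```matlab
% Pool two regions of interest from a batch of two images (sketch).
x = randn(64, 64, 3, 2, 'single') ;       % HxWxCxT input tensor
rois = [1  0  0 31 31 ;                   % one region per row here,
        2  8  8 40 40]' ;                 % transposed to the 5xK layout
y = vl_nnroipool(x, rois, 'subdivisions', [6 6]) ;  % H'xW'xCxK output

vl_tshow(y) ;                             % glance at the pooled tensor
```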
diff --git a/doc/site/docs/index.md b/doc/site/docs/index.md index d86e5f5a..90b8ad68 100644 --- a/doc/site/docs/index.md +++ b/doc/site/docs/index.md @@ -2,7 +2,7 @@ @@ -31,6 +31,12 @@ efficient, and can run and learn state-of-the-art CNNs. Many pre-trained CNNs for image classification, segmentation, face recognition, and text detection are available. +> **New:** [1.0-beta24](about.md#changes) released with bugfixes, new +> examples, and utility functions. +> +> **New:** [1.0-beta23](about.md#changes) released with +> [`vl_nnroipool`](mfiles/vl_nnroipool) and a Fast-RCNN demo. +> > **New:** [1.0-beta22](about.md#changes) released with a few bugfixes. > > **New:** [1.0-beta21](about.md#changes) provides two new tools, @@ -52,7 +58,7 @@ recognition, and text detection are available. > numerous other improvements and bugfixes. ## Obtaining MatConvNet --  Tarball for [version 1.0-beta22](download/matconvnet-1.0-beta22.tar.gz); [older versions](download/) ( ) +-  Tarball for [version 1.0-beta24](download/matconvnet-1.0-beta24.tar.gz); [older versions](download/) ( ) -  [GIT repository](http://www.github.com/vlfeat/matconvnet.git) -  Citation diff --git a/doc/site/docs/install.md b/doc/site/docs/install.md index 96e379fc..c8b71682 100644 --- a/doc/site/docs/install.md +++ b/doc/site/docs/install.md @@ -21,7 +21,7 @@ To test GPU support (if you have [compiled it](#gpu)) use instead: > vl_testnn('gpu', true) Note that the second test runs slower than the CPU version; do not -worry, this is an artefact of the test procedure. +worry, this is an artifact of the test procedure. ## Compiling @@ -42,6 +42,30 @@ library: 1. Make sure that MATLAB is [configured to use your compiler](http://www.mathworks.com/help/matlab/matlab_external/changing-default-compiler.html). + In particular, before running `vl_compilenn` do not forget to set up + mex (once is sufficient) as follows: + + ``` + mex -setup + ``` + + The prerequisites are as follows: + + * For **macOS**, make sure you have Xcode installed. Note the special + requirements for GPU below. + + * For **Linux**, make sure GCC and LibJPEG are installed. To + install LibJPEG on Ubuntu/Debian-like distributions, use: + ``` + sudo apt-get install build-essential libjpeg-turbo8-dev + ``` + For Fedora/CentOS/RedHat-like distributions, use instead: + ``` + sudo yum install gcc gcc-c++ libjpeg-turbo-devel + ``` + + * For **Windows**, you need to install Visual Studio 2010 or later. + 2. Open MATLAB and issue the commands: > cd @@ -57,7 +81,7 @@ problem by running the compilation script again in verbose mode: Increase the verbosity level to 2 to get even more information. > **Remark:** The 'vl_imreadjpeg' tool uses an external image library -> to load images. In Mac OS X and Windows, the default is to use the +> to load images. In macOS and Windows, the default is to use the > system libraries (Quartz and GDI+ respectively), so this dependency > is immaterial. In Linux, this tool requires the LibJPEG > library and the corresponding development files to be installed in @@ -72,21 +96,42 @@ Increase the verbosity level to 2 to get even more information. To use the GPU-accelerated version of the library, you will need an NVIDIA GPU card with compute capability 2.0 or greater and a copy of -the NVIDIA CUDA toolkit. Ideally, the version of the CUDA toolkit -should match your MATLAB version: +the NVIDIA CUDA toolkit.
Officially, MATLAB supports the following +CUDA versions: | MATLAB | CUDA toolkit | |-----------|-------------------| -| R2013b | 5.5 | -| R2014a | 5.5 | -| R2014b | 6.0 | -| R2015a | 6.5 | +| R2017a | 8.0 | +| R2016b | 7.5 | +| R2016a | 7.5 | | R2015b | 7.0 | +| R2015a | 6.5 | +| R2014b | 6.0 | +| R2014a | 5.5 | +| R2013b | 5.5 | You can also use the `gpuDevice` MATLAB command to find out MATLAB's -version of the CUDA toolkit. It is also possible (and often necessary) -to use a more recent version of CUDA than the one officially supported -by MATLAB; this is [explained later](#nvcc). +version of the CUDA toolkit. Nevertheless, it is also possible and +often useful to use a more recent version of CUDA than the one +officially supported by MATLAB; this is [explained later](#nvcc). + +> **macOS**. CUDA is typically one or two steps behind the latest +> Xcode. For example, CUDA 8.0 requires Xcode 7.3.1 instead of more +> recent versions. You should: +> +> 1. Install Xcode 7.3.1 alongside other versions (e.g. in +> `/Applications/Xcode7.3.1.app`). +> +> 2. Use `xcode-select` in the terminal to change the active version +> of Xcode, as in `sudo xcode-select --switch +> /Applications/Xcode7.3.1.app/Contents/Developer/`. +> +> 3. Use `sudo xcode-select --install` to install the corresponding +> (downgraded) version of the command line tools. This is necessary, +> or CUDA compilation will fail with odd errors. +> +> It can be helpful to consult the +> [CUDA Installation Guide for Mac](http://docs.nvidia.com/cuda/pdf/CUDA_Installation_Guide_Mac.pdf). Assuming that there is only a single copy of the CUDA toolkit installed in your system and that it matches MATLAB's version, compile @@ -96,9 +141,9 @@ the library with: If you have multiple versions of the CUDA toolkit, or if the script cannot find the toolkit for any reason, specify the path to the CUDA -toolkit explicitly. For example, on a Mac this may look like: +toolkit explicitly. For example, on macOS this may look like: - > vl_compilenn('enableGpu', true, 'cudaRoot', '/Developer/NVIDIA/CUDA-7.0') + > vl_compilenn('enableGpu', true, 'cudaRoot', '/Developer/NVIDIA/CUDA-8.0') Once more, you can use the `verbose` option to obtain more information if needed. @@ -112,7 +157,7 @@ cause unforeseen issues (although none is known so far), it is necessary to use [recent libraries such as cuDNN](#cudnn). Compiling with a newer version of CUDA requires using the -`cudaMethod,nvcc` option. For example, on a Mac this may look like: +`cudaMethod,nvcc` option. For example, on macOS this may look like: > vl_compilenn('enableGpu', true, ... 'cudaRoot', '/Developer/NVIDIA/CUDA-7.0', ... 'cudaMethod', 'nvcc') @@ -130,7 +175,7 @@ is to start MATLAB from the command line (terminal) specifying the On Windows, chances are that the CUDA libraries are already visible to MATLAB so that nothing else needs to be done. -On Mac, this step should not be necessary as the library paths are +On macOS, this step should not be necessary as the library paths are hardcoded during compilation. @@ -153,7 +198,7 @@ Unpack the cuDNN library binaries and header files in a place assumed that this cuDNN RC4 has been unpacked in `local/cudnn-rc4` in the `` root directory (i.e. ``=`/local/cudnn-rc4`).
For example, the -directory structure on a Mac should look like: +directory structure on macOS should look like: COPYING Makefile @@ -172,7 +217,7 @@ directory structure on a Mac should look like: Use `vl_compilenn` with the `cudnnEnable,true` option to compile the library; do not forget to use `cudaMethod,nvcc` as, most likely, the CUDA toolkit version is newer than MATLAB's CUDA toolkit. For -example, on Mac this may look like: +example, on macOS this may look like: > vl_compilenn('enableGpu', true, ... 'cudaRoot', '/Developer/NVIDIA/CUDA-7.5', ... 'cudaMethod', 'nvcc', ... @@ -191,12 +236,12 @@ On Windows, copy the cuDNN DLL file `/cudnn*dll` (or from wherever you unpacked cuDNN) into the `/matlab/mex` directory. -On Mac, this step should not be necessary as the library paths are +On macOS, this step should not be necessary as the library paths are hardcoded during compilation. ## Further examples -To compile all the features in MatConvNet on a Mac and MATLAB 2014b, +To compile all the features in MatConvNet on macOS and MATLAB 2014b, CUDA toolkit 6.5 and cuDNN Release Candidate 2, use: > vl_compilenn('enableGpu', true, 'cudaMethod', 'nvcc', ... @@ -216,5 +261,3 @@ Using MATLAB 2015b, CUDA 7.5, and cuDNN R4: 'cudaRoot', '/opt/local/cuda-7.5', ... 'enableCudnn', true, ... 'cudnnRoot', 'local/cudnn-rc4') ; - - diff --git a/doc/site/docs/pretrained.md b/doc/site/docs/pretrained.md index f75a9ba2..b4d57f3d 100644 --- a/doc/site/docs/pretrained.md +++ b/doc/site/docs/pretrained.md @@ -23,6 +23,25 @@ from the example code included in the [quickstart guide](quick.md). > 3. These models are provided here for convenience, but please > credit the original authors. + +## Object detection + +These models are trained for object detection on PASCAL VOC. + +- **Fast R-CNN**. Models from the [Fast R-CNN](https://github.com/rbgirshick/fast-rcnn) page: + + - [fast-rcnn-caffenet-pascal07-dagnn](models/fast-rcnn-caffenet-pascal07-dagnn.mat) [](models/fast-rcnn-caffenet-pascal07-dagnn.svg) + - [fast-rcnn-vggm1k-pascal07-dagnn](models/fast-rcnn-vggm1k-pascal07-dagnn.mat) [](models/fast-rcnn-vggm1k-pascal07-dagnn.svg) + - [fast-rcnn-vgg16-pascal07-dagnn](models/fast-rcnn-vgg16-pascal07-dagnn.mat) [](models/fast-rcnn-vgg16-pascal07-dagnn.svg) + +The model performance is as follows (*mAP 11* indicates mean average precision computed using 11-point interpolation, as per the PASCAL VOC 07 specification): + +|model | training set | PASCAL07 test mAP | mAP 11 | +|-------------------------------------|--------------| ------------------|--------| +|fast-rcnn-caffenet-pascal07-dagnn | imnet12+pas07| 57.3 % | 58.1 % | +|fast-rcnn-vggm1k-pascal07-dagnn | imnet12+pas07| 59.4 % | 60.5 % | +|fast-rcnn-vgg16-pascal07-dagnn | imnet12+pas07| 67.3 % | 68.7 % | ## Face recognition These models are trained for face classification and verification. @@ -260,30 +279,33 @@ The following table summarizes the MD5 checksums for the model files.
| MD5 | File name | |----------------------------------|-----------------------------------------| -| ed49ef44caf18496291ce0c3257b0596 | imagenet-caffe-alex.mat | -| 6d69dfa6e549012c94546658737c5885 | imagenet-caffe-ref.mat | -| 04cd60e8ea6a0d47742206749f624ec8 | imagenet-googlenet-dag.mat | -| 55743accfaf47f5c34fa50fa047143fd | imagenet-matconvnet-alex.mat | -| b359b6ad071155eafa35c84a78f397c7 | imagenet-matconvnet-vgg-f.mat | -| 1bcad2e93b0cc6da3b7d1bf610582279 | imagenet-matconvnet-vgg-m.mat | -| 314c982669e202e0d419803c54d1fb8f | imagenet-matconvnet-vgg-s.mat | -| 14ece491f7311f6dc33bc3186729de5b | imagenet-matconvnet-vgg-verydeep-16.mat | -| be19a35a2b4f4c46ed61df684d08b900 | imagenet-resnet-101-dag.mat | -| 4461d3640d55aa2f58d990f7c92ff28c | imagenet-resnet-152-dag.mat | -| 73a3e51b75230d431c88bb795e14e91d | imagenet-resnet-50-dag.mat | -| f666c61dc968c413ef664a7e17b01144 | imagenet-vgg-f.mat | -| d15f53a30bba3abde4377eced695adab | imagenet-vgg-m-1024.mat | -| 779b86f55d0534d9fd322256372007a5 | imagenet-vgg-m-128.mat | -| 9d20b7ab01ca47617e808008da6b18cc | imagenet-vgg-m-2048.mat | -| 1c164950e882b4ea11623e669a86b1c4 | imagenet-vgg-m.mat | -| 93b683d5420c2eeaf07a6eef492f182b | imagenet-vgg-s.mat | -| 7f0f9f01dfd99c7b7088d1c5a26eb483 | imagenet-vgg-verydeep-16.mat | -| 49e623de543b207d57fab0f6eaf79a7e | imagenet-vgg-verydeep-19.mat | -| 48ccac8fb5c4961815705f1f84581ec3 | pascal-fcn16s-dag.mat | -| bf3ca0a59d1525f63e7c28d526ee0656 | pascal-fcn32s-dag.mat | -| 54b7ce1265a6cdd114d39d05515c73c4 | pascal-fcn8s-dag.mat | -| 2a42dd1d2987983dacffc436cca5dabf | pascal-fcn8s-tvg-dag.mat | -| 27e94d9979dad2385f901f0c360cf3bc | vgg-face.mat | +| 9dcc29b03edb5b136fda31fcd59bd025 | fast-rcnn-caffenet-pascal07-dagnn.mat | +| 961f997d7ff922f1ff6b7b20ad677a4c | fast-rcnn-vgg16-pascal07-dagnn.mat | +| f09a662aef88ac4c23d294eb1fb6f385 | fast-rcnn-vggm1k-pascal07-dagnn.mat | +| b5152a54bf61100a8ed61827c76e7d08 | imagenet-caffe-alex.mat | +| 675dd47906d34f4d9a70fc44210d3804 | imagenet-caffe-ref.mat | +| 97f4e1aa9ad6ed33fd325835710c5092 | imagenet-googlenet-dag.mat | +| 0998b7f66bd8dcbb78a3e75aaa6127f3 | imagenet-matconvnet-alex.mat | +| 790e116ec817a58960b103514891fdbf | imagenet-matconvnet-vgg-f.mat | +| 7e07705fc60c178ee3d2c09dab33acf3 | imagenet-matconvnet-vgg-m.mat | +| 82d5705d292714d0d6581a834485705b | imagenet-matconvnet-vgg-s.mat | +| 89a117515f66c3d7b0eb9d0516c65141 | imagenet-matconvnet-vgg-verydeep-16.mat | +| 16ff04c36c7bd33510e4a81db6dc764b | imagenet-resnet-101-dag.mat | +| f399bef82f5bcaf290d07ccc818833d7 | imagenet-resnet-152-dag.mat | +| d72aa76a8ddf8bd96e405ac0ba827724 | imagenet-resnet-50-dag.mat | +| 3513562d28089bd965cc050cbf8597a6 | imagenet-vgg-f.mat | +| 1410d01878346f911e991dd0c1bb983b | imagenet-vgg-m-1024.mat | +| 3a8dc312a44a99d21ad43e8d96a8590f | imagenet-vgg-m-128.mat | +| 087ec812e0a09980bd934e2e7ba157f6 | imagenet-vgg-m-2048.mat | +| 29294d5f62578c96a3533859514235f2 | imagenet-vgg-m.mat | +| dbe23da22e82078debda75842b16d5fa | imagenet-vgg-s.mat | +| f72d927587ca4c97fbd165ec9cb9997f | imagenet-vgg-verydeep-16.mat | +| 106118b7cf60435e6d8e04f6a6dc3657 | imagenet-vgg-verydeep-19.mat | +| 2e49dd427829cdbc08a0154f994687d7 | pascal-fcn16s-dag.mat | +| a1331885ec72a0721e51ac9d16262a48 | pascal-fcn32s-dag.mat | +| 0eeb6a4bc819616ea66f88a3db878983 | pascal-fcn8s-dag.mat | +| b0f7bd5833b555c6241dd0c05897ca41 | pascal-fcn8s-tvg-dag.mat | +| 3d6cd504bf9c98af4a561aad059565d1 | vgg-face.mat | ## Older file versions diff --git a/doc/site/docs/quick.md b/doc/site/docs/quick.md index 
4335dfed..dd55f7e3 100644 --- a/doc/site/docs/quick.md +++ b/doc/site/docs/quick.md @@ -11,8 +11,8 @@ speed, downloading the CNN model may require some time. ```matlab % Install and compile MatConvNet (needed once). -untar('http://www.vlfeat.org/matconvnet/download/matconvnet-1.0-beta22.tar.gz') ; -cd matconvnet-1.0-beta22 +untar('http://www.vlfeat.org/matconvnet/download/matconvnet-1.0-beta24.tar.gz') ; +cd matconvnet-1.0-beta24 run matlab/vl_compilenn ; % Download a pre-trained CNN from the web (needed once). diff --git a/doc/site/mkdocs.yml b/doc/site/mkdocs.yml index e24a5c3e..a268e24c 100644 --- a/doc/site/mkdocs.yml +++ b/doc/site/mkdocs.yml @@ -1,5 +1,5 @@ site_name: MatConvNet -markdown_extensions: ['extra', 'mathjax', 'toc'] +markdown_extensions: ['extra', 'math', 'toc'] theme_dir: theme extra_css: ['css/fixes.css'] @@ -37,6 +37,7 @@ pages: - vl_nnnormalize: 'mfiles/vl_nnnormalize.md' - vl_nnpool: 'mfiles/vl_nnpool.md' - vl_nnrelu: 'mfiles/vl_nnrelu.md' + - vl_nnroipool: 'mfiles/vl_nnroipool.md' - vl_nnsigmoid: 'mfiles/vl_nnsigmoid.md' - vl_nnsoftmax: 'mfiles/vl_nnsoftmax.md' - vl_nnsoftmaxloss: 'mfiles/vl_nnsoftmaxloss.md' @@ -53,6 +54,7 @@ pages: - vl_imreadjpeg: 'mfiles/vl_imreadjpeg.md' - vl_taccum: 'mfiles/vl_taccum.md' - vl_tmove: 'mfiles/vl_tmove.md' + - vl_tshow: 'mfiles/vl_tshow.md' - Contributing: - Developers notes: 'developers.md' diff --git a/examples/+solver/adadelta.m b/examples/+solver/adadelta.m new file mode 100644 index 00000000..c13d5d4a --- /dev/null +++ b/examples/+solver/adadelta.m @@ -0,0 +1,42 @@ +function [w, state] = adadelta(w, state, grad, opts, ~) +%ADADELTA +% Example AdaDelta solver, for use with CNN_TRAIN and CNN_TRAIN_DAG. +% +% AdaDelta sets its own learning rate, so any learning rate set in the +% options of CNN_TRAIN and CNN_TRAIN_DAG will be ignored. +% +% If called without any input argument, returns the default options +% structure. +% +% Solver options: (opts.train.solverOpts) +% +% `epsilon`:: 1e-6 +% Small additive constant to regularize variance estimate. +% +% `rho`:: 0.9 +% Moving average window for variance update, between 0 and 1 (larger +% values result in slower/more stable updating). + +% Copyright (C) 2016 Joao F. Henriques. +% All rights reserved. +% +% This file is part of the VLFeat library and is made available under +% the terms of the BSD license (see the COPYING file). + +if nargin == 0 % Return the default solver options + w = struct('epsilon', 1e-6, 'rho', 0.9) ; + return ; +end + +if isequal(state, 0) % First iteration, initialize state struct + state = struct('g_sqr', 0, 'delta_sqr', 0) ; +end + +rho = opts.rho ; + +state.g_sqr = state.g_sqr * rho + grad.^2 * (1 - rho) ; +new_delta = -sqrt((state.delta_sqr + opts.epsilon) ./ ... + (state.g_sqr + opts.epsilon)) .* grad ; +state.delta_sqr = state.delta_sqr * rho + new_delta.^2 * (1 - rho) ; + +w = w + new_delta ; diff --git a/examples/+solver/adagrad.m b/examples/+solver/adagrad.m new file mode 100644 index 00000000..69d66fd4 --- /dev/null +++ b/examples/+solver/adagrad.m @@ -0,0 +1,43 @@ +function [w, g_sqr] = adagrad(w, g_sqr, grad, opts, lr) +%ADAGRAD +% Example AdaGrad solver, for use with CNN_TRAIN and CNN_TRAIN_DAG. +% +% Set the initial learning rate for AdaGrad in the options for +% CNN_TRAIN and CNN_TRAIN_DAG. Note that a learning rate that works for +% SGD may be inappropriate for AdaGrad; the default is 0.001. +% +% If called without any input argument, returns the default options +% structure. 
+% +% Solver options: (opts.train.solverOpts) +% +% `epsilon`:: 1e-10 +% Small additive constant to regularize variance estimate. +% +% `rho`:: 1 +% Moving average window for variance update, between 0 and 1 (larger +% values result in slower/more stable updating). This is similar to +% RHO in AdaDelta and RMSProp. Standard AdaGrad is obtained with a RHO +% value of 1 (the total average is used instead of a moving average). +% +% A possibly undesirable effect of standard AdaGrad is that the update +% will monotonically decrease to 0, until training eventually stops. This +% is because the AdaGrad update is inversely proportional to the total +% variance of the gradients seen so far. +% With RHO smaller than 1, a moving average is used instead. This +% prevents the final update from monotonically decreasing to 0. + +% Copyright (C) 2016 Joao F. Henriques. +% All rights reserved. +% +% This file is part of the VLFeat library and is made available under +% the terms of the BSD license (see the COPYING file). + +if nargin == 0 % Return the default solver options + w = struct('epsilon', 1e-10, 'rho', 1) ; + return ; +end + +g_sqr = g_sqr * opts.rho + grad.^2 ; + +w = w - lr * grad ./ (sqrt(g_sqr) + opts.epsilon) ; diff --git a/examples/+solver/adam.m b/examples/+solver/adam.m new file mode 100644 index 00000000..f710c4c7 --- /dev/null +++ b/examples/+solver/adam.m @@ -0,0 +1,75 @@ +function [w, state] = adam(w, state, grad, opts, lr) +%ADAM +% Adam solver for use with CNN_TRAIN and CNN_TRAIN_DAG. +% +% See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) +% | ([pdf](http://arxiv.org/pdf/1412.6980.pdf)). +% +% If called without any input argument, returns the default options +% structure. Otherwise provide all input arguments. +% +% W is the vector/matrix/tensor of parameters. It can be single/double +% precision and can be a `gpuArray`. +% +% STATE is as defined below, and so are the supported OPTS. +% +% GRAD is the gradient of the objective w.r.t. W. +% +% LR is the learning rate, referred to as \alpha by Algorithm 1 in +% [Kingma et al., 2014]. +% +% Solver options: (opts.train.solverOpts) +% +% `beta1`:: 0.9 +% Decay for 1st moment vector. See algorithm 1 in [Kingma et al., 2014] +% +% `beta2`:: 0.999 +% Decay for 2nd moment vector +% +% `eps`:: 1e-8 +% Additive offset when dividing by state.v +% +% The state is initialized as 0 (number) to start with. The first call to +% this function will initialize it with the default state consisting of +% +% `m`:: 0 +% First moment vector +% +% `v`:: 0 +% Second moment vector +% +% `t`:: 0 +% Global iteration number across epochs +% +% This implementation is borrowed from Torch's optim.adam. + +% Copyright (C) 2016 Aravindh Mahendran. +% All rights reserved. +% +% This file is part of the VLFeat library and is made available under +% the terms of the BSD license (see the COPYING file).
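Because all of these example solvers share one calling convention, a brief usage sketch may help. It is hypothetical: it assumes an existing `net`, `imdb` and `getBatch` as in the other examples, and uses the `'solver'` option that this change adds to `cnn_train` and `cnn_train_dag`.

```matlab
% Train with Adam in place of the default SGD solver (sketch; any of
% the examples/+solver functions can be passed the same way).
[net, stats] = cnn_train(net, imdb, getBatch, ...
                         'solver', @solver.adam, ...
                         'learningRate', 0.001) ;
```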
+ +if nargin == 0 % Returns the default solver options + w = struct('beta1', 0.9, 'beta2', 0.999, 'eps', 1e-8) ; + return ; +end + +if isequal(state, 0) % start off with state = 0 so as to get default state + state = struct('m', 0, 'v', 0, 't', 0); +end + +% update first moment vector `m` +state.m = opts.beta1 * state.m + (1 - opts.beta1) * grad ; + +% update second moment vector `v` +state.v = opts.beta2 * state.v + (1 - opts.beta2) * grad.^2 ; + +% update the time step +state.t = state.t + 1 ; + +% This implicitly corrects for biased estimates of first and second moment +% vectors +lr_t = lr * (((1 - opts.beta2^state.t)^0.5) / (1 - opts.beta1^state.t)) ; + +% Update `w` +w = w - lr_t * state.m ./ (state.v.^0.5 + opts.eps) ; diff --git a/examples/+solver/rmsprop.m b/examples/+solver/rmsprop.m new file mode 100644 index 00000000..f1ae96e6 --- /dev/null +++ b/examples/+solver/rmsprop.m @@ -0,0 +1,34 @@ +function [w, g_sqr] = rmsprop(w, g_sqr, grad, opts, lr) +%RMSPROP +% Example RMSProp solver, for use with CNN_TRAIN and CNN_TRAIN_DAG. +% +% Set the initial learning rate for RMSProp in the options for +% CNN_TRAIN and CNN_TRAIN_DAG. Note that a learning rate that works for +% SGD may be inappropriate for RMSProp; the default is 0.001. +% +% If called without any input argument, returns the default options +% structure. +% +% Solver options: (opts.train.solverOpts) +% +% `epsilon`:: 1e-8 +% Small additive constant to regularize variance estimate. +% +% `rho`:: 0.99 +% Moving average window for variance update, between 0 and 1 (larger +% values result in slower/more stable updating). + +% Copyright (C) 2016 Joao F. Henriques. +% All rights reserved. +% +% This file is part of the VLFeat library and is made available under +% the terms of the BSD license (see the COPYING file). + +if nargin == 0 % Return the default solver options + w = struct('epsilon', 1e-8, 'rho', 0.99) ; + return ; +end + +g_sqr = g_sqr * opts.rho + grad.^2 * (1 - opts.rho) ; + +w = w - lr * grad ./ (sqrt(g_sqr) + opts.epsilon) ; diff --git a/examples/cnn_train.m b/examples/cnn_train.m index cab63a48..99edd5db 100644 --- a/examples/cnn_train.m +++ b/examples/cnn_train.m @@ -16,6 +16,7 @@ % % This file is part of the VLFeat library and is made available under % the terms of the BSD license (see the COPYING file). +addpath(fullfile(vl_rootnn, 'examples')); opts.expDir = fullfile('data','exp') ; opts.continue = true ; @@ -24,12 +25,23 @@ opts.train = [] ; opts.val = [] ; opts.gpus = [] ; +opts.epochSize = inf; opts.prefetch = false ; opts.numEpochs = 300 ; opts.learningRate = 0.001 ; opts.weightDecay = 0.0005 ; + +opts.solver = [] ; % Empty array means use the default SGD solver +[opts, varargin] = vl_argparse(opts, varargin) ; +if ~isempty(opts.solver) + assert(isa(opts.solver, 'function_handle') && nargout(opts.solver) == 2,... 
+ 'Invalid solver; expected a function handle with two outputs.') ; + % Call without input arguments, to get default options + opts.solverOpts = opts.solver() ; +end + opts.momentum = 0.9 ; -opts.saveMomentum = true ; +opts.saveSolverState = true ; opts.nesterovUpdate = false ; opts.randomSeed = 0 ; opts.memoryMapFile = fullfile(tempdir, 'matconvnet.bin') ; @@ -45,13 +57,18 @@ opts.errorLabels = {} ; opts.plotDiagnostics = false ; opts.plotStatistics = true; +opts.postEpochFn = [] ; % postEpochFn(net,params,state) called after each epoch; can return a new learning rate, 0 to stop, [] for no change opts = vl_argparse(opts, varargin) ; if ~exist(opts.expDir, 'dir'), mkdir(opts.expDir) ; end if isempty(opts.train), opts.train = find(imdb.images.set==1) ; end if isempty(opts.val), opts.val = find(imdb.images.set==2) ; end -if isnan(opts.train), opts.train = [] ; end -if isnan(opts.val), opts.val = [] ; end +if isscalar(opts.train) && isnumeric(opts.train) && isnan(opts.train) + opts.train = [] ; +end +if isscalar(opts.val) && isnumeric(opts.val) && isnan(opts.val) + opts.val = [] ; +end % ------------------------------------------------------------------------- % Initialization @@ -124,6 +141,7 @@ params.epoch = epoch ; params.learningRate = opts.learningRate(min(epoch, numel(opts.learningRate))) ; params.train = opts.train(randperm(numel(opts.train))) ; % shuffle + params.train = params.train(1:min(opts.epochSize, numel(opts.train))); params.val = opts.val(randperm(numel(opts.val))) ; params.imdb = imdb ; params.getBatch = getBatch ; @@ -150,7 +168,9 @@ stats.train(epoch) = lastStats.train ; stats.val(epoch) = lastStats.val ; clear lastStats ; - saveStats(modelPath(epoch), stats) ; + if ~evaluateMode + saveStats(modelPath(epoch), stats) ; + end if params.plotStatistics switchFigure(1) ; clf ; @@ -180,6 +200,16 @@ drawnow ; print(1, modelFigPath, '-dpdf') ; end + + if ~isempty(opts.postEpochFn) + if nargout(opts.postEpochFn) == 0 + opts.postEpochFn(net, params, state) ; + else + lr = opts.postEpochFn(net, params, state) ; + if ~isempty(lr), opts.learningRate = lr; end + if opts.learningRate == 0, break; end + end + end end % With multiple GPUs, return one copy @@ -230,11 +260,10 @@ % spmd caller. % initialize with momentum 0 -if isempty(state) || isempty(state.momentum) +if isempty(state) || isempty(state.solverState) for i = 1:numel(net.layers) - for j = 1:numel(net.layers{i}.weights) - state.momentum{i}{j} = 0 ; - end + state.solverState{i} = cell(1, numel(net.layers{i}.weights)) ; + state.solverState{i}(:) = {0} ; end end @@ -242,9 +271,14 @@ numGpus = numel(params.gpus) ; if numGpus >= 1 net = vl_simplenn_move(net, 'gpu') ; - for i = 1:numel(state.momentum) - for j = 1:numel(state.momentum{i}) - state.momentum{i}{j} = gpuArray(state.momentum{i}{j}) ; + for i = 1:numel(state.solverState) + for j = 1:numel(state.solverState{i}) + s = state.solverState{i}{j} ; + if isnumeric(s) + state.solverState{i}{j} = gpuArray(s) ; + elseif isstruct(s) + state.solverState{i}{j} = structfun(@gpuArray, s, 'UniformOutput', false) ; + end end end end @@ -370,7 +404,7 @@ 'XScale', 'log', ... 'XLim', [1e-5 1], ... 'XTick', 10.^(-5:1)) ; - grid on ; + grid on ; title('Variation'); subplot(2,2,2) ; barh(sqrt(diagnpow)) ; set(gca,'TickLabelInterpreter', 'none', ... 'YTick', 1:numel(diagnpow), ... @@ -379,7 +413,7 @@ 'XScale', 'log', ... 'XLim', [1e-5 1e5], ... 
'XTick', 10.^(-5:5)) ; - grid on ; + grid on ; title('Power'); subplot(2,2,3); plot(squeeze(res(end-1).x)) ; drawnow ; end @@ -396,12 +430,17 @@ mpiprofile off ; end end -if ~params.saveMomentum - state.momentum = [] ; +if ~params.saveSolverState + state.solverState = [] ; else - for i = 1:numel(state.momentum) - for j = 1:numel(state.momentum{i}) - state.momentum{i}{j} = gather(state.momentum{i}{j}) ; + for i = 1:numel(state.solverState) + for j = 1:numel(state.solverState{i}) + s = state.solverState{i}{j} ; + if isnumeric(s) + state.solverState{i}{j} = gather(s) ; + elseif isstruct(s) + state.solverState{i}{j} = structfun(@gather, s, 'UniformOutput', false) ; + end end end end @@ -437,28 +476,37 @@ thisDecay = params.weightDecay * net.layers{l}.weightDecay(j) ; thisLR = params.learningRate * net.layers{l}.learningRate(j) ; - % Normalize gradient and incorporate weight decay. - parDer = vl_taccum(1/batchSize, parDer, ... - thisDecay, net.layers{l}.weights{j}) ; + if thisLR>0 || thisDecay>0 + % Normalize gradient and incorporate weight decay. + parDer = vl_taccum(1/batchSize, parDer, ... + thisDecay, net.layers{l}.weights{j}) ; + + if isempty(params.solver) + % Default solver is the optimised SGD. + % Update momentum. + state.solverState{l}{j} = vl_taccum(... + params.momentum, state.solverState{l}{j}, ... + -1, parDer) ; + + % Nesterov update (aka one step ahead). + if params.nesterovUpdate + delta = params.momentum * state.solverState{l}{j} - parDer ; + else + delta = state.solverState{l}{j} ; + end - % Update momentum. - state.momentum{l}{j} = vl_taccum(... - params.momentum, state.momentum{l}{j}, ... - -1, parDer) ; + % Update parameters. + net.layers{l}.weights{j} = vl_taccum(... + 1, net.layers{l}.weights{j}, ... + thisLR, delta) ; - % Nesterov update (aka one step ahead). - if params.nesterovUpdate - delta = vl_taccum(... - params.momentum, state.momentum{l}{j}, ... - -1, parDer) ; - else - delta = state.momentum{l}{j} ; + else + % call solver function to update weights + [net.layers{l}.weights{j}, state.solverState{l}{j}] = ... + params.solver(net.layers{l}.weights{j}, state.solverState{l}{j}, ... + parDer, params.solverOpts, thisLR) ; + end end - - % Update parameters. - net.layers{l}.weights{j} = vl_taccum(... - 1, net.layers{l}.weights{j}, ... - thisLR, delta) ; end % if requested, collect some useful stats for debugging @@ -467,7 +515,9 @@ label = '' ; switch net.layers{l}.type case {'conv','convt'} - variation = thisLR * mean(abs(state.momentum{l}{j}(:))) ; + if isnumeric(state.solverState{l}{j}) + variation = thisLR * mean(abs(state.solverState{l}{j}(:))) ; + end power = mean(res(l+1).x(:).^2) ; if j == 1 % filters base = mean(net.layers{l}.weights{j}(:).^2) ; diff --git a/examples/cnn_train_dag.m b/examples/cnn_train_dag.m index 25cbff46..b1898cc3 100644 --- a/examples/cnn_train_dag.m +++ b/examples/cnn_train_dag.m @@ -8,6 +8,7 @@ % % This file is part of the VLFeat library and is made available under % the terms of the BSD license (see the COPYING file). +addpath(fullfile(vl_rootnn, 'examples')); opts.expDir = fullfile('data','exp') ; opts.continue = true ; @@ -17,11 +18,22 @@ opts.val = [] ; opts.gpus = [] ; opts.prefetch = false ; +opts.epochSize = inf; opts.numEpochs = 300 ; opts.learningRate = 0.001 ; opts.weightDecay = 0.0005 ; + +opts.solver = [] ; % Empty array means use the default SGD solver +[opts, varargin] = vl_argparse(opts, varargin) ; +if ~isempty(opts.solver) + assert(isa(opts.solver, 'function_handle') && nargout(opts.solver) == 2,...
+ 'Invalid solver; expected a function handle with two outputs.') ; + % Call without input arguments, to get default options + opts.solverOpts = opts.solver() ; +end + opts.momentum = 0.9 ; -opts.saveMomentum = true ; +opts.saveSolverState = true ; opts.nesterovUpdate = false ; opts.randomSeed = 0 ; opts.profile = false ; @@ -31,13 +43,18 @@ opts.derOutputs = {'objective', 1} ; opts.extractStatsFn = @extractStats ; opts.plotStatistics = true; +opts.postEpochFn = [] ; % postEpochFn(net,params,state) called after each epoch; can return a new learning rate, 0 to stop, [] for no change opts = vl_argparse(opts, varargin) ; if ~exist(opts.expDir, 'dir'), mkdir(opts.expDir) ; end if isempty(opts.train), opts.train = find(imdb.images.set==1) ; end if isempty(opts.val), opts.val = find(imdb.images.set==2) ; end -if isnan(opts.train), opts.train = [] ; end -if isnan(opts.val), opts.val = [] ; end +if isscalar(opts.train) && isnumeric(opts.train) && isnan(opts.train) + opts.train = [] ; +end +if isscalar(opts.val) && isnumeric(opts.val) && isnan(opts.val) + opts.val = [] ; +end % ------------------------------------------------------------------------- % Initialization @@ -79,6 +96,7 @@ params.epoch = epoch ; params.learningRate = opts.learningRate(min(epoch, numel(opts.learningRate))) ; params.train = opts.train(randperm(numel(opts.train))) ; % shuffle + params.train = params.train(1:min(opts.epochSize, numel(opts.train))); params.val = opts.val(randperm(numel(opts.val))) ; params.imdb = imdb ; params.getBatch = getBatch ; @@ -135,6 +153,16 @@ drawnow ; print(1, modelFigPath, '-dpdf') ; end + + if ~isempty(opts.postEpochFn) + if nargout(opts.postEpochFn) == 0 + opts.postEpochFn(net, params, state) ; + else + lr = opts.postEpochFn(net, params, state) ; + if ~isempty(lr), opts.learningRate = lr; end + if opts.learningRate == 0, break; end + end + end end % With multiple GPUs, return one copy @@ -148,15 +176,23 @@ % spmd caller. % initialize with momentum 0 -if isempty(state) || isempty(state.momentum) - state.momentum = num2cell(zeros(1, numel(net.params))) ; +if isempty(state) || isempty(state.solverState) + state.solverState = cell(1, numel(net.params)) ; + state.solverState(:) = {0} ; end % move CNN to GPU as needed numGpus = numel(params.gpus) ; if numGpus >= 1 net.move('gpu') ; - state.momentum = cellfun(@gpuArray, state.momentum, 'uniformoutput', false) ; + for i = 1:numel(state.solverState) + s = state.solverState{i} ; + if isnumeric(s) + state.solverState{i} = gpuArray(s) ; + elseif isstruct(s) + state.solverState{i} = structfun(@gpuArray, s, 'UniformOutput', false) ; + end + end end if numGpus > 1 parserv = ParameterServer(params.parameterServer) ; @@ -260,10 +296,17 @@ mpiprofile off ; end end -if ~params.saveMomentum - state.momentum = [] ; +if ~params.saveSolverState + state.solverState = [] ; else - state.momentum = cellfun(@gather, state.momentum, 'uniformoutput', false) ; + for i = 1:numel(state.solverState) + s = state.solverState{i} ; + if isnumeric(s) + state.solverState{i} = gather(s) ; + elseif isstruct(s) + state.solverState{i} = structfun(@gather, s, 'UniformOutput', false) ; + end + end end net.reset() ; @@ -295,28 +338,36 @@ thisDecay = params.weightDecay * net.params(p).weightDecay ; thisLR = params.learningRate * net.params(p).learningRate ; - % Normalize gradient and incorporate weight decay. - parDer = vl_taccum(1/batchSize, parDer, ... - thisDecay, net.params(p).value) ; - - % Update momentum. - state.momentum{p} = vl_taccum(... - params.momentum, state.momentum{p}, ... 
- -1, parDer) ; - - % Nesterov update (aka one step ahead). - if params.nesterovUpdate - delta = vl_taccum(... - params.momentum, state.momentum{p}, ... - -1, parDer) ; - else - delta = state.momentum{p} ; + if thisLR>0 || thisDecay>0 + % Normalize gradient and incorporate weight decay. + parDer = vl_taccum(1/batchSize, parDer, ... + thisDecay, net.params(p).value) ; + + if isempty(params.solver) + % Default solver is the optimised SGD. + % Update momentum. + state.solverState{p} = vl_taccum(... + params.momentum, state.solverState{p}, ... + -1, parDer) ; + + % Nesterov update (aka one step ahead). + if params.nesterovUpdate + delta = params.momentum * state.solverState{p} - parDer ; + else + delta = state.solverState{p} ; + end + + % Update parameters. + net.params(p).value = vl_taccum(... + 1, net.params(p).value, thisLR, delta) ; + + else + % call solver function to update weights + [net.params(p).value, state.solverState{p}] = ... + params.solver(net.params(p).value, state.solverState{p}, ... + parDer, params.solverOpts, thisLR) ; + end end - - % Update parameters. - net.params(p).value = vl_taccum(... - 1, net.params(p).value, thisLR, delta) ; - otherwise error('Unknown training method ''%s'' for parameter ''%s''.', ... net.params(p).trainMethod, ... @@ -362,6 +413,7 @@ % ------------------------------------------------------------------------- sel = find(cellfun(@(x) isa(x,'dagnn.Loss'), {net.layers.block})) ; for i = 1:numel(sel) + if net.layers(sel(i)).block.ignoreAverage, continue; end; stats.(net.layers(sel(i)).outputs{1}) = net.layers(sel(i)).block.average ; end diff --git a/examples/custom_imdb/cnn_toy_data.m b/examples/custom_imdb/cnn_toy_data.m new file mode 100644 index 00000000..59c2b212 --- /dev/null +++ b/examples/custom_imdb/cnn_toy_data.m @@ -0,0 +1,155 @@ +function [net, stats] = cnn_toy_data(varargin) +% CNN_TOY_DATA +% Minimal demonstration of MatConvNet training of a CNN on toy data. +% +% It also serves as a short tutorial on creating and using a custom imdb +% (image database). +% +% The task is to distinguish between images of triangles, squares and +% circles. + +% Copyright (C) 2017 Joao F. Henriques. +% All rights reserved. +% +% This file is part of the VLFeat library and is made available under +% the terms of the BSD license (see the COPYING file). + +run([fileparts(mfilename('fullpath')) '/../../matlab/vl_setupnn.m']) ; + +% Parameter defaults. You can add any custom parameters here (e.g. +% opts.alpha = 1), and change them when calling: cnn_toy_data('alpha', 2). +opts.train.batchSize = 200 ; +opts.train.numEpochs = 10 ; +opts.train.continue = true ; +opts.train.gpus = [] ; +opts.train.learningRate = 0.01 ; +opts.train.expDir = [vl_rootnn '/data/toy'] ; +opts.dataDir = [vl_rootnn '/data/toy-dataset'] ; +[opts, varargin] = vl_argparse(opts, varargin) ; + +opts.imdbPath = [opts.train.expDir '/imdb.mat'] ; +opts = vl_argparse(opts, varargin) ; + +% -------------------------------------------------------------------- +% Prepare data +% -------------------------------------------------------------------- + +% Generate images if they don't exist (this would be skipped for real data) +if ~exist(opts.dataDir, 'dir') + mkdir(opts.dataDir) ; + cnn_toy_data_generator(opts.dataDir) ; +end + +% Create image database (imdb struct).
It can be cached to a file for speed +if exist(opts.imdbPath, 'file') + disp('Reloading image database...') + imdb = load(opts.imdbPath) ; +else + disp('Creating image database...') + imdb = getImdb(opts.dataDir) ; + mkdir(fileparts(opts.imdbPath)) ; + save(opts.imdbPath, '-struct', 'imdb') ; +end + +% Create network (see HELP VL_SIMPLENN) +f = 1/100 ; +net.layers = {} ; +net.layers{end+1} = struct('type', 'conv', ... + 'weights', {{f*randn(5,5,1,5, 'single'), zeros(1, 5, 'single')}}) ; +net.layers{end+1} = struct('type', 'pool', ... + 'method', 'max', ... + 'pool', [2 2], ... + 'stride', 2) ; +net.layers{end+1} = struct('type', 'conv', ... + 'weights', {{f*randn(5,5,5,10, 'single'),zeros(1,10,'single')}}) ; +net.layers{end+1} = struct('type', 'pool', ... + 'method', 'max', ... + 'pool', [2 2], ... + 'stride', 2) ; +net.layers{end+1} = struct('type', 'conv', ... + 'weights', {{f*randn(5,5,10,3, 'single'), zeros(1,3,'single')}}) ; +net.layers{end+1} = struct('type', 'softmaxloss') ; + +% Fill in any values we didn't specify explicitly +net = vl_simplenn_tidy(net) ; + + +% -------------------------------------------------------------------- +% Train +% -------------------------------------------------------------------- + +use_gpu = ~isempty(opts.train.gpus) ; + +% Start training +[net, stats] = cnn_train(net, imdb, @(imdb, batch) getBatch(imdb, batch, use_gpu), ... + 'train', find(imdb.set == 1), 'val', find(imdb.set == 2), opts.train) ; + +% Visualize the learned filters +figure(3) ; vl_tshow(net.layers{1}.weights{1}) ; title('Conv1 filters') ; +figure(4) ; vl_tshow(net.layers{3}.weights{1}) ; title('Conv2 filters') ; +figure(5) ; vl_tshow(net.layers{5}.weights{1}) ; title('Conv3 filters') ; + + +% -------------------------------------------------------------------- +function [images, labels] = getBatch(imdb, batch, use_gpu) +% -------------------------------------------------------------------- +% This is where we return a given set of images (and their labels) from +% our imdb structure. +% If the dataset were too large to fit in memory, getBatch could load images +% from disk instead (with indexes given in 'batch'). + +images = imdb.images(:,:,:,batch) ; +labels = imdb.labels(batch) ; + +if use_gpu + images = gpuArray(images) ; +end + +% -------------------------------------------------------------------- +function imdb = getImdb(dataDir) +% -------------------------------------------------------------------- +% Initialize the imdb structure (image database). +% Note the fields are arbitrary: only your getBatch needs to understand it. +% The field imdb.set distinguishes between the training and validation +% sets, and is only used in the above call to cnn_train.
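For reference, this is the shape of the struct that the function below builds; it summarizes the code that follows, and the field names are this example's convention rather than a fixed MatConvNet interface.

```matlab
% Sketch of the imdb produced by getImdb below:
%   imdb.images   32x32x1xN single array, later mean-subtracted
%   imdb.labels   Nx1 class labels in 1..3
%   imdb.set      Nx1 flags: 1 = training sample, 2 = validation sample
```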
+ +% The sets, and number of samples per label in each set +sets = {'train', 'val'} ; +numSamples = [1500, 150] ; + +% Preallocate memory +totalSamples = 4950 ; % 3 * 1500 + 3 * 150 +images = zeros(32, 32, 1, totalSamples, 'single') ; +labels = zeros(totalSamples, 1) ; +set = ones(totalSamples, 1) ; + +% Read all samples +sample = 1 ; +for s = 1:2 % Iterate sets + for label = 1:3 % Iterate labels + for i = 1:numSamples(s) % Iterate samples + % Read image + im = imread(sprintf('%s/%s/%i/%04i.png', dataDir, sets{s}, label, i)) ; + + % Store it, along with label and train/val set information + images(:,:,:,sample) = single(im) ; + labels(sample) = label ; + set(sample) = s ; + sample = sample + 1 ; + end + end +end + +% Show some random example images +figure(2) ; +montage(images(:,:,:,randperm(totalSamples, 100))) ; +title('Example images') ; + +% Remove mean over whole dataset +images = bsxfun(@minus, images, mean(images, 4)) ; + +% Store results in the imdb struct +imdb.images = images ; +imdb.labels = labels ; +imdb.set = set ; + diff --git a/examples/custom_imdb/cnn_toy_data_generator.m b/examples/custom_imdb/cnn_toy_data_generator.m new file mode 100644 index 00000000..cbe9d2d1 --- /dev/null +++ b/examples/custom_imdb/cnn_toy_data_generator.m @@ -0,0 +1,51 @@ +function cnn_toy_data_generator(dataDir) +%CNN_TOY_DATA_GENERATOR +% Generates toy data in the given path: random images of triangles, +% squares and circles. +% +% The directory format is: '//