From 4be73e25b5db68d94a58c094b6bfd31eece9827a Mon Sep 17 00:00:00 2001
From: umuguc
Date: Sat, 18 Apr 2015 00:28:20 +0200
Subject: [PATCH] mnistAutoencoder

---
 .../mnistAutoencoder/cnn_mnist_autoencoder.m  | 279 +++++++++++++++
 .../cnn_mnist_autoencoder_test_demo.m         |  76 +++++
 .../cnn_mnist_autoencoder_training_demo.m     |  33 ++
 examples/mnistAutoencoder/cnn_train.m         | 320 ++++++++++++++++++
 examples/mnistAutoencoder/cnn_train_adagrad.m | 318 +++++++++++++++++
 examples/mnistAutoencoder/euclideanloss.m     |  26 ++
 examples/mnistAutoencoder/sigmoid.m           |  16 +
 .../sigmoidcrossentropyloss.m                 |  29 ++
 examples/mnistAutoencoder/vl_simplenn.m       | 249 ++++++++++++++
 9 files changed, 1346 insertions(+)
 create mode 100644 examples/mnistAutoencoder/cnn_mnist_autoencoder.m
 create mode 100644 examples/mnistAutoencoder/cnn_mnist_autoencoder_test_demo.m
 create mode 100644 examples/mnistAutoencoder/cnn_mnist_autoencoder_training_demo.m
 create mode 100644 examples/mnistAutoencoder/cnn_train.m
 create mode 100644 examples/mnistAutoencoder/cnn_train_adagrad.m
 create mode 100644 examples/mnistAutoencoder/euclideanloss.m
 create mode 100644 examples/mnistAutoencoder/sigmoid.m
 create mode 100644 examples/mnistAutoencoder/sigmoidcrossentropyloss.m
 create mode 100644 examples/mnistAutoencoder/vl_simplenn.m

diff --git a/examples/mnistAutoencoder/cnn_mnist_autoencoder.m b/examples/mnistAutoencoder/cnn_mnist_autoencoder.m
new file mode 100644
index 00000000..bc5ac0be
--- /dev/null
+++ b/examples/mnistAutoencoder/cnn_mnist_autoencoder.m
@@ -0,0 +1,279 @@
+function [net, opts, imdb, info] = cnn_mnist_autoencoder
+%CNN_MNIST_AUTOENCODER Train a deep fully connected autoencoder on MNIST
+%   The encoder is 784-1000-500-250-30; the decoder mirrors it back to 784.
+
+net = getMnistAutoencoderNet;
+opts = getMnistAutoencoderOpts;
+
+if exist(opts.imdbPath, 'file')
+
+    load(opts.imdbPath);
+
+else
+
+    imdb = getMnistAutoencoderImdb(opts);
+
+    if ~exist(opts.expDir, 'dir')
+
+        mkdir(opts.expDir);
+
+    end
+
+    save(opts.imdbPath, 'imdb');
+
+end
+
+% [net, info] = cnn_train(net, imdb, @(imdb, batch) getMnistAutoencoderBatch(imdb, batch), opts);
+[net, info] = cnn_train_adagrad(net, imdb, @(imdb, batch) getMnistAutoencoderBatch(imdb, batch), opts);
+
+net.layers{end} = struct('name', 'data_hat_sigmoid', ...
+                         'type', 'sigmoid'          );
+
+net.layers{end + 1} = struct('type', 'euclideanloss');
+
+end
+
+% -------------------------------------------------------------------------
+function net = getMnistAutoencoderNet
+% -------------------------------------------------------------------------
+
+% Layer 1
+
+net.layers{1} = struct('biases'             , zeros(1, 1000, 'single')             , ...
+                       'biasesLearningRate' , 1                                    , ...
+                       'biasesWeightDecay'  , 0                                    , ...
+                       'filters'            , sparse_initialization([1 1 784 1000]), ...
+                       'filtersLearningRate', 1                                    , ...
+                       'filtersWeightDecay' , 1                                    , ...
+                       'name'               , 'encoder_1'                          , ...
+                       'pad'                , [0 0 0 0]                            , ...
+                       'stride'             , [1 1]                                , ...
+                       'type'               , 'conv'                               );
+
+net.layers{2} = struct('name', 'encoder_1_sigmoid', ...
+                       'type', 'sigmoid'          );
+
+% Layer 2
+
+net.layers{3} = struct('biases'             , zeros(1, 500, 'single')              , ...
+                       'biasesLearningRate' , 1                                    , ...
+                       'biasesWeightDecay'  , 0                                    , ...
+                       'filters'            , sparse_initialization([1 1 1000 500]), ...
+                       'filtersLearningRate', 1                                    , ...
+                       'filtersWeightDecay' , 1                                    , ...
+                       'name'               , 'encoder_2'                          , ...
+                       'pad'                , [0 0 0 0]                            , ...
+                       'stride'             , [1 1]                                , ...
+                       'type'               , 'conv'                               );
+
+net.layers{4} = struct('name', 'encoder_2_sigmoid', ...
+                       'type', 'sigmoid'          );
+
+% Layer 3
+
+net.layers{5} = struct('biases'             , zeros(1, 250, 'single')             , ...
+                       'biasesLearningRate' , 1                                   , ...
+                       'biasesWeightDecay'  , 0                                   , ...
+                       'filters'            , sparse_initialization([1 1 500 250]), ...
+                       'filtersLearningRate', 1                                   , ...
+                       'filtersWeightDecay' , 1                                   , ...
+                       'name'               , 'encoder_3'                         , ...
+                       'pad'                , [0 0 0 0]                           , ...
+                       'stride'             , [1 1]                               , ...
+                       'type'               , 'conv'                              );
+
+net.layers{6} = struct('name', 'encoder_3_sigmoid', ...
+                       'type', 'sigmoid'          );
+
+% Layer 4
+
+net.layers{7} = struct('biases'             , zeros(1, 30, 'single')             , ...
+                       'biasesLearningRate' , 1                                  , ...
+                       'biasesWeightDecay'  , 0                                  , ...
+                       'filters'            , sparse_initialization([1 1 250 30]), ...
+                       'filtersLearningRate', 1                                  , ...
+                       'filtersWeightDecay' , 1                                  , ...
+                       'name'               , 'code'                             , ...
+                       'pad'                , [0 0 0 0]                          , ...
+                       'stride'             , [1 1]                              , ...
+                       'type'               , 'conv'                             );
+
+% Layer 5
+
+net.layers{8} = struct('biases'             , zeros(1, 250, 'single')            , ...
+                       'biasesLearningRate' , 1                                  , ...
+                       'biasesWeightDecay'  , 0                                  , ...
+                       'filters'            , sparse_initialization([1 1 30 250]), ...
+                       'filtersLearningRate', 1                                  , ...
+                       'filtersWeightDecay' , 1                                  , ...
+                       'name'               , 'decoder_3'                        , ...
+                       'pad'                , [0 0 0 0]                          , ...
+                       'stride'             , [1 1]                              , ...
+                       'type'               , 'conv'                             );
+
+net.layers{9} = struct('name', 'decoder_3_sigmoid', ...
+                       'type', 'sigmoid'          );
+
+% Layer 6
+
+net.layers{10} = struct('biases'             , zeros(1, 500, 'single')             , ...
+                        'biasesLearningRate' , 1                                   , ...
+                        'biasesWeightDecay'  , 0                                   , ...
+                        'filters'            , sparse_initialization([1 1 250 500]), ...
+                        'filtersLearningRate', 1                                   , ...
+                        'filtersWeightDecay' , 1                                   , ...
+                        'name'               , 'decoder_2'                         , ...
+                        'pad'                , [0 0 0 0]                           , ...
+                        'stride'             , [1 1]                               , ...
+                        'type'               , 'conv'                              );
+
+net.layers{11} = struct('name', 'decoder_2_sigmoid', ...
+                        'type', 'sigmoid'          );
+
+% Layer 7
+
+net.layers{12} = struct('biases'             , zeros(1, 1000, 'single')             , ...
+                        'biasesLearningRate' , 1                                    , ...
+                        'biasesWeightDecay'  , 0                                    , ...
+                        'filters'            , sparse_initialization([1 1 500 1000]), ...
+                        'filtersLearningRate', 1                                    , ...
+                        'filtersWeightDecay' , 1                                    , ...
+                        'name'               , 'decoder_1'                          , ...
+                        'pad'                , [0 0 0 0]                            , ...
+                        'stride'             , [1 1]                               , ...
+                        'type'               , 'conv'                               );
+
+net.layers{13} = struct('name', 'decoder_1_sigmoid', ...
+                        'type', 'sigmoid'          );
+
+% Layer 8
+
+net.layers{14} = struct('biases'             , zeros(1, 784, 'single')              , ...
+                        'biasesLearningRate' , 1                                    , ...
+                        'biasesWeightDecay'  , 0                                    , ...
+                        'filters'            , sparse_initialization([1 1 1000 784]), ...
+                        'filtersLearningRate', 1                                    , ...
+                        'filtersWeightDecay' , 1                                    , ...
+                        'name'               , 'data_hat'                           , ...
+                        'pad'                , [0 0 0 0]                            , ...
+                        'stride'             , [1 1]                                , ...
+                        'type'               , 'conv'                               );
+
+net.layers{15} = struct('type', 'sigmoidcrossentropyloss');
+
+vl_simplenn_display(net);
+
+end
+
+% -------------------------------------------------------------------------
+function filters = sparse_initialization(d)
+% -------------------------------------------------------------------------
+
+filters = zeros(d, 'single');
+
+for index = 1 : d(4)
+
+    p = randperm(d(3), 15);
+
+    filters(1, 1, p, index) = randn(1, 1, 15, 1);
+
+end
+
+end
+
+% -------------------------------------------------------------------------
+function opts = getMnistAutoencoderOpts
+% -------------------------------------------------------------------------
+
+opts.batchSize = 100;
+opts.conserveMemory = false;
+opts.continue = false;
+opts.dataDir = fullfile('data','mnist');
+opts.display = 10;
+opts.delta = 1e-8;
+opts.errorType = 'euclideanloss';
+opts.expDir = fullfile('data','mnistAutoencoder');
+opts.imdbPath = fullfile(opts.expDir, 'imdb.mat');
+% opts.learningRate = 1e-4;
+opts.learningRate = 1e-2;
+% opts.momentum = 0.9;
+% opts.numEpochs = 6667; % 6667 epochs is ~4000000 iterations.
+opts.numEpochs = 108; % 108 epochs is ~65000 iterations.
+opts.plotDiagnostics = false;
+opts.prefetch = false;
+opts.snapshot = 10;
+opts.sync = true;
+opts.test_interval = 10;
+opts.train = [];
+opts.useGpu = true;
+opts.val = [];
+opts.weightDecay = 5e-4;
+
+end
+
+% -------------------------------------------------------------------------
+function imdb = getMnistAutoencoderImdb(opts)
+% -------------------------------------------------------------------------
+% Prepare the imdb structure; returns image data rescaled to the range [0, 1]
+files = {'train-images-idx3-ubyte', ...
+         'train-labels-idx1-ubyte', ...
+         't10k-images-idx3-ubyte', ...
+         't10k-labels-idx1-ubyte'} ;
+
+if ~exist(opts.dataDir, 'dir')
+  mkdir(opts.dataDir) ;
+end
+
+for i=1:4
+  if ~exist(fullfile(opts.dataDir, files{i}), 'file')
+    url = sprintf('http://yann.lecun.com/exdb/mnist/%s.gz',files{i}) ;
+    fprintf('downloading %s\n', url) ;
+    gunzip(url, opts.dataDir) ;
+  end
+end
+
+f=fopen(fullfile(opts.dataDir, 'train-images-idx3-ubyte'),'r') ;
+x1=fread(f,inf,'uint8');
+fclose(f) ;
+x1=permute(reshape(x1(17:end),28,28,60e3),[2 1 3]) ;
+
+f=fopen(fullfile(opts.dataDir, 't10k-images-idx3-ubyte'),'r') ;
+x2=fread(f,inf,'uint8');
+fclose(f) ;
+x2=permute(reshape(x2(17:end),28,28,10e3),[2 1 3]) ;
+
+f=fopen(fullfile(opts.dataDir, 'train-labels-idx1-ubyte'),'r') ;
+y1=fread(f,inf,'uint8');
+fclose(f) ;
+y1=double(y1(9:end)')+1 ;
+
+f=fopen(fullfile(opts.dataDir, 't10k-labels-idx1-ubyte'),'r') ;
+y2=fread(f,inf,'uint8');
+fclose(f) ;
+y2=double(y2(9:end)')+1 ;
+
+set = [ones(1,numel(y1)) 2*ones(1,numel(y2))];
+% data = single(reshape(cat(3, x1, x2),28,28,1,[]));
+% dataMean = mean(data(:,:,:,set == 1), 4);
+% data = bsxfun(@minus, data, dataMean) ;
+data = single(reshape(cat(3, x1, x2), 1, 1, 784, []));
+data = data - min(data(:)); data = data / max(data(:));
+
+imdb.images.data = data ;
+% imdb.images.data_mean = dataMean;
+imdb.images.labels = cat(2, y1, y2) ;
+imdb.images.set = set ;
+imdb.meta.sets = {'train', 'val', 'test'} ;
+imdb.meta.classes = arrayfun(@(x)sprintf('%d',x),0:9,'uniformoutput',false) ;
+
+end
+
+% -------------------------------------------------------------------------
+function [im, labels] = getMnistAutoencoderBatch(imdb, batch)
+% -------------------------------------------------------------------------
+
+im = imdb.images.data(:, :, :, batch);
+labels = im;
+
+end
+
diff --git a/examples/mnistAutoencoder/cnn_mnist_autoencoder_test_demo.m b/examples/mnistAutoencoder/cnn_mnist_autoencoder_test_demo.m
new file mode 100644
index 00000000..77735a41
--- /dev/null
+++ b/examples/mnistAutoencoder/cnn_mnist_autoencoder_test_demo.m
@@ -0,0 +1,76 @@
+%%
+
+close all;
+clear all;
+clc;
+
+%%
+
+run('~/GitHub/umuguc/matconvnet/matlab/vl_setupnn');
+
+%%
+
+load('net.mat');
+load(opts.imdbPath);
+
+%%
+
+N = [5 2];
+
+Y = zeros(N(1) * N(2), 1);
+
+h = figure;
+
+for index = 1 : N(1) * N(2)
+
+    im = imdb.images.data(:, :, :, end - index + 1);
+
+    if opts.useGpu
+
+        im = gpuArray(im);
+
+    end
+
+    subplot(N(1), 2 * N(2), 2 * index - 1);
+
+    imagesc(reshape(im, 28, 28));
+
+    axis off;
+    axis square;
+
+    drawnow;
+
+    net.layers{end}.class = im;
+
+    res = vl_simplenn(net, im, [], [], 'disableDropout', true);
+
+    subplot(N(1), 2 * N(2), 2 * index);
+
+    imagesc(reshape(res(end - 1).x, 28, 28));
+
+    axis off;
+    axis square;
+
+    drawnow;
+
+    Y(index) = gather(res(end).x);
+
+end
+
+disp(['Euclidean loss: ' num2str(mean(Y))]);
+
+%%
+
+% Test net:
+
+% layer| 1| 2| 3| 4| 5| 6| 7| 8| 9| 10| 11| 12| 13| 14| 15| 16|
+% type| 
cnv|sigmoid| cnv|sigmoid| cnv|sigmoid| cnv| cnv|sigmoid| cnv|sigmoid| cnv|sigmoid| cnv|sigmoid|euclideanloss|
+% support| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1|
+% stride| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1|
+% pad| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|
+% out dim| 1000| 1000| 500| 500| 250| 250| 30| 250| 250| 500| 500| 1000| 1000| 784| 784| 784|
+% filt dim| 784| n/a| 1000| n/a| 500| n/a| 250| 30| n/a| 250| n/a| 500| n/a| 1000| n/a| n/a|
+% rec. field| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1|
+% c/g net KB| 3066/0| 0/0| 1955/0| 0/0| 489/0| 0/0| 29/0| 30/0| 0/0| 490/0| 0/0| 1957/0| 0/0| 3066/0| 0/0| 0/0|
+% total network CPU/GPU memory: 10.8/0 MB
+
diff --git a/examples/mnistAutoencoder/cnn_mnist_autoencoder_training_demo.m b/examples/mnistAutoencoder/cnn_mnist_autoencoder_training_demo.m
new file mode 100644
index 00000000..baa3e709
--- /dev/null
+++ b/examples/mnistAutoencoder/cnn_mnist_autoencoder_training_demo.m
@@ -0,0 +1,33 @@
+%%
+
+close all;
+clear all;
+clc;
+
+%%
+
+run('~/GitHub/umuguc/matconvnet/matlab/vl_setupnn');
+
+%%
+
+rng(0);
+
+[net, opts, imdb, info] = cnn_mnist_autoencoder;
+
+save('net.mat', 'net', 'opts', 'info');
+
+%%
+
+% Training net:
+
+% layer| 1| 2| 3| 4| 5| 6| 7| 8| 9| 10| 11| 12| 13| 14| 15|
+% type| cnv|sigmoid| cnv|sigmoid| cnv|sigmoid| cnv| cnv|sigmoid| cnv|sigmoid| cnv|sigmoid| cnv|sigmoidcrossentropyloss|
+% support| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1| 1x1|
+% stride| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1|
+% pad| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0| 0|
+% out dim| 1000| 1000| 500| 500| 250| 250| 30| 250| 250| 500| 500| 1000| 1000| 784| 784|
+% filt dim| 784| n/a| 1000| n/a| 500| n/a| 250| 30| n/a| 250| n/a| 500| n/a| 1000| n/a|
+% rec. field| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1| 1|
+% c/g net KB| 3066/0| 0/0| 1955/0| 0/0| 489/0| 0/0| 29/0| 30/0| 0/0| 490/0| 0/0| 1957/0| 0/0| 3066/0| 0/0|
+% total network CPU/GPU memory: 10.8/0 MB
+
diff --git a/examples/mnistAutoencoder/cnn_train.m b/examples/mnistAutoencoder/cnn_train.m
new file mode 100644
index 00000000..a92e794a
--- /dev/null
+++ b/examples/mnistAutoencoder/cnn_train.m
@@ -0,0 +1,320 @@
+function [net, info] = cnn_train(net, imdb, getBatch, varargin)
+% CNN_TRAIN Demonstrates training a CNN
+%    CNN_TRAIN() is an example learner implementing stochastic gradient
+%    descent with momentum to train a CNN for image classification.
+%    It can be used with different datasets by providing a suitable
+%    getBatch function.
+
+opts.train = [] ;
+opts.val = [] ;
+opts.numEpochs = 300 ;
+opts.batchSize = 256 ;
+opts.useGpu = false ;
+opts.learningRate = 0.001 ;
+opts.continue = false ;
+opts.expDir = fullfile('data','exp') ;
+opts.conserveMemory = false ;
+opts.sync = true ;
+opts.prefetch = false ;
+opts.weightDecay = 0.0005 ;
+opts.momentum = 0.9 ;
+opts.errorType = 'multiclass' ;
+opts.plotDiagnostics = false ;
+opts.display = 1;
+opts.snapshot = 1;
+opts.test_interval = 1;
+opts = vl_argparse(opts, varargin) ;
+
+if ~exist(opts.expDir, 'dir'), mkdir(opts.expDir) ; end
+if isempty(opts.train), opts.train = find(imdb.images.set==1) ; end
+if isempty(opts.val), opts.val = find(imdb.images.set==2) ; end
+if isnan(opts.train), opts.train = [] ; end
+
+% -------------------------------------------------------------------------
+% Network initialization
+% -------------------------------------------------------------------------
+
+for i=1:numel(net.layers)
+  if ~strcmp(net.layers{i}.type,'conv'), continue; end
+  net.layers{i}.filtersMomentum = zeros(size(net.layers{i}.filters), ...
+ class(net.layers{i}.filters)) ; + net.layers{i}.biasesMomentum = zeros(size(net.layers{i}.biases), ... + class(net.layers{i}.biases)) ; %#ok<*ZEROLIKE> + if ~isfield(net.layers{i}, 'filtersLearningRate') + net.layers{i}.filtersLearningRate = 1 ; + end + if ~isfield(net.layers{i}, 'biasesLearningRate') + net.layers{i}.biasesLearningRate = 1 ; + end + if ~isfield(net.layers{i}, 'filtersWeightDecay') + net.layers{i}.filtersWeightDecay = 1 ; + end + if ~isfield(net.layers{i}, 'biasesWeightDecay') + net.layers{i}.biasesWeightDecay = 1 ; + end +end + +if opts.useGpu + net = vl_simplenn_move(net, 'gpu') ; + for i=1:numel(net.layers) + if ~strcmp(net.layers{i}.type,'conv'), continue; end + net.layers{i}.filtersMomentum = gpuArray(net.layers{i}.filtersMomentum) ; + net.layers{i}.biasesMomentum = gpuArray(net.layers{i}.biasesMomentum) ; + end +end + +% ------------------------------------------------------------------------- +% Train and validate +% ------------------------------------------------------------------------- + +rng(0) ; + +if opts.useGpu + one = gpuArray(single(1)) ; +else + one = single(1) ; +end + +info.train.objective = [] ; +info.train.error = [] ; +info.train.topFiveError = [] ; +info.train.speed = [] ; +info.val.objective = [] ; +info.val.error = [] ; +info.val.topFiveError = [] ; +info.val.speed = [] ; + +lr = 0 ; +res = [] ; +for epoch=1:opts.numEpochs + prevLr = lr ; + lr = opts.learningRate(min(epoch, numel(opts.learningRate))) ; + + % fast-forward to where we stopped + modelPath = @(ep) fullfile(opts.expDir, sprintf('net-epoch-%d.mat', ep)); + modelFigPath = fullfile(opts.expDir, 'net-train.pdf') ; + if opts.continue + if exist(modelPath(epoch),'file') + if epoch == opts.numEpochs + load(modelPath(epoch), 'net', 'info') ; + end + continue ; + end + if epoch > 1 + fprintf('resuming by loading epoch %d\n', epoch-1) ; + load(modelPath(epoch-1), 'net', 'info') ; + end + end + + train = opts.train(randperm(numel(opts.train))) ; + val = opts.val ; + + info.train.objective(end+1) = 0 ; + info.train.error(end+1) = 0 ; + info.train.topFiveError(end+1) = 0 ; + info.train.speed(end+1) = 0 ; + info.val.objective(end+1) = 0 ; + info.val.error(end+1) = 0 ; + info.val.topFiveError(end+1) = 0 ; + info.val.speed(end+1) = 0 ; + + % reset momentum if needed + if prevLr ~= lr + fprintf('learning rate changed (%f --> %f): resetting momentum\n', prevLr, lr) ; + for l=1:numel(net.layers) + if ~strcmp(net.layers{l}.type, 'conv'), continue ; end + net.layers{l}.filtersMomentum = 0 * net.layers{l}.filtersMomentum ; + net.layers{l}.biasesMomentum = 0 * net.layers{l}.biasesMomentum ; + end + end + + for t=1:opts.batchSize:numel(train) + % get next image batch and labels + batch = train(t:min(t+opts.batchSize-1, numel(train))) ; + batch_time = tic ; + fprintf('training: epoch %02d: processing batch %3d of %3d ...', epoch, ... + fix(t/opts.batchSize)+1, ceil(numel(train)/opts.batchSize)) ; + [im, labels] = getBatch(imdb, batch) ; + if opts.prefetch + nextBatch = train(t+opts.batchSize:min(t+2*opts.batchSize-1, numel(train))) ; + getBatch(imdb, nextBatch) ; + end + if opts.useGpu + im = gpuArray(im) ; + end + + % backprop + net.layers{end}.class = labels ; + res = vl_simplenn(net, im, one, res, ... + 'conserveMemory', opts.conserveMemory, ... + 'sync', opts.sync) ; + + % gradient step + for l=1:numel(net.layers) + if ~strcmp(net.layers{l}.type, 'conv'), continue ; end + + net.layers{l}.filtersMomentum = ... + opts.momentum * net.layers{l}.filtersMomentum ... 
+        - (lr * net.layers{l}.filtersLearningRate) * ...
+          (opts.weightDecay * net.layers{l}.filtersWeightDecay) * net.layers{l}.filters ...
+        - (lr * net.layers{l}.filtersLearningRate) / numel(batch) * res(l).dzdw{1} ;
+
+      net.layers{l}.biasesMomentum = ...
+        opts.momentum * net.layers{l}.biasesMomentum ...
+        - (lr * net.layers{l}.biasesLearningRate) * ...
+          (opts.weightDecay * net.layers{l}.biasesWeightDecay) * net.layers{l}.biases ...
+        - (lr * net.layers{l}.biasesLearningRate) / numel(batch) * res(l).dzdw{2} ;
+
+      net.layers{l}.filters = net.layers{l}.filters + net.layers{l}.filtersMomentum ;
+      net.layers{l}.biases = net.layers{l}.biases + net.layers{l}.biasesMomentum ;
+    end
+
+    % print information
+    batch_time = toc(batch_time) ;
+    speed = numel(batch)/batch_time ;
+    info.train = updateError(opts, info.train, net, res, batch_time) ;
+
+    fprintf(' %.2f s (%.1f images/s)', batch_time, speed) ;
+    n = t + numel(batch) - 1 ;
+    switch opts.errorType
+      case 'multiclass'
+        fprintf(' err %.1f err5 %.1f', ...
+          info.train.error(end)/n*100, info.train.topFiveError(end)/n*100) ;
+        fprintf('\n') ;
+      case 'binary'
+        fprintf(' err %.1f', ...
+          info.train.error(end)/n*100) ;
+        fprintf('\n') ;
+      case 'euclideanloss'
+        fprintf(' err %.1f', info.train.error(end) / n);
+        fprintf('\n') ;
+    end
+
+    % debug info
+    if opts.plotDiagnostics
+      figure(2) ; vl_simplenn_diagnose(net,res) ; drawnow ;
+    end
+  end % next batch
+
+  % evaluation on validation set
+  if epoch == 1 || rem(epoch, opts.test_interval) == 0 || epoch == opts.numEpochs
+    for t=1:opts.batchSize:numel(val)
+      batch_time = tic ;
+      batch = val(t:min(t+opts.batchSize-1, numel(val))) ;
+      fprintf('validation: epoch %02d: processing batch %3d of %3d ...', epoch, ...
+        fix(t/opts.batchSize)+1, ceil(numel(val)/opts.batchSize)) ;
+      [im, labels] = getBatch(imdb, batch) ;
+      if opts.prefetch
+        nextBatch = val(t+opts.batchSize:min(t+2*opts.batchSize-1, numel(val))) ;
+        getBatch(imdb, nextBatch) ;
+      end
+      if opts.useGpu
+        im = gpuArray(im) ;
+      end
+
+      net.layers{end}.class = labels ;
+      res = vl_simplenn(net, im, [], res, ...
+        'disableDropout', true, ...
+        'conserveMemory', opts.conserveMemory, ...
+        'sync', opts.sync) ;
+
+      % print information
+      batch_time = toc(batch_time) ;
+      speed = numel(batch)/batch_time ;
+      info.val = updateError(opts, info.val, net, res, batch_time) ;
+
+      fprintf(' %.2f s (%.1f images/s)', batch_time, speed) ;
+      n = t + numel(batch) - 1 ;
+      switch opts.errorType
+        case 'multiclass'
+          fprintf(' err %.1f err5 %.1f', ...
+            info.val.error(end)/n*100, info.val.topFiveError(end)/n*100) ;
+          fprintf('\n') ;
+        case 'binary'
+          fprintf(' err %.1f', ...
+            info.val.error(end)/n*100) ;
+          fprintf('\n') ;
+        case 'euclideanloss'
+          fprintf(' err %.1f', info.val.error(end) / n);
+          fprintf('\n') ;
+      end
+    end
+  end
+
+  % save
+  info.train.objective(end) = info.train.objective(end) / numel(train) ;
+  info.train.error(end) = info.train.error(end) / numel(train) ;
+  info.train.topFiveError(end) = info.train.topFiveError(end) / numel(train) ;
+  info.train.speed(end) = numel(train) / info.train.speed(end) ;
+  info.val.objective(end) = info.val.objective(end) / numel(val) ;
+  info.val.error(end) = info.val.error(end) / numel(val) ;
+  info.val.topFiveError(end) = info.val.topFiveError(end) / numel(val) ;
+  info.val.speed(end) = numel(val) / info.val.speed(end) ;
+  if epoch == 1 || rem(epoch, opts.snapshot) == 0 || epoch == opts.numEpochs
+    save(modelPath(epoch), 'net', 'info') ;
+  end
+
+  if epoch == 1 || rem(epoch, opts.display) == 0 || epoch == opts.numEpochs
+    figure(1) ; clf ;
+    subplot(1,2,1) ;
+    semilogy(1:epoch, info.train.objective, 'k') ; hold on ;
+    semilogy([1 opts.test_interval : opts.test_interval : epoch epoch], info.val.objective([1 opts.test_interval : opts.test_interval : epoch epoch]), 'b') ;
+    xlabel('training epoch') ; ylabel('energy') ;
+    grid on ;
+    h=legend('train', 'val') ;
+    set(h,'color','none');
+    title('objective') ;
+    subplot(1,2,2) ;
+    switch opts.errorType
+      case 'multiclass'
+        plot(1:epoch, info.train.error, 'k') ; hold on ;
+        plot(1:epoch, info.train.topFiveError, 'k--') ;
+        plot([1 opts.test_interval : opts.test_interval : epoch epoch], info.val.error([1 opts.test_interval : opts.test_interval : epoch epoch]), 'b') ;
+        plot([1 opts.test_interval : opts.test_interval : epoch epoch], info.val.topFiveError([1 opts.test_interval : opts.test_interval : epoch epoch]), 'b--') ;
+        h=legend('train','train-5','val','val-5') ;
+      case 'binary'
+        plot(1:epoch, info.train.error, 'k') ; hold on ;
+        plot([1 opts.test_interval : opts.test_interval : epoch epoch], info.val.error([1 opts.test_interval : opts.test_interval : epoch epoch]), 'b') ;
+        h=legend('train','val') ;
+      case 'euclideanloss'
+        plot(1 : epoch, info.train.error, 'k'); hold on;
+        plot([1 opts.test_interval : opts.test_interval : epoch epoch], info.val.error([1 opts.test_interval : opts.test_interval : epoch epoch]), 'b') ;
+        h = legend('train', 'val') ;
+    end
+    grid on ;
+    xlabel('training epoch') ; ylabel('error') ;
+    set(h,'color','none') ;
+    title('error') ;
+    drawnow ;
+    print(1, modelFigPath, '-dpdf') ;
+  end
+end
+
+% -------------------------------------------------------------------------
+function info = updateError(opts, info, net, res, speed)
+% -------------------------------------------------------------------------
+predictions = gather(res(end-1).x) ;
+sz = size(predictions) ;
+n = prod(sz(1:2)) ;
+
+labels = net.layers{end}.class ;
+info.objective(end) = info.objective(end) + sum(double(gather(res(end).x))) ;
+info.speed(end) = info.speed(end) + speed ;
+switch opts.errorType
+  case 'multiclass'
+    [~,predictions] = sort(predictions, 3, 'descend') ;
+    error = ~bsxfun(@eq, predictions, reshape(labels, 1, 1, 1, [])) ;
+    info.error(end) = info.error(end) + ...
+      sum(sum(sum(error(:,:,1,:))))/n ;
+    info.topFiveError(end) = info.topFiveError(end) + ...
+      sum(sum(sum(min(error(:,:,1:5,:),[],3))))/n ;
+  case 'binary'
+    error = bsxfun(@times, predictions, labels) < 0 ;
+    info.error(end) = info.error(end) + sum(error(:))/n ;
+  case 'euclideanloss'
+    error = euclideanloss(sigmoid(predictions), labels);
+    info.error(end) = info.error(end) + error;
+end
+
+
+
diff --git a/examples/mnistAutoencoder/cnn_train_adagrad.m b/examples/mnistAutoencoder/cnn_train_adagrad.m
new file mode 100644
index 00000000..eb8e1b5e
--- /dev/null
+++ b/examples/mnistAutoencoder/cnn_train_adagrad.m
@@ -0,0 +1,318 @@
+function [net, info] = cnn_train_adagrad(net, imdb, getBatch, varargin)
+% CNN_TRAIN_ADAGRAD Demonstrates training a CNN with AdaGrad
+%    CNN_TRAIN_ADAGRAD() is an example learner implementing stochastic
+%    gradient descent with the AdaGrad per-parameter learning rate rule
+%    to train a CNN for image classification. It can be used with
+%    different datasets by providing a suitable getBatch function.
+
+opts.train = [] ;
+opts.val = [] ;
+opts.numEpochs = 300 ;
+opts.batchSize = 256 ;
+opts.useGpu = false ;
+opts.learningRate = 0.001 ;
+opts.continue = false ;
+opts.expDir = fullfile('data','exp') ;
+opts.conserveMemory = false ;
+opts.sync = true ;
+opts.prefetch = false ;
+opts.weightDecay = 0.0005 ;
+opts.errorType = 'multiclass' ;
+opts.plotDiagnostics = false ;
+opts.delta = 1e-8;
+opts.display = 1;
+opts.snapshot = 1;
+opts.test_interval = 1;
+opts = vl_argparse(opts, varargin) ;
+
+if ~exist(opts.expDir, 'dir'), mkdir(opts.expDir) ; end
+if isempty(opts.train), opts.train = find(imdb.images.set==1) ; end
+if isempty(opts.val), opts.val = find(imdb.images.set==2) ; end
+if isnan(opts.train), opts.train = [] ; end
+
+% -------------------------------------------------------------------------
+% Network initialization
+% -------------------------------------------------------------------------
+
+for i=1:numel(net.layers)
+  if ~strcmp(net.layers{i}.type,'conv'), continue; end
+  net.layers{i}.filtersMomentum = zeros(size(net.layers{i}.filters), ...
+    class(net.layers{i}.filters)) ;
+  net.layers{i}.biasesMomentum = zeros(size(net.layers{i}.biases), ...
+ class(net.layers{i}.biases)) ; %#ok<*ZEROLIKE> + if ~isfield(net.layers{i}, 'filtersLearningRate') + net.layers{i}.filtersLearningRate = 1 ; + end + if ~isfield(net.layers{i}, 'biasesLearningRate') + net.layers{i}.biasesLearningRate = 1 ; + end + if ~isfield(net.layers{i}, 'filtersWeightDecay') + net.layers{i}.filtersWeightDecay = 1 ; + end + if ~isfield(net.layers{i}, 'biasesWeightDecay') + net.layers{i}.biasesWeightDecay = 1 ; + end +end + +if opts.useGpu + net = vl_simplenn_move(net, 'gpu') ; + for i=1:numel(net.layers) + if ~strcmp(net.layers{i}.type,'conv'), continue; end + net.layers{i}.filtersMomentum = gpuArray(net.layers{i}.filtersMomentum) ; + net.layers{i}.biasesMomentum = gpuArray(net.layers{i}.biasesMomentum) ; + end +end + +G_f = cell(numel(net.layers), 1); +G_b = cell(numel(net.layers), 1); + +for l=1:numel(net.layers) + + if ~strcmp(net.layers{l}.type, 'conv'), continue ; end + + G_f{l} = zeros(size(net.layers{l}.filters), 'single'); + G_b{l} = zeros(size(net.layers{l}.biases), 'single'); + +end + +% ------------------------------------------------------------------------- +% Train and validate +% ------------------------------------------------------------------------- + +rng(0) ; + +if opts.useGpu + one = gpuArray(single(1)) ; +else + one = single(1) ; +end + +info.train.objective = [] ; +info.train.error = [] ; +info.train.topFiveError = [] ; +info.train.speed = [] ; +info.val.objective = [] ; +info.val.error = [] ; +info.val.topFiveError = [] ; +info.val.speed = [] ; + +lr = opts.learningRate ; +res = [] ; +for epoch=1:opts.numEpochs + + % fast-forward to where we stopped + modelPath = @(ep) fullfile(opts.expDir, sprintf('net-epoch-%d.mat', ep)); + modelFigPath = fullfile(opts.expDir, 'net-train.pdf') ; + if opts.continue + if exist(modelPath(epoch),'file') + if epoch == opts.numEpochs + load(modelPath(epoch), 'net', 'info') ; + end + continue ; + end + if epoch > 1 + fprintf('resuming by loading epoch %d\n', epoch-1) ; + load(modelPath(epoch-1), 'net', 'info') ; + end + end + + train = opts.train(randperm(numel(opts.train))) ; + val = opts.val ; + + info.train.objective(end+1) = 0 ; + info.train.error(end+1) = 0 ; + info.train.topFiveError(end+1) = 0 ; + info.train.speed(end+1) = 0 ; + info.val.objective(end+1) = 0 ; + info.val.error(end+1) = 0 ; + info.val.topFiveError(end+1) = 0 ; + info.val.speed(end+1) = 0 ; + + for t=1:opts.batchSize:numel(train) + % get next image batch and labels + batch = train(t:min(t+opts.batchSize-1, numel(train))) ; + batch_time = tic ; + fprintf('training: epoch %02d: processing batch %3d of %3d ...', epoch, ... + fix(t/opts.batchSize)+1, ceil(numel(train)/opts.batchSize)) ; + [im, labels] = getBatch(imdb, batch) ; + if opts.prefetch + nextBatch = train(t+opts.batchSize:min(t+2*opts.batchSize-1, numel(train))) ; + getBatch(imdb, nextBatch) ; + end + if opts.useGpu + im = gpuArray(im) ; + end + + % backprop + net.layers{end}.class = labels ; + res = vl_simplenn(net, im, one, res, ... + 'conserveMemory', opts.conserveMemory, ... + 'sync', opts.sync) ; + + % gradient step + for l=1:numel(net.layers) + if ~strcmp(net.layers{l}.type, 'conv'), continue ; end + + g_f = (net.layers{l}.filtersLearningRate) * ... + (opts.weightDecay * net.layers{l}.filtersWeightDecay) * net.layers{l}.filters + ... + (net.layers{l}.filtersLearningRate) / numel(batch) * res(l).dzdw{1}; + g_b = (net.layers{l}.biasesLearningRate) * ... + (opts.weightDecay * net.layers{l}.biasesWeightDecay) * net.layers{l}.biases + ... 
+ (net.layers{l}.biasesLearningRate) / numel(batch) * res(l).dzdw{2}; + + G_f{l} = G_f{l} + g_f .^ 2; + G_b{l} = G_b{l} + g_b .^ 2; + + net.layers{l}.filters = net.layers{l}.filters - lr ./ (opts.delta + sqrt(G_f{l})) .* g_f; + net.layers{l}.biases = net.layers{l}.biases - lr ./ (opts.delta + sqrt(G_b{l})) .* g_b; + end + + % print information + batch_time = toc(batch_time) ; + speed = numel(batch)/batch_time ; + info.train = updateError(opts, info.train, net, res, batch_time) ; + + fprintf(' %.2f s (%.1f images/s)', batch_time, speed) ; + n = t + numel(batch) - 1 ; + switch opts.errorType + case 'multiclass' + fprintf(' err %.1f err5 %.1f', ... + info.train.error(end)/n*100, info.train.topFiveError(end)/n*100) ; + fprintf('\n') ; + case 'binary' + fprintf(' err %.1f', ... + info.train.error(end)/n*100) ; + fprintf('\n') ; + case 'euclideanloss' + fprintf(' err %.1f', info.train.error(end) / n); + fprintf('\n') ; + end + + % debug info + if opts.plotDiagnostics + figure(2) ; vl_simplenn_diagnose(net,res) ; drawnow ; + end + end % next batch + + % evaluation on validation set + if epoch == 1 || rem(epoch, opts.test_interval) == 0 || epoch == opts.numEpochs + for t=1:opts.batchSize:numel(val) + batch_time = tic ; + batch = val(t:min(t+opts.batchSize-1, numel(val))) ; + fprintf('validation: epoch %02d: processing batch %3d of %3d ...', epoch, ... + fix(t/opts.batchSize)+1, ceil(numel(val)/opts.batchSize)) ; + [im, labels] = getBatch(imdb, batch) ; + if opts.prefetch + nextBatch = val(t+opts.batchSize:min(t+2*opts.batchSize-1, numel(val))) ; + getBatch(imdb, nextBatch) ; + end + if opts.useGpu + im = gpuArray(im) ; + end + + net.layers{end}.class = labels ; + res = vl_simplenn(net, im, [], res, ... + 'disableDropout', true, ... + 'conserveMemory', opts.conserveMemory, ... + 'sync', opts.sync) ; + + % print information + batch_time = toc(batch_time) ; + speed = numel(batch)/batch_time ; + info.val = updateError(opts, info.val, net, res, batch_time) ; + + fprintf(' %.2f s (%.1f images/s)', batch_time, speed) ; + n = t + numel(batch) - 1 ; + switch opts.errorType + case 'multiclass' + fprintf(' err %.1f err5 %.1f', ... + info.val.error(end)/n*100, info.val.topFiveError(end)/n*100) ; + fprintf('\n') ; + case 'binary' + fprintf(' err %.1f', ... 
+            info.val.error(end)/n*100) ;
+          fprintf('\n') ;
+        case 'euclideanloss'
+          fprintf(' err %.1f', info.val.error(end) / n);
+          fprintf('\n') ;
+      end
+    end
+  end
+
+  % save
+  info.train.objective(end) = info.train.objective(end) / numel(train) ;
+  info.train.error(end) = info.train.error(end) / numel(train) ;
+  info.train.topFiveError(end) = info.train.topFiveError(end) / numel(train) ;
+  info.train.speed(end) = numel(train) / info.train.speed(end) ;
+  info.val.objective(end) = info.val.objective(end) / numel(val) ;
+  info.val.error(end) = info.val.error(end) / numel(val) ;
+  info.val.topFiveError(end) = info.val.topFiveError(end) / numel(val) ;
+  info.val.speed(end) = numel(val) / info.val.speed(end) ;
+  if epoch == 1 || rem(epoch, opts.snapshot) == 0 || epoch == opts.numEpochs
+    save(modelPath(epoch), 'net', 'info') ;
+  end
+
+  if epoch == 1 || rem(epoch, opts.display) == 0 || epoch == opts.numEpochs
+    figure(1) ; clf ;
+    subplot(1,2,1) ;
+    semilogy(1:epoch, info.train.objective, 'k') ; hold on ;
+    semilogy([1 opts.test_interval : opts.test_interval : epoch epoch], info.val.objective([1 opts.test_interval : opts.test_interval : epoch epoch]), 'b') ;
+    xlabel('training epoch') ; ylabel('energy') ;
+    grid on ;
+    h=legend('train', 'val') ;
+    set(h,'color','none');
+    title('objective') ;
+    subplot(1,2,2) ;
+    switch opts.errorType
+      case 'multiclass'
+        plot(1:epoch, info.train.error, 'k') ; hold on ;
+        plot(1:epoch, info.train.topFiveError, 'k--') ;
+        plot([1 opts.test_interval : opts.test_interval : epoch epoch], info.val.error([1 opts.test_interval : opts.test_interval : epoch epoch]), 'b') ;
+        plot([1 opts.test_interval : opts.test_interval : epoch epoch], info.val.topFiveError([1 opts.test_interval : opts.test_interval : epoch epoch]), 'b--') ;
+        h=legend('train','train-5','val','val-5') ;
+      case 'binary'
+        plot(1:epoch, info.train.error, 'k') ; hold on ;
+        plot([1 opts.test_interval : opts.test_interval : epoch epoch], info.val.error([1 opts.test_interval : opts.test_interval : epoch epoch]), 'b') ;
+        h=legend('train','val') ;
+      case 'euclideanloss'
+        plot(1 : epoch, info.train.error, 'k'); hold on;
+        plot([1 opts.test_interval : opts.test_interval : epoch epoch], info.val.error([1 opts.test_interval : opts.test_interval : epoch epoch]), 'b') ;
+        h = legend('train', 'val') ;
+    end
+    grid on ;
+    xlabel('training epoch') ; ylabel('error') ;
+    set(h,'color','none') ;
+    title('error') ;
+    drawnow ;
+    print(1, modelFigPath, '-dpdf') ;
+  end
+end
+
+% -------------------------------------------------------------------------
+function info = updateError(opts, info, net, res, speed)
+% -------------------------------------------------------------------------
+predictions = gather(res(end-1).x) ;
+sz = size(predictions) ;
+n = prod(sz(1:2)) ;
+
+labels = net.layers{end}.class ;
+info.objective(end) = info.objective(end) + sum(double(gather(res(end).x))) ;
+info.speed(end) = info.speed(end) + speed ;
+switch opts.errorType
+  case 'multiclass'
+    [~,predictions] = sort(predictions, 3, 'descend') ;
+    error = ~bsxfun(@eq, predictions, reshape(labels, 1, 1, 1, [])) ;
+    info.error(end) = info.error(end) + ...
+      sum(sum(sum(error(:,:,1,:))))/n ;
+    info.topFiveError(end) = info.topFiveError(end) + ...
+      sum(sum(sum(min(error(:,:,1:5,:),[],3))))/n ;
+  case 'binary'
+    error = bsxfun(@times, predictions, labels) < 0 ;
+    info.error(end) = info.error(end) + sum(error(:))/n ;
+  case 'euclideanloss'
+    error = euclideanloss(sigmoid(predictions), labels);
+    info.error(end) = info.error(end) + error;
+end
+
+
+
diff --git a/examples/mnistAutoencoder/euclideanloss.m b/examples/mnistAutoencoder/euclideanloss.m
new file mode 100644
index 00000000..b5a795c9
--- /dev/null
+++ b/examples/mnistAutoencoder/euclideanloss.m
@@ -0,0 +1,26 @@
+function Y = euclideanloss(X, c, dzdy)
+%EUCLIDEANLOSS Euclidean (sum of squared differences) loss
+%   Forward: Y = 1/2 * sum((X - c).^2). Backward (DZDY given): Y = dzdy * (X - c).
+
+assert(numel(X) == numel(c));
+
+d = size(X);
+
+assert(all(d == size(c)));
+
+if nargin == 2 || (nargin == 3 && isempty(dzdy))
+
+    Y = 1 / 2 * sum(subsref((X - c) .^ 2, substruct('()', {':'}))); % Y is divided by d(4) in cnn_train.m / cnn_train_mgpu.m.
+%     Y = 1 / (2 * prod(d(1 : 3))) * sum(subsref((X - c) .^ 2, substruct('()', {':'}))); % Should Y be divided by prod(d(1 : 3))? It depends on the learning rate.
+
+elseif nargin == 3 && ~isempty(dzdy)
+
+    assert(numel(dzdy) == 1);
+
+    Y = dzdy * (X - c); % Y is divided by d(4) in cnn_train.m / cnn_train_mgpu.m.
+%     Y = dzdy / prod(d(1 : 3)) * (X - c); % Should Y be divided by prod(d(1 : 3))? It depends on the learning rate.
+
+end
+
+end
+
diff --git a/examples/mnistAutoencoder/sigmoid.m b/examples/mnistAutoencoder/sigmoid.m
new file mode 100644
index 00000000..9d2bad38
--- /dev/null
+++ b/examples/mnistAutoencoder/sigmoid.m
@@ -0,0 +1,16 @@
+function y = sigmoid(x, dzdy)
+%SIGMOID Logistic sigmoid and its derivative
+%   Forward: y = 1./(1 + exp(-x)). Backward (DZDY given): y = dzdy .* y .* (1 - y).
+
+y = 1 ./ (1 + exp(-x));
+
+if nargin == 2 && ~isempty(dzdy)
+
+    assert(all(size(x) == size(dzdy)));
+
+    y = dzdy .* y .* (1 - y);
+
+end
+
+end
+
diff --git a/examples/mnistAutoencoder/sigmoidcrossentropyloss.m b/examples/mnistAutoencoder/sigmoidcrossentropyloss.m
new file mode 100644
index 00000000..d09ffbb6
--- /dev/null
+++ b/examples/mnistAutoencoder/sigmoidcrossentropyloss.m
@@ -0,0 +1,29 @@
+function Y = sigmoidcrossentropyloss(X, c, dzdy)
+%SIGMOIDCROSSENTROPYLOSS Cross-entropy loss on sigmoid-transformed inputs
+%   Both the predictions X and the targets c are passed through a sigmoid.
+
+assert(numel(X) == numel(c));
+
+d = size(X);
+
+assert(all(d == size(c)));
+
+p = sigmoid(c);
+p_hat = sigmoid(X);
+
+if nargin == 2 || isempty(dzdy)
+
+    Y = -sum(subsref(p .* log(p_hat) + (1 - p) .* log(1 - p_hat), substruct('()', {':'}))); % Y is divided by d(4) in cnn_train.m / cnn_train_mgpu.m.
+%     Y = -1 / prod(d(1 : 3)) * sum(subsref(p .* log(p_hat) + (1 - p) .* log(1 - p_hat), substruct('()', {':'}))); % Should Y be divided by prod(d(1 : 3))? It depends on the learning rate.
+
+elseif nargin == 3 && ~isempty(dzdy)
+
+    assert(numel(dzdy) == 1);
+
+    Y = dzdy * (p_hat - p); % Y is divided by d(4) in cnn_train.m / cnn_train_mgpu.m.
+%     Y = dzdy / prod(d(1 : 3)) * (p_hat - p); % Should Y be divided by prod(d(1 : 3))? It depends on the learning rate.
+
+end
+
+end
+
diff --git a/examples/mnistAutoencoder/vl_simplenn.m b/examples/mnistAutoencoder/vl_simplenn.m
new file mode 100644
index 00000000..5a3c18cd
--- /dev/null
+++ b/examples/mnistAutoencoder/vl_simplenn.m
@@ -0,0 +1,249 @@
+function res = vl_simplenn(net, x, dzdy, res, varargin)
+% VL_SIMPLENN Evaluates a simple CNN
+% RES = VL_SIMPLENN(NET, X) evaluates the convnet NET on data X.
+% RES = VL_SIMPLENN(NET, X, DZDY) evaluates the convnet NET and its
+% derivative on data X and output derivative DZDY.
+%
+% The network has a simple (linear) topology, i.e. 
the computational
+% blocks are arranged in a sequence of layers. Please note that
+% there is no need to use this wrapper, which is provided for
+% convenience. Instead, the individual CNN computational blocks can
+% be evaluated directly, making it possible to create significantly
+% more complex topologies, and in general allowing greater
+% flexibility.
+%
+% The NET structure contains two fields:
+%
+% - net.layers: the CNN layers.
+% - net.normalization: information on how to normalize input data.
+%
+% The network expects the data X to be already normalized. This
+% usually involves rescaling the input image(s) and subtracting a
+% mean.
+%
+% RES is a structure array with one element per network layer plus
+% one representing the input. So RES(1) refers to the zeroth-layer
+% (input), RES(2) refers to the first layer, etc. Each entry has
+% fields:
+%
+% - res(i+1).x: the output of layer i. Hence res(1).x is the network
+%   input.
+%
+% - res(i+1).aux: auxiliary output data of layer i. For example,
+%   dropout uses this field to store the dropout mask.
+%
+% - res(i+1).dzdx: the derivative of the network output relative to
+%   variable res(i+1).x, i.e. the output of layer i. In particular
+%   res(1).dzdx is the derivative of the network output with respect
+%   to the network input.
+%
+% - res(i+1).dzdw: the derivative of the network output relative to
+%   the parameters of layer i. It can be a cell array for multiple
+%   parameters.
+%
+% net.layers is a cell array of network layers. The following
+% layers, encapsulating corresponding functions in the toolbox, are
+% supported:
+%
+% Convolutional layer::
+%   The convolutional layer wraps VL_NNCONV(). It has fields:
+%
+%   - layer.type = 'conv'
+%   - layer.filters: the filters.
+%   - layer.biases: the biases.
+%   - layer.stride: the sampling stride (usually 1).
+%   - layer.pad: the padding (usually 0).
+%
+% Max pooling layer::
+%   The max pooling layer wraps VL_NNPOOL(). It has fields:
+%
+%   - layer.type = 'pool'
+%   - layer.method: pooling method ('max' or 'avg').
+%   - layer.pool: the pooling size.
+%   - layer.stride: the sampling stride (usually 1).
+%   - layer.pad: the padding (usually 0).
+%
+% Normalization layer::
+%   The normalization layer wraps VL_NNNORMALIZE(). It has fields
+%
+%   - layer.type = 'normalize'
+%   - layer.param: the normalization parameters.
+%
+% ReLU layer::
+%   The ReLU layer wraps VL_NNRELU(). It has fields:
+%
+%   - layer.type = 'relu'
+%
+% Dropout layer::
+%   The dropout layer wraps VL_NNDROPOUT(). It has fields:
+%
+%   - layer.type = 'dropout'
+%   - layer.rate: the dropout rate.
+%
+% Softmax layer::
+%   The softmax layer wraps VL_NNSOFTMAX(). It has fields
+%
+%   - layer.type = 'softmax'
+%
+% Log-loss layer::
+%   The log-loss layer wraps VL_NNLOSS(). It has fields:
+%
+%   - layer.type = 'loss'
+%   - layer.class: the ground-truth class.
+%
+% Softmax-log-loss layer::
+%   The softmax-log-loss layer wraps VL_NNSOFTMAXLOSS(). It has
+%   fields:
+%
+%   - layer.type = 'softmaxloss'
+%   - layer.class: the ground-truth class.
+%
+% Custom layer::
+%   This can be used to specify custom layers.
+%
+%   - layer.type = 'custom'
+%   - layer.forward: a function handle computing the block.
+%   - layer.backward: a function handle computing the block derivative.
+%
+% The first function is called as res(i+1) = forward(layer, res(i), res(i+1))
+% where res() is the struct array specified before. The second function is
+% called as res(i) = backward(layer, res(i), res(i+1)). 
Note that the
+% `layer` structure can contain additional fields if needed.
+
+
+% Copyright (C) 2014 Andrea Vedaldi.
+% All rights reserved.
+%
+% This file is part of the VLFeat library and is made available under
+% the terms of the BSD license (see the COPYING file).
+
+opts.res = [] ;
+opts.conserveMemory = false ;
+opts.sync = false ;
+opts.disableDropout = false ;
+opts.freezeDropout = false ;
+opts = vl_argparse(opts, varargin);
+
+n = numel(net.layers) ;
+
+if (nargin <= 2) || isempty(dzdy)
+  doder = false ;
+else
+  doder = true ;
+end
+
+gpuMode = isa(x, 'gpuArray') ;
+
+if nargin <= 3 || isempty(res)
+  res = struct(...
+    'x', cell(1,n+1), ...
+    'dzdx', cell(1,n+1), ...
+    'dzdw', cell(1,n+1), ...
+    'aux', cell(1,n+1), ...
+    'time', num2cell(zeros(1,n+1)), ...
+    'backwardTime', num2cell(zeros(1,n+1))) ;
+end
+res(1).x = x ;
+
+for i=1:n
+  l = net.layers{i} ;
+  res(i).time = tic ;
+  switch l.type
+    case 'conv'
+      res(i+1).x = vl_nnconv(res(i).x, l.filters, l.biases, 'pad', l.pad, 'stride', l.stride) ;
+    case 'pool'
+      res(i+1).x = vl_nnpool(res(i).x, l.pool, 'pad', l.pad, 'stride', l.stride, 'method', l.method) ;
+    case 'normalize'
+      res(i+1).x = vl_nnnormalize(res(i).x, l.param) ;
+    case 'softmax'
+      res(i+1).x = vl_nnsoftmax(res(i).x) ;
+    case 'loss'
+      res(i+1).x = vl_nnloss(res(i).x, l.class) ;
+    case 'softmaxloss'
+      res(i+1).x = vl_nnsoftmaxloss(res(i).x, l.class) ;
+    case 'relu'
+      res(i+1).x = vl_nnrelu(res(i).x) ;
+    case 'noffset'
+      res(i+1).x = vl_nnnoffset(res(i).x, l.param) ;
+    case 'dropout'
+      if opts.disableDropout
+        res(i+1).x = res(i).x ;
+      elseif opts.freezeDropout
+        [res(i+1).x, res(i+1).aux] = vl_nndropout(res(i).x, 'rate', l.rate, 'mask', res(i+1).aux) ;
+      else
+        [res(i+1).x, res(i+1).aux] = vl_nndropout(res(i).x, 'rate', l.rate) ;
+      end
+    case 'custom'
+      res(i+1) = l.forward(l, res(i), res(i+1)) ;
+    case 'sigmoid'
+      res(i+1).x = sigmoid(res(i).x);
+    case 'sigmoidcrossentropyloss'
+      res(i+1).x = sigmoidcrossentropyloss(res(i).x, l.class);
+    case 'euclideanloss'
+      res(i+1).x = euclideanloss(res(i).x, l.class);
+    otherwise
+      error('Unknown layer type %s', l.type) ;
+  end
+  if opts.conserveMemory & ~doder & i < numel(net.layers) - 1
+    % TODO: forget unnecessary intermediate computations even when
+    % derivatives are required
+    res(i).x = [] ;
+  end
+  if gpuMode & opts.sync
+    % This should make things slower, but on MATLAB 2014a it is necessary
+    % for any decent performance.
+    wait(gpuDevice) ;
+  end
+  res(i).time = toc(res(i).time) ;
+end
+
+if doder
+  res(n+1).dzdx = dzdy ;
+  for i=n:-1:1
+    l = net.layers{i} ;
+    res(i).backwardTime = tic ;
+    switch l.type
+      case 'conv'
+        [res(i).dzdx, res(i).dzdw{1}, res(i).dzdw{2}] = ...
+          vl_nnconv(res(i).x, l.filters, l.biases, ...
+                    res(i+1).dzdx, ...
+                    'pad', l.pad, 'stride', l.stride) ;
+      case 'pool'
+        res(i).dzdx = vl_nnpool(res(i).x, l.pool, res(i+1).dzdx, ...
+ 'pad', l.pad, 'stride', l.stride, 'method', l.method) ; + case 'normalize' + res(i).dzdx = vl_nnnormalize(res(i).x, l.param, res(i+1).dzdx) ; + case 'softmax' + res(i).dzdx = vl_nnsoftmax(res(i).x, res(i+1).dzdx) ; + case 'loss' + res(i).dzdx = vl_nnloss(res(i).x, l.class, res(i+1).dzdx) ; + case 'softmaxloss' + res(i).dzdx = vl_nnsoftmaxloss(res(i).x, l.class, res(i+1).dzdx) ; + case 'relu' + res(i).dzdx = vl_nnrelu(res(i).x, res(i+1).dzdx) ; + case 'noffset' + res(i).dzdx = vl_nnnoffset(res(i).x, l.param, res(i+1).dzdx) ; + case 'dropout' + if opts.disableDropout + res(i).dzdx = res(i+1).dzdx ; + else + res(i).dzdx = vl_nndropout(res(i).x, res(i+1).dzdx, 'mask', res(i+1).aux) ; + end + case 'custom' + res(i) = l.backward(l, res(i), res(i+1)) ; + case 'sigmoid' + res(i).dzdx = sigmoid(res(i).x, res(i+1).dzdx); + case 'sigmoidcrossentropyloss' + res(i).dzdx = sigmoidcrossentropyloss(res(i).x, l.class, res(i+1).dzdx); + case 'euclideanloss' + res(i).dzdx = euclideanloss(res(i).x, l.class, res(i+1).dzdx); + end + if opts.conserveMemory + res(i+1).dzdx = [] ; + end + if gpuMode & opts.sync + wait(gpuDevice) ; + end + res(i).backwardTime = toc(res(i).backwardTime) ; + end +end
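

A minimal usage sketch (not part of the patch itself): extracting the learned 30-dimensional codes by truncating the trained network at the 'code' layer, i.e. layers 1 through 7. It assumes the net.mat and imdb.mat files produced by cnn_mnist_autoencoder_training_demo.m; the choice of 100 digits and the variable names are illustrative only.

    load('net.mat');                           % net and opts, as saved by the training demo
    load(opts.imdbPath);                       % imdb
    encoder = net;
    encoder.layers = net.layers(1 : 7);        % encoder_1 through the 'code' layer
    im = imdb.images.data(:, :, :, 1 : 100);   % 1 x 1 x 784 x 100 batch of digits
    if opts.useGpu
        im = gpuArray(im);
    end
    res = vl_simplenn(encoder, im);
    codes = squeeze(gather(res(end).x));       % 30 x 100 array of codes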
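
A quick finite-difference sketch checking the derivative convention used by euclideanloss (sigmoidcrossentropyloss can be probed the same way); the probe index and step size below are arbitrary choices, not values from the patch.

    rng(0);
    X = randn(1, 1, 784, 2, 'single');
    c = rand(1, 1, 784, 2, 'single');
    dzdx = euclideanloss(X, c, single(1));       % analytic derivative, X - c
    i = 123; epsilon = 1e-2;
    dX = zeros(size(X), 'single');
    dX(1, 1, i, 1) = epsilon;
    numeric = (euclideanloss(X + dX, c) - euclideanloss(X - dX, c)) / (2 * epsilon);
    fprintf('analytic %.4f, numeric %.4f\n', dzdx(1, 1, i, 1), numeric);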
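
The gradient step in cnn_train_adagrad.m reduces to the scalar AdaGrad recursion below; this toy loop (all values illustrative) shows the effective step size shrinking as squared gradients accumulate in G.

    G = 0; w = 5; lr = 1e-2; delta = 1e-8;       % delta plays the role of opts.delta
    for t = 1 : 1000
        g = 2 * w;                               % gradient of the toy objective w^2
        G = G + g ^ 2;                           % accumulate squared gradients
        w = w - lr / (delta + sqrt(G)) * g;      % same form as the filters/biases update
    end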