Commit a81c845: Documentation

JonathanAMichaels committed Jan 11, 2018 (parent 685a771)

Showing 9 changed files with 186 additions and 336 deletions.
README.md (4 changes: 3 additions & 1 deletion)
@@ -6,7 +6,7 @@

**Date:** 10.01.2018

## What is geneticRNN
## What is geneticRNN?

The current package is a MATLAB implementation of a simple genetic training algorithm for recurrent neural networks. My algorithm is a faithful implementation of Algorithm 1 as laid out in the paper [Deep Neuroevolution: Genetic Algorithms Are a Competitive Alternative for Training Deep Neural Networks for Reinforcement Learning](https://arxiv.org/abs/1712.06567).
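
For readers who want the gist, Algorithm 1 boils down to a truncation-selection loop: each generation, parents are sampled from the previous generation's top performers, mutated by adding Gaussian noise, and ranked by fitness, with the single best individual carried over unchanged. A minimal MATLAB sketch is shown below; `initPolicy` and `evaluateFitness` are hypothetical stand-ins, not functions from this package, and the loop omits the decay mechanisms described later.

```matlab
populationSize = 5000;   % individuals evaluated per generation
truncationSize = 50;     % parents kept for the next generation
mutationPower  = 1e-2;   % std. dev. of the Gaussian mutation noise
numGenerations = 100;

% Start from a set of random parent policies (parameter vectors)
parents = cell(1, truncationSize);
for i = 1:truncationSize
    parents{i} = initPolicy();                 % hypothetical initializer
end

for gen = 1:numGenerations
    population = cell(1, populationSize);
    fitness = zeros(1, populationSize);
    for i = 1:populationSize
        if i == 1
            population{i} = parents{1};        % elitism: best parent kept unmutated
        else
            p = parents{randi(truncationSize)};                  % pick a random parent
            population{i} = p + mutationPower * randn(size(p));  % Gaussian mutation
        end
        fitness(i) = evaluateFitness(population{i});             % hypothetical evaluator
    end
    [~, order] = sort(fitness, 'descend');     % rank by fitness
    parents = population(order(1:truncationSize));
end
```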

@@ -24,6 +24,8 @@ Deep artificial neural networks (DNNs) are typically trained via gradient-based
- Policies are multiplied by a decay term to prevent variance explosion from summing many normal distributions.
- A decay term is subtracted from the policy to bring unneeded weights closer to zero. The general effect is a power-law distribution of weights rather than a normal one.
- Mutation power decays automatically over generations, and decays more rapidly when a generation produces no improved policy (see the sketch after this list).
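
In MATLAB, these modifications might look roughly like the following; the constants, variable names, and exact update rules are illustrative assumptions, not the package's actual values:

```matlab
parent = randn(100, 1);        % example policy parameter vector
mutationPower = 1e-2;          % current mutation standard deviation
foundImprovedPolicy = false;   % would be set by the fitness evaluation

varianceDecay = 0.99;          % multiplicative decay keeps variance bounded
shrinkage     = 1e-4;          % subtractive decay pulls small weights to zero
powerDecay    = 0.99;          % mutation-power decay applied every generation
failureDecay  = 0.8;           % extra decay when no better policy is found

% Mutation: scale by the decay term, then shrink weights toward zero
child = varianceDecay * (parent + mutationPower * randn(size(parent)));
child = sign(child) .* max(abs(child) - shrinkage, 0);

% Mutation power decays every generation, and faster after a failed one
mutationPower = mutationPower * powerDecay;
if ~foundImprovedPolicy
    mutationPower = mutationPower * failureDecay;
end
```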


## Documentation & Examples
All functions are documented throughout, and two examples illustrating the intended use of the package are provided with the release.
examples/geneticRNN_Example_CO.asv (118 changes: 0 additions & 118 deletions)

This file was deleted.

examples/geneticRNN_Example_CO.m (54 changes: 23 additions & 31 deletions)
@@ -1,23 +1,19 @@
% hebbRNN_Example_CO
% geneticRNN_Example_CO
%
% This function illustrates an example of reward-modulated Hebbian learning
% This function illustrates an example of a simple genetic learning algorithm
% in recurrent neural networks to complete a center-out reaching task.
%
%
% Copyright (c) Jonathan A Michaels 2016
% Copyright (c) Jonathan A Michaels 2018
% German Primate Center
% jonathanamichaels AT gmail DOT com
%
% If used in published work please see repository README.md for citation
% and license information: https://github.com/JonathanAMichaels/hebbRNN


clear
close all

numConds = 8; % Number of peripheral targets. Try changing this number to alter the difficulty!
totalTime = 70; % Total trial time
moveTime = 70;
numConds = 4; % Number of peripheral targets. Try changing this number to alter the difficulty!
totalTime = 40; % Total trial time
L = [3 3]; % Length of each segment of the arm

%% Populate target function passthrough data
@@ -30,52 +26,48 @@
inp = cell(1,numConds);
targ = cell(1,numConds);
ang = linspace(0, 2*pi - 2*pi/numConds, numConds);
blankTime = 20;
for cond = 1:numConds
inp{cond} = zeros(numConds+1, totalTime);
inp{cond}(cond,:) = 0.5;
% inp{cond}(numConds+1,1:totalTime-moveTime-1) = 5;
targ{cond} = [[zeros(totalTime-moveTime,1); nan(blankTime,1); ones(moveTime-blankTime,1)]*sin(ang(cond)) ...
[zeros(totalTime-moveTime,1); nan(blankTime,1); ones(moveTime-blankTime,1)]*cos(ang(cond))]';
inp{cond} = zeros(numConds, totalTime);
inp{cond}(cond,:) = 1;
targ{cond} = [ones(totalTime,1)*sin(ang(cond)) ones(totalTime,1)*cos(ang(cond))]';
end
% In the center-out reaching task the network needs to produce the joint angle
% velocities of a two-segment arm to reach to a number of peripheral
% targets spaced along a circle in the 2D plane, based on the desired target
% specified by the input.
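
% For intuition: given joint angles th and the segment lengths L defined above,
% the arm endpoint follows from standard planar forward kinematics. This is an
% illustrative sketch only; geneticRNN_COTargetFun may map the network output
% to kinematics differently.
th = [pi/4 pi/6];                                   % example joint angles
handX = L(1)*cos(th(1)) + L(2)*cos(th(1) + th(2));  % endpoint x position
handY = L(1)*sin(th(1)) + L(2)*sin(th(1) + th(2));  % endpoint y position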

%% Initialize network parameters
N = 200; % Number of neurons
N = 100; % Number of neurons
B = size(targ{1},1); % Outputs
I = size(inp{1},1); % Inputs
p = 1; % Sparsity
g = 1.1; % Spectral scaling
dt = 1; % Time step
tau = 10; % Time constant

%% Initialize learning parameters
evalOpts = [2 1]; % Plotting level and frequency of evaluation
targetFun = @geneticRNN_COTargetFun; % handle of custom target function
dt = 10; % Time step
tau = 50; % Time constant

%% Policy initialization parameters
policyInitInputs = {N, B, I, p, g, dt, tau};
policyInitInputsOptional = {'feedback', true, 'actFun', 'tanh', 'energyCost', 0.1};
policyInitInputsOptional = {'feedback', false};

mutationPower = 1e-2;
populationSize = 5000;
truncationSize = 50;
fitnessFunInputs = targ;
%% Initialize learning parameters
targetFun = @geneticRNN_COTargetFun; % handle of custom target function
mutationPower = 1e-2; % Standard deviation of normally distributed noise to add in each generation
populationSize = 5000; % Number of individuals in each generation
truncationSize = 50; % Number of individuals to save for next generation
fitnessFunInputs = targ; % Target data for fitness calculation
evalOpts = [2 1]; % Plotting level and frequency of evaluation

%% Train network
% This step should take about 5 minutes, depending on your processor.
% Can be stopped at any time by pressing the STOP button.
% Can be stopped at the desired time by pressing the STOP button and waiting for one iteration
% Look inside to see information about the many optional parameters.
[net, learnStats] = geneticRNN_learn_model_2(mutationPower, populationSize, truncationSize, fitnessFunInputs, policyInitInputs, ...
'input', inp, ...
[net, learnStats] = geneticRNN_learn_model_2(inp, mutationPower, populationSize, truncationSize, fitnessFunInputs, policyInitInputs, ...
'evalOpts', evalOpts, ...
'policyInitInputsOptional', policyInitInputsOptional, ...
'targetFun', targetFun, 'targetFunPassthrough', targetFunPassthrough);

% run model
[Z0, Z1, R, X, kin] = geneticRNN_run_model(net, 'input', inp, 'targetFun', targetFun, 'targetFunPassthrough', targetFunPassthrough);
[Z0, Z1, R, X, kin] = geneticRNN_run_model(net, inp, 'targetFun', targetFun, 'targetFunPassthrough', targetFunPassthrough);


%% Plot center-out reaching results
examples/geneticRNN_Example_DNMS.m (33 changes: 15 additions & 18 deletions)
@@ -1,16 +1,13 @@
% hebbRNN_Example_DNMS
% geneticRNN_Example_DNMS
%
% This function illustrates an example of reward-modulated Hebbian learning
% This function illustrates an example of a simple genetic learning algorithm
% in a recurrent neural network to complete a delayed nonmatch-to-sample
% task.
%
%
% Copyright (c) Jonathan A Michaels 2016
% Copyright (c) Jonathan A Michaels 2018
% German Primate Center
% jonathanamichaels AT gmail DOT com
%
% If used in published work please see repository README.md for citation
% and license information: https://github.com/JonathanAMichaels/hebbRNN


clear
@@ -57,30 +54,30 @@
I = size(inp{1},1); % Inputs
p = 1; % Sparsity
g = 1.2; % Spectral scaling
dt = 1; % Time step
tau = 10; % Time constant

%% Initialize learning parameters
evalOpts = [2 1]; % Plotting level and frequency of evaluation
dt = 10; % Time step
tau = 50; % Time constant

%% Policy initialization parameters
policyInitInputs = {N, B, I, p, g, dt, tau};
policyInitInputsOptional = {'feedback', true, 'actFun', 'tanh'};
policyInitInputsOptional = {'feedback', true};

mutationPower = 1e-2;
populationSize = 5000;
truncationSize = 10;
fitnessFunInputs = targ;
%% Initialize learning parameters
mutationPower = 1e-2; % Standard deviation of normally distributed noise to add in each generation
populationSize = 5000; % Number of individuals in each generation
truncationSize = 100; % Number of individuals to save for next generation
fitnessFunInputs = targ; % Target data for fitness calculation
policyInitFun = @geneticRNN_create_model;
evalOpts = [2 1]; % Plotting level and frequency of evaluation

%% Train network
% This step should take about 5 minutes, depending on your processor.
% Can be stopped at any time by pressing the STOP button.
% Can be stopped at the desired time by pressing the STOP button and waiting for one iteration
% Look inside to see information about the many optional parameters.
[net, learnStats] = geneticRNN_learn_model_2(mutationPower, populationSize, truncationSize, fitnessFunInputs, policyInitInputs, ...
'input', inp, ...
'evalOpts', evalOpts, ...
'policyInitInputsOptional', policyInitInputsOptional);

%% Run network
[Z0, Z1, R, X, kin] = geneticRNN_run_model(net(1), 'input', inp);
[Z0, Z1, R, X, kin] = geneticRNN_run_model(net, 'input', inp);

@@ -1,16 +1,13 @@
% hebbRNN_Example_DNMS
% geneticRNN_Example_DNMS
%
% This function illustrates an example of reward-modulated Hebbian learning
% This function illustrates an example of a simple genetic learning algorithm
% in a recurrent neural network to complete a delayed nonmatch-to-sample
% task.
%
%
% Copyright (c) Jonathan A Michaels 2016
% Copyright (c) Jonathan A Michaels 2018
% German Primate Center
% jonathanamichaels AT gmail DOT com
%
% If used in published work please see repository README.md for citation
% and license information: https://github.com/JonathanAMichaels/hebbRNN


clear
@@ -57,31 +54,30 @@ B = size(targ{1},1); % Outputs
I = size(inp{1},1); % Inputs
p = 1; % Sparsity
g = 1.2; % Spectral scaling
dt = 1; % Time step
tau = 10; % Time constant

%% Initialize learning parameters
systemNoise = 0.0; % Network noise level
evalOpts = [2 1]; % Plotting level and frequency of evaluation
dt = 10; % Time step
tau = 50; % Time constant

policyInitInputs = {N, B, I, p, g, dt, tau, systemNoise, true, 'tanh', 0.1};
%% Policy initialization parameters
policyInitInputs = {N, B, I, p, g, dt, tau};
policyInitInputsOptional = {'feedback', false};

mutationPower = 5e-2;
populationSize = 3000;
truncationSize = 400;
fitnessFun = @geneticRNN_fitness;
fitnessFunInputs = targ;
%% Initialize learning parameters
mutationPower = 1e-2; % Standard deviation of normally distributed noise to add
populationSize = 1000; % Number of individuals in each generation
truncationSize = 10; % Number of individuals to save for next generation
fitnessFunInputs = targ; % Target data for fitness calculation
policyInitFun = @geneticRNN_create_model;
evalOpts = [2 1]; % Plotting level and frequency of evaluation

%% Train network
% This step should take about 5 minutes, depending on your processor.
% Can be stopped at any time by pressing the STOP button.
% Can be stopped at the desired time by pressing the STOP button and waiting for one iteration
% Look inside to see information about the many optional parameters.
[net, learnStats] = geneticRNN_learn_model_2(mutationPower, populationSize, truncationSize, fitnessFunInputs, policyInitInputs, ...
'input', inp, ...
'evalOpts', evalOpts, ...
'policyInitInputsOptional', policyInitInputsOptional);

%% Run network
[Z0, Z1, R, dR, X, kin] = geneticRNN_run_model(net(1), 'input', inp);
[Z0, Z1, R, X, kin] = geneticRNN_run_model(net, 'input', inp);
