implements batchnorm and layernorm for MLPs #64

Open · wants to merge 2 commits into base: master
2 changes: 1 addition & 1 deletion nupic/embodied/multitask/modules/__init__.py
@@ -20,4 +20,4 @@
#
# ------------------------------------------------------------------------------
from .custom_dendritic_mlp import CustomDendriticMLP
from .gaussian_mlp_module import GaussianTwoHeadedMLPModule, GaussianTwoHeadedDendriticMLPModule
from .gaussian_mlp_module import GaussianTwoHeadedMLPModule, GaussianTwoHeadedDendriticMLPModule, MLPModule
276 changes: 256 additions & 20 deletions nupic/embodied/multitask/modules/gaussian_mlp_module.py
@@ -20,16 +20,19 @@
#
# ------------------------------------------------------------------------------
import abc
import copy

import torch
from torch import nn
from torch.distributions import Normal
from torch.distributions.independent import Independent
import torch.nn.functional as F

from garage.torch.distributions import TanhNormal
from garage.torch.modules import MultiHeadedMLPModule

from nupic.embodied.multitask.modules import CustomDendriticMLP
from garage.torch import NonLinearity

class GaussianMLPBaseModule(nn.Module):
"""Base of GaussianMLPModel. Adapted from:
@@ -88,14 +91,14 @@ def __init__(
hidden_sizes,
hidden_nonlinearity,
output_nonlinearity,
min_std,
max_std,
normal_distribution_cls,
init_std,
min_std,
max_std,
normal_distribution_cls,
init_std,
std_parameterization,
hidden_w_init,
hidden_b_init,
output_w_init,
output_w_init,
output_b_init,
layer_normalization,
learn_std
@@ -114,7 +117,7 @@ def __init__(

self._min_std = torch.Tensor([min_std])
self.register_buffer("min_std", self._min_std)

self._max_std = torch.Tensor([max_std])
self.register_buffer("max_std", self._max_std)

@@ -233,12 +236,12 @@ def __init__(
hidden_sizes,
hidden_nonlinearity,
output_nonlinearity,
min_std,
max_std,
normal_distribution_cls,
init_std,
min_std,
max_std,
normal_distribution_cls,
init_std,
std_parameterization,
hidden_w_init,
hidden_w_init,
hidden_b_init,
output_w_init,
output_b_init,
@@ -265,7 +268,7 @@ def __init__(
learn_std=learn_std,
)

self.mean_log_std = MultiHeadedMLPModule(
self.mean_log_std = CustomMultiHeadedMLPModule(
n_heads=2,
input_dim=input_dim,
output_dims=output_dim,
@@ -313,11 +316,11 @@ def __init__(
preprocess_module_type,
preprocess_output_dim,
preprocess_kw_percent_on,
min_std,
max_std,
normal_distribution_cls,
init_std,
std_parameterization,
min_std,
max_std,
normal_distribution_cls,
init_std,
std_parameterization,
layer_normalization,
learn_std
):
@@ -354,7 +357,7 @@ def __init__(
dendrite_weight_sparsity=dendrite_weight_sparsity,
dendrite_init=dendrite_init,
dendritic_layer_class=dendritic_layer_class,
output_nonlinearity=output_nonlinearity,
output_nonlinearity=output_nonlinearity,
preprocess_module_type=preprocess_module_type,
preprocess_output_dim=preprocess_output_dim,
preprocess_kw_percent_on=preprocess_kw_percent_on,
@@ -363,9 +366,242 @@ def get_mean_log_std(self, *inputs):
def get_mean_log_std(self, *inputs):
"""Get mean and std of Gaussian distribution given inputs.
Args:

Returns:
torch.Tensor: The mean of Gaussian distribution.
torch.Tensor: The variance of Gaussian distribution.
"""
return self.mean_log_std(*inputs)
return self.mean_log_std(*inputs)


class CustomMultiHeadedMLPModule(nn.Module):
"""MultiHeadedMLPModule Model.

A PyTorch module composed only of a multi-layer perceptron (MLP) with
multiple parallel output layers, which maps real-valued inputs to
real-valued outputs. The length of the output list is n_heads, and the
shape of each output element depends on the corresponding output dimension.

Args:
n_heads (int): Number of different output layers
input_dim (int): Dimension of the network input.
output_dims (int or list or tuple): Dimension of the network output.
hidden_sizes (list[int]): Output dimension of dense layer(s).
For example, (32, 32) means this MLP consists of two
hidden layers, each with 32 hidden units.
hidden_nonlinearity (callable or torch.nn.Module or list or tuple):
Activation function for intermediate dense layer(s).
It should return a torch.Tensor. Set it to None to maintain a
linear activation.
hidden_w_init (callable): Initializer function for the weight
of intermediate dense layer(s). The function should return a
torch.Tensor.
hidden_b_init (callable): Initializer function for the bias
of intermediate dense layer(s). The function should return a
torch.Tensor.
output_nonlinearities (callable or torch.nn.Module or list or tuple):
Activation function for output dense layer. It should return a
torch.Tensor. Set it to None to maintain a linear activation.
Size of the parameter should be 1 or equal to n_heads.
output_w_inits (callable or list or tuple): Initializer function for
the weight of output dense layer(s). The function should return a
torch.Tensor. Size of the parameter should be 1 or equal to n_heads.
output_b_inits (callable or list or tuple): Initializer function for
the bias of output dense layer(s). The function should return a
torch.Tensor. Size of the parameter should be 1 or equal to n_heads.
hidden_layer_normalization (str): Normalization to insert after each
hidden linear layer; "batch_normalization", "layer_normalization",
or None for no normalization.
hidden_layer_normalization_affine (bool): Whether the normalization
layers use learnable affine parameters.

"""

def __init__(self,
n_heads,
input_dim,
output_dims,
hidden_sizes,
hidden_nonlinearity=torch.relu,
hidden_w_init=nn.init.xavier_normal_,
hidden_b_init=nn.init.zeros_,
output_nonlinearities=None,
output_w_inits=nn.init.xavier_normal_,
output_b_inits=nn.init.zeros_,
hidden_layer_normalization=None,
hidden_layer_normalization_affine=False,
):
super().__init__()

self._layers = nn.ModuleList()

output_dims = self._check_parameter_for_output_layer(
"output_dims", output_dims, n_heads)
output_w_inits = self._check_parameter_for_output_layer(
"output_w_inits", output_w_inits, n_heads)
output_b_inits = self._check_parameter_for_output_layer(
"output_b_inits", output_b_inits, n_heads)
output_nonlinearities = self._check_parameter_for_output_layer(
"output_nonlinearities", output_nonlinearities, n_heads)

self._layers = nn.ModuleList()

prev_size = input_dim
for size in hidden_sizes:
hidden_layers = nn.Sequential()
linear_layer = nn.Linear(prev_size, size)
hidden_w_init(linear_layer.weight)
hidden_b_init(linear_layer.bias)
hidden_layers.add_module("linear", linear_layer)

if hidden_layer_normalization == "batch_normalization":
hidden_layers.add_module(
"batch_normalization",
nn.BatchNorm1d(size, affine=hidden_layer_normalization_affine)
)
elif hidden_layer_normalization == "layer_normalization":
hidden_layers.add_module(
"layer_normalization",
nn.LayerNorm(
size, elementwise_affine=hidden_layer_normalization_affine
)
)

if hidden_nonlinearity:
hidden_layers.add_module("non_linearity",
NonLinearity(hidden_nonlinearity))

self._layers.append(hidden_layers)
prev_size = size

print("Running network with batch normalization")
print(hidden_layers)
self._output_layers = nn.ModuleList()
for i in range(n_heads):
output_layer = nn.Sequential()
linear_layer = nn.Linear(prev_size, output_dims[i])
output_w_inits[i](linear_layer.weight)
output_b_inits[i](linear_layer.bias)
output_layer.add_module("linear", linear_layer)

if output_nonlinearities[i]:
output_layer.add_module("non_linearity",
NonLinearity(output_nonlinearities[i]))

self._output_layers.append(output_layer)

@classmethod
def _check_parameter_for_output_layer(cls, var_name, var, n_heads):
"""Check input parameters for output layer are valid.

Args:
var_name (str): variable name
var (any): variable to be checked
n_heads (int): number of head

Returns:
list: list of variables (length of n_heads)

Raises:
ValueError: if the variable is a list but length of the variable
is not equal to n_heads

"""
if isinstance(var, (list, tuple)):
if len(var) == 1:
return list(var) * n_heads
if len(var) == n_heads:
return var
msg = ("{} should be either an integer or a collection of length "
"n_heads ({}), but {} provided.")
raise ValueError(msg.format(var_name, n_heads, var))
return [copy.deepcopy(var) for _ in range(n_heads)]

def forward(self, input_val):
"""Forward method.

Args:
input_val (torch.Tensor): Input values with (N, *, input_dim)
shape.

Returns:
List[torch.Tensor]: Output values

"""
x = input_val
for layer in self._layers:
x = layer(x)

return [output_layer(x) for output_layer in self._output_layers]


class MLPModule(CustomMultiHeadedMLPModule):
"""MLP Model.

A Pytorch module composed only of a multi-layer perceptron (MLP), which
maps real-valued inputs to real-valued outputs.

Args:
input_dim (int) : Dimension of the network input.
output_dim (int): Dimension of the network output.
hidden_sizes (list[int]): Output dimension of dense layer(s).
For example, (32, 32) means this MLP consists of two
hidden layers, each with 32 hidden units.
hidden_nonlinearity (callable or torch.nn.Module): Activation function
for intermediate dense layer(s). It should return a torch.Tensor.
Set it to None to maintain a linear activation.
hidden_w_init (callable): Initializer function for the weight
of intermediate dense layer(s). The function should return a
torch.Tensor.
hidden_b_init (callable): Initializer function for the bias
of intermediate dense layer(s). The function should return a
torch.Tensor.
output_nonlinearity (callable or torch.nn.Module): Activation function
for output dense layer. It should return a torch.Tensor.
Set it to None to maintain a linear activation.
output_w_init (callable): Initializer function for the weight
of output dense layer(s). The function should return a
torch.Tensor.
output_b_init (callable): Initializer function for the bias
of output dense layer(s). The function should return a
torch.Tensor.
layer_normalization (bool): Bool for using layer normalization or not.

"""

def __init__(self,
input_dim,
output_dim,
hidden_sizes,
hidden_nonlinearity=F.relu,
hidden_w_init=nn.init.xavier_normal_,
hidden_b_init=nn.init.zeros_,
output_nonlinearity=None,
output_w_init=nn.init.xavier_normal_,
output_b_init=nn.init.zeros_,
layer_normalization=False):
super().__init__(1, input_dim, output_dim, hidden_sizes,
hidden_nonlinearity, hidden_w_init, hidden_b_init,
output_nonlinearity, output_w_init, output_b_init,
layer_normalization)

self._output_dim = output_dim

def forward(self, input_value):
"""Forward method.

Args:
input_value (torch.Tensor): Input values with (N, *, input_dim)
shape.

Returns:
torch.Tensor: Output value

"""
return super().forward(input_value)[0]

@property
def output_dim(self):
"""Return output dimension of network.

Returns:
int: Output dimension of network.

"""
return self._output_dim
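
For context, here is a minimal usage sketch of the new hidden_layer_normalization option added above. The import path follows this diff (the class is defined in gaussian_mlp_module.py but not exported from the package __init__); the dimensions and variable names are illustrative assumptions, not part of the PR.

import torch

from nupic.embodied.multitask.modules.gaussian_mlp_module import CustomMultiHeadedMLPModule

# Two-headed MLP (e.g. mean and log-std heads) with batch normalization
# inserted between each hidden linear layer and its nonlinearity.
module = CustomMultiHeadedMLPModule(
    n_heads=2,
    input_dim=12,
    output_dims=4,  # an int is broadcast to [4, 4] by _check_parameter_for_output_layer
    hidden_sizes=(32, 32),
    hidden_nonlinearity=torch.relu,
    hidden_layer_normalization="batch_normalization",
    hidden_layer_normalization_affine=False,
)

x = torch.randn(8, 12)  # batch of 8 inputs
mean, log_std = module(x)  # forward() returns one tensor per head
print(mean.shape, log_std.shape)  # torch.Size([8, 4]) torch.Size([8, 4])

Passing hidden_layer_normalization="layer_normalization" would insert nn.LayerNorm instead, and omitting the argument keeps the original Linear -> nonlinearity stacking.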
2 changes: 1 addition & 1 deletion nupic/embodied/multitask/policies/__init__.py
@@ -19,4 +19,4 @@
# http://numenta.org/licenses/
#
# ------------------------------------------------------------------------------
from .gaussian_mlp_policy import GaussianMLPPolicy, GaussianDendriticMLPPolicy
from .gaussian_mlp_policy import GaussianMLPPolicy, GaussianDendriticMLPPolicy, ContinuousMLPQFunction
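
And a short sketch of the single-headed MLPModule wrapper exposed through the modules package in this diff; the shapes and names here are assumptions for illustration only.

import torch

from nupic.embodied.multitask.modules import MLPModule

model = MLPModule(
    input_dim=17,
    output_dim=1,
    hidden_sizes=(64, 64),
)

x = torch.randn(32, 17)
out = model(x)  # MLPModule.forward() returns the first (and only) head's output
print(out.shape)  # torch.Size([32, 1])
print(model.output_dim)  # 1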