Skip to content

Commit

Permalink
Refactor mygrad.nnet.layers
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmascharka committed Feb 6, 2022
1 parent c83a1a2 commit 4d3f0d0
Show file tree
Hide file tree
Showing 12 changed files with 232 additions and 172 deletions.
13 changes: 0 additions & 13 deletions src/mygrad/nnet/layers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +0,0 @@
from .batchnorm import batchnorm
from .conv import conv_nd
from .pooling import max_pool

__all__ = ["conv_nd", "max_pool", "batchnorm"]


try:
from .gru import gru

__all__ += ["gru"]
except ImportError: # pragma: no cover
pass
197 changes: 42 additions & 155 deletions src/mygrad/nnet/layers/batchnorm.py
Original file line number Diff line number Diff line change
@@ -1,173 +1,60 @@
from typing import Optional, Tuple, Union

import numpy as np

from mygrad import Tensor
from mygrad.operation_base import Operation
from mygrad.typing import ArrayLike

__all__ = ["batchnorm"]
from mygrad.tensor_creation.funcs import ones, zeros
from mygrad.nnet.layers.operations.batchnorm import batchnorm as batchnorm_op


# TODO: Remove affine parameters from Operation
class BatchNorm(Operation):
"""
Attributes
----------
mean : numpy.ndarray
var : numpy.ndarray
class BatchNorm:
""" A batch normalization layer.
Notes
-----
`mean` and `var` are bound as instance-attributes upon
calling the batch-norm instance.
This class will perform an n-dimensional batch normalization operation on an
(N, D, ...)-shaped tensor scaled by γ of shape (D, ...) and shifted by β of shape (D, ...).
"""

def __call__(self, x, gamma, beta, *, eps):
"""
y(x) = (x - E[x]) / sqrt(Var[x] + eps)
batchnorm(x) = gamma * y(x) + beta
def __init__(self, input_channels: int, momentum: float = 0.1):
""" Initialize a batch normalization layer.
Parameters
----------
x : mygrad.Tensor
gamma : Optional[mygrad.Tensor]
beta : Optional[mygrad.Tensor]
eps : Real
A small non-negative number.
Returns
-------
numpy.ndarray
input_channels : int
The number of channels of the data to be batch-normalized.
momentum : float, optional (default=0.1)
The momentum value used to maintain moving averages.
"""
normed_dims = tuple(i for i in range(x.ndim) if i != 1)
keepdims_shape = tuple(1 if n != 1 else d for n, d in enumerate(x.shape))

self.variables = tuple(i for i in (x, gamma, beta))

if gamma.size == 0:
gamma = None
if beta.size == 0:
beta = None
self.gamma = ones((1, input_channels), dtype=np.float32)
self.beta = zeros((1, input_channels), dtype=np.float32)
self.moving_mean = np.zeros((1, input_channels), dtype=np.float32)
self.moving_variance = np.zeros((1, input_channels), dtype=np.float32)
self.momentum = momentum
self.input_channels = input_channels

self.gamma = gamma
self.beta = beta
def __call__(self, x: Tensor, test: bool = False) -> Tensor:
""" Perform the forward-pass of n-dimensional batch normalization over axis 1 on `x`.
x = x.data
self.x_norm = None # required for backprop through gamma
self.mean = x.mean(axis=normed_dims)
self.var = x.var(axis=normed_dims)

if eps:
self.var += eps

y = x - self.mean.reshape(keepdims_shape)
self._std = np.sqrt(self.var).reshape(keepdims_shape) # sqrt(var + eps)
y /= self._std
self.x_norm = y
# optional affine transformation
if gamma is not None:
gamma = gamma.data
# must copy `y` to prevent mutation of `self.x_norm`
y = y * gamma.reshape(keepdims_shape)

if beta is not None:
beta = beta.data
y = y + beta.reshape(keepdims_shape)
return y

def backward_var(self, grad, index, **kwargs):
x = self.variables[0].data
if index == 0: # backprop through x
normed_dims = tuple(i for i in range(x.ndim) if i != 1)
Parameters
----------
x : Union[numpy.ndarray, mygrad.Tensor], shape=(N, D, ...)
The data to normalize.
test : boolean, optional (default=False)
Determines whether the layer is being used at test time (True) or at training time (False). The mean and variance
will be computed for the batch during training, while averaged batch statistics will
be used at test time.
"""
if test:
# use the averaged batch statistics from training rather than computing them on a test batch
keepdims_shape = tuple(1 if n != 1 else d for n, d in enumerate(x.shape))
N = x.size / x.shape[1]
x = x - self.moving_mean.reshape(keepdims_shape)
x /= np.sqrt(self.moving_variance.reshape(keepdims_shape) + 1e-08)
return self.gamma * x + self.beta

# all sums carried over non-channel dims
# (1/sqrt(var + eps)) * [dL - dL.mean() - (1/N)*x_norm*(x_norm @ dL)]
grad_ = grad - np.mean(grad, axis=normed_dims, keepdims=True)
x_norm = batchnorm_op(x, gamma=self.gamma, beta=self.beta, eps=1e-08)

rterm = self.x_norm * np.reshape(
np.einsum(grad, range(x.ndim), self.x_norm, range(x.ndim), [1]),
keepdims_shape,
)
rterm /= N
grad_ -= rterm
grad_ /= self._std
if (
self.gamma is not None
): # backprop through optional affine transformation
gamma = self.gamma.data
grad_ *= gamma.reshape(keepdims_shape)
return grad_
batch_mean = x_norm.creator.mean
batch_variance = x_norm.creator.var

elif index == 1 and self.gamma is not None: # backprop through gamma
return np.einsum(grad, range(x.ndim), self.x_norm, range(x.ndim), [1])

elif (index == 1 and self.gamma is None) or index == 2:
normed_dims = tuple(i for i in range(x.ndim) if i != 1)
return grad.sum(axis=normed_dims)
else: # pragma: no cover
raise IndexError


def batchnorm(
    x: ArrayLike,
    *,
    gamma: Optional[ArrayLike] = None,
    beta: Optional[ArrayLike] = None,
    eps: float,
    constant: Optional[bool] = None
) -> Tensor:
    """
    Batch-normalize ``x`` and optionally apply a per-channel affine map::

                y(x) = (x - E[x]) / sqrt(Var[x] + eps)
        batchnorm(x) = gamma * y(x) + beta

    :math:`E[x]` and :math:`Var[x]` are the mean and variance of ``x``
    computed separately for each entry along axis 1, i.e. reduced over
    every axis of ``x`` except axis 1. The affine step (``gamma`` /
    ``beta``) is applied only for the parameters that are supplied.

    Parameters
    ----------
    x : array_like, shape=(N, C, ...)
        The batch to normalize; each of the C entries along axis 1 is
        normalized independently.

    gamma : Optional[array_like], shape=(C,)
        Per-channel scale applied after normalization. Omitted when None.

    beta : Optional[array_like], shape=(C,)
        Per-channel shift (bias) applied after normalization. Omitted
        when None.

    eps : Real
        A small non-negative number added to the variance before the
        square root is taken. Keyword-only and required.

    constant : Optional[bool], optional (default=None)
        Forwarded unchanged to ``Tensor._op``. If True, the resulting
        Tensor is a constant.

    Returns
    -------
    mygrad.Tensor
        The batch-normalized data.

    Examples
    --------
    >>> import mygrad as mg
    >>> from mygrad.nnet import batchnorm
    >>> x = mg.Tensor([1., 4., 1.]).reshape(3, 1)
    >>> batchnorm(x, eps=0)
    Tensor([[-0.70710678],
            [ 1.41421356],
            [-0.70710678]])
    """
    # The BatchNorm operation treats a size-0 array as "parameter not
    # supplied", so a missing gamma/beta is encoded as an empty array.
    scale = np.array([]) if gamma is None else gamma
    shift = np.array([]) if beta is None else beta
    return Tensor._op(
        BatchNorm,
        x,
        scale,
        shift,
        op_kwargs={"eps": eps},
        constant=constant,
    )
self.moving_mean *= 1 - self.momentum
self.moving_mean += self.momentum * batch_mean
self.moving_variance *= 1 - self.momentum
self.moving_variance += self.momentum * batch_variance
return x_norm
13 changes: 13 additions & 0 deletions src/mygrad/nnet/layers/operations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from .batchnorm import batchnorm
from .conv import conv_nd
from .pooling import max_pool

__all__ = ["conv_nd", "max_pool", "batchnorm"]


try:
from .gru import gru

__all__ += ["gru"]
except ImportError: # pragma: no cover
pass
Loading

0 comments on commit 4d3f0d0

Please sign in to comment.