Commit 4d3f0d0 (1 parent: c83a1a2)
Showing 12 changed files with 232 additions and 172 deletions.
```diff
@@ -1,13 +0,0 @@
-from .batchnorm import batchnorm
-from .conv import conv_nd
-from .pooling import max_pool
-
-__all__ = ["conv_nd", "max_pool", "batchnorm"]
-
-
-try:
-    from .gru import gru
-
-    __all__ += ["gru"]
-except ImportError:  # pragma: no cover
-    pass
```
@@ -1,173 +1,60 @@ (the module is rewritten in full)

Removed (the previous Operation-based implementation):

```python
from typing import Optional, Tuple, Union

import numpy as np

from mygrad import Tensor
from mygrad.operation_base import Operation
from mygrad.typing import ArrayLike

__all__ = ["batchnorm"]


# TODO: Remove affine parameters from Operation
class BatchNorm(Operation):
    """
    Attributes
    ----------
    mean : numpy.ndarray
    var : numpy.ndarray

    Notes
    -----
    `mean` and `var` are bound as instance-attributes upon
    calling the batch-norm instance.
    """

    def __call__(self, x, gamma, beta, *, eps):
        """
        y(x) = (x - E[x]) / sqrt(Var[x} + eps)
        batchnorm(x) = gamma * y(x) + beta

        Parameters
        ----------
        x : mygrad.Tensor
        gamma : Optional[mygrad.Tensor]
        beta : Optional[mygrad.Tensor]
        eps : Real
            A small non-negative number.

        Returns
        -------
        numpy.ndarray
        """
        normed_dims = tuple(i for i in range(x.ndim) if i != 1)
        keepdims_shape = tuple(1 if n != 1 else d for n, d in enumerate(x.shape))

        self.variables = tuple(i for i in (x, gamma, beta))

        if gamma.size == 0:
            gamma = None
        if beta.size == 0:
            beta = None

        self.gamma = gamma
        self.beta = beta

        x = x.data
        self.x_norm = None  # required for backprop through gamma
        self.mean = x.mean(axis=normed_dims)
        self.var = x.var(axis=normed_dims)

        if eps:
            self.var += eps

        y = x - self.mean.reshape(keepdims_shape)
        self._std = np.sqrt(self.var).reshape(keepdims_shape)  # sqrt(var + eps)
        y /= self._std
        self.x_norm = y

        # optional affine transformation
        if gamma is not None:
            gamma = gamma.data
            # must copy `y` to prevent mutation of `self.x_norm`
            y = y * gamma.reshape(keepdims_shape)

        if beta is not None:
            beta = beta.data
            y = y + beta.reshape(keepdims_shape)
        return y

    def backward_var(self, grad, index, **kwargs):
        x = self.variables[0].data
        if index == 0:  # backprop through x
            normed_dims = tuple(i for i in range(x.ndim) if i != 1)
            keepdims_shape = tuple(1 if n != 1 else d for n, d in enumerate(x.shape))
            N = x.size / x.shape[1]

            # all sums carried over non-channel dims
            # (1/sqrt(var + eps)) * [dL - dL.mean() - (1/N)*x_norm*(x_norm @ dL)]
            grad_ = grad - np.mean(grad, axis=normed_dims, keepdims=True)

            rterm = self.x_norm * np.reshape(
                np.einsum(grad, range(x.ndim), self.x_norm, range(x.ndim), [1]),
                keepdims_shape,
            )
            rterm /= N
            grad_ -= rterm
            grad_ /= self._std
            if (
                self.gamma is not None
            ):  # backprop through optional affine transformation
                gamma = self.gamma.data
                grad_ *= gamma.reshape(keepdims_shape)
            return grad_

        elif index == 1 and self.gamma is not None:  # backprop through gamma
            return np.einsum(grad, range(x.ndim), self.x_norm, range(x.ndim), [1])

        elif (index == 1 and self.gamma is None) or index == 2:
            normed_dims = tuple(i for i in range(x.ndim) if i != 1)
            return grad.sum(axis=normed_dims)
        else:  # pragma: no cover
            raise IndexError


def batchnorm(
    x: ArrayLike,
    *,
    gamma: Optional[ArrayLike] = None,
    beta: Optional[ArrayLike] = None,
    eps: float,
    constant: Optional[bool] = None
) -> Tensor:
    """
    Performs batch normalization on ``x``::

        y(x) = (x - E[x]) / sqrt(Var[x] + eps)
        batchnorm(x) = gamma * y(x) + beta

    Where :math:`E[x]` and :math:`Var[x]` represent the mean and variance, respectively,
    over axis-1 of ``x``. The subsequent affine transformation on ``y``
    is optional.

    Parameters
    ----------
    x : array_like, shape=(N, C, ...)
        The batch to be normalized within each entry of C

    gamma : Optional[array_like], shape=(C,)
        Optional per-channel scaling factors to be applied after the
        normalization step.

    beta : Optional[array_like], shape=(C,)
        Optional per-channel scaling bias factors to be applied after the
        normalization step.

    eps : Real
        A small non-negative number.

    constant : bool, optional (default=False)
        If True, the resulting Tensor is a constant.

    Returns
    -------
    mygrad.Tensor
        The batch-normalized data.

    Examples
    --------
    >>> import mygrad as mg
    >>> from mygrad.nnet import batchnorm
    >>> x = mg.Tensor([1., 4., 1.]).reshape(3, 1)
    >>> batchnorm(x, eps=0)
    Tensor([[-0.70710678],
            [ 1.41421356],
            [-0.70710678]])
    """
    # pass gamma and beta as empty arrays if they are not supplied
    if gamma is None:
        gamma = np.array([])
    if beta is None:
        beta = np.array([])
    return Tensor._op(
        BatchNorm, x, gamma, beta, op_kwargs=dict(eps=eps), constant=constant
    )
```

Added (the replacement BatchNorm layer class):

```python
import numpy as np

from mygrad import Tensor
from mygrad.tensor_creation.funcs import ones, zeros
from mygrad.nnet.layers.operations.batchnorm import batchnorm as batchnorm_op


class BatchNorm:
    """ A batch normalization layer.

    This class will perform an n-dimensional batch normalization operation on an
    (N, D, ...)-shaped tensor scaled by γ of shape (D, ...) and shifted by β of shape (D, ...).
    """

    def __init__(self, input_channels: int, momentum: float = 0.1):
        """ Initialize a batch normalization layer.

        Parameters
        ----------
        input_channels : int
            The number of channels of the data to be batch-normalized.

        momentum : float, optional (default=0.1)
            The momentum value used to maintain moving averages.
        """
        self.gamma = ones((1, input_channels), dtype=np.float32)
        self.beta = zeros((1, input_channels), dtype=np.float32)
        self.moving_mean = np.zeros((1, input_channels), dtype=np.float32)
        self.moving_variance = np.zeros((1, input_channels), dtype=np.float32)
        self.momentum = momentum
        self.input_channels = input_channels

    def __call__(self, x: Tensor, test: bool = False) -> Tensor:
        """ Perform the forward-pass of n-dimensional batch normalization over axis 1 on `x`.

        Parameters
        ----------
        x : Union[numpy.ndarray, mygrad.Tensor], shape=(N, D, ...)
            The data to normalize.

        test : boolean, optional (default=False)
            Determines whether the layer is being used at training time. The mean and variance
            will be computed for the batch during training, while averaged batch statistics will
            be used at test time.
        """
        if test:
            # use the averaged batch statistics from training rather than computing them on a test batch
            keepdims_shape = tuple(1 if n != 1 else d for n, d in enumerate(x.shape))
            x = x - self.moving_mean.reshape(keepdims_shape)
            x /= np.sqrt(self.moving_variance.reshape(keepdims_shape) + 1e-08)
            return self.gamma * x + self.beta

        x_norm = batchnorm_op(x, gamma=self.gamma, beta=self.beta, eps=1e-08)

        batch_mean = x_norm.creator.mean
        batch_variance = x_norm.creator.var

        self.moving_mean *= 1 - self.momentum
        self.moving_mean += self.momentum * batch_mean
        self.moving_variance *= 1 - self.momentum
        self.moving_variance += self.momentum * batch_variance
        return x_norm
```
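For context, here is a minimal usage sketch of the layer class added above. The import path for `BatchNorm` is an assumption (the file path is not shown in this view), and the inputs are random placeholder data shaped (N, D) per the docstring; treat it as a sketch, not part of the commit.

```python
# Usage sketch for the BatchNorm layer introduced in this commit.
# NOTE: the import path below is assumed -- adjust it to wherever this
# commit actually places the BatchNorm class.
import numpy as np
import mygrad as mg

from mygrad.nnet.layers.batchnorm import BatchNorm  # assumed module path

bn = BatchNorm(input_channels=3, momentum=0.1)

# Training mode: per-batch statistics are used, and the stored moving
# averages are nudged toward them:
#   moving = (1 - momentum) * moving + momentum * batch_statistic
x_train = mg.Tensor(np.random.rand(16, 3).astype(np.float32))
out = bn(x_train)
print(bn.moving_mean)   # has moved toward the batch mean

# Test mode: the stored moving mean/variance are used instead of batch stats.
x_test = np.random.rand(4, 3).astype(np.float32)
pred = bn(x_test, test=True)
```

Note the momentum convention visible in the diff: the stored statistic is scaled by `1 - momentum` and the fresh batch statistic contributes `momentum`, so with the default of 0.1 the moving averages adapt gradually across batches.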
```diff
@@ -0,0 +1,13 @@
+from .batchnorm import batchnorm
+from .conv import conv_nd
+from .pooling import max_pool
+
+__all__ = ["conv_nd", "max_pool", "batchnorm"]
+
+
+try:
+    from .gru import gru
+
+    __all__ += ["gru"]
+except ImportError:  # pragma: no cover
+    pass
```
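The `try`/`except ImportError` guard in this `__init__` (both the removed copy and the re-added one above) is what keeps the package importable when the optional `gru` module cannot be imported. A minimal standalone sketch of the same pattern, using a hypothetical optional dependency name (`fast_rnn`) purely for illustration:

```python
# Sketch of the optional-import pattern used in the __init__ above.
# "fast_rnn" is a hypothetical optional dependency, not part of this repository.
__all__ = ["conv_nd", "max_pool", "batchnorm"]

try:
    from fast_rnn import gru  # only importable when the optional extra is installed

    __all__ += ["gru"]        # expose it publicly only if the import succeeded
except ImportError:  # pragma: no cover
    pass                      # the rest of the package keeps working without it
```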