diff --git a/docs/source/experimental.rst b/docs/source/experimental.rst
index 86b4dc60f88..e2a11723ebb 100644
--- a/docs/source/experimental.rst
+++ b/docs/source/experimental.rst
@@ -88,6 +88,7 @@ Experimental features
 .. autofunction:: oneflow.experimental.nn.Linear
 .. autofunction:: oneflow.experimental.nn.CrossEntropyLoss
 .. autofunction:: oneflow.experimental.nn.NLLLoss
+.. autofunction:: oneflow.experimental.nn.MSELoss
 .. autofunction:: oneflow.experimental.nn.MarginRankingLoss
 .. autofunction:: oneflow.experimental.masked_fill
 .. autofunction:: oneflow.experimental.Tensor.masked_fill
diff --git a/oneflow/python/nn/modules/loss.py b/oneflow/python/nn/modules/loss.py
index 3c59f0f9ee9..2a92f8c6ba3 100644
--- a/oneflow/python/nn/modules/loss.py
+++ b/oneflow/python/nn/modules/loss.py
@@ -16,8 +16,10 @@
 from typing import Optional
 
 import oneflow as flow
+from oneflow.python.framework.tensor import Tensor
 from oneflow.python.oneflow_export import oneflow_export, experimental_api
 from oneflow.python.nn.module import Module
+from oneflow.python.nn.modules.math_ops import Subtract, Square, Sum, Mean
 
 
 @oneflow_export("nn.CrossEntropyLoss")
@@ -296,6 +298,123 @@ def forward(self, input, target):
         return res.mean()
 
 
+@oneflow_export("nn.MSELoss")
+@experimental_api
+class MSELoss(Module):
+    r"""The interface is consistent with PyTorch.
+    The documentation is referenced from:
+    https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html?highlight=mseloss#torch.nn.MSELoss
+
+    Creates a criterion that measures the mean squared error (squared L2 norm) between
+    each element in the input :math:`x` and target :math:`y`.
+
+    The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as:
+
+    .. math::
+        \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
+        l_n = \left( x_n - y_n \right)^2,
+
+    where :math:`N` is the batch size. If :attr:`reduction` is not ``'none'``
+    (default ``'mean'``), then:
+
+    .. math::
+        \ell(x, y) =
+        \begin{cases}
+            \operatorname{mean}(L), & \text{if reduction} = \text{`mean';}\\
+            \operatorname{sum}(L),  & \text{if reduction} = \text{`sum'.}
+        \end{cases}
+
+    :math:`x` and :math:`y` are tensors of arbitrary shapes with a total
+    of :math:`n` elements each.
+
+    The mean operation still operates over all the elements, and divides by :math:`n`.
+
+    The division by :math:`n` can be avoided if one sets ``reduction = 'sum'``.
+
+    Args:
+        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
+            the losses are averaged over each loss element in the batch. Note that for
+            some losses, there are multiple elements per sample. If the field :attr:`size_average`
+            is set to ``False``, the losses are instead summed for each minibatch. Ignored
+            when :attr:`reduce` is ``False``. Default: ``True``
+        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
+            losses are averaged or summed over observations for each minibatch depending
+            on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
+            batch element instead and ignores :attr:`size_average`. Default: ``True``
+        reduction (string, optional): Specifies the reduction to apply to the output:
+            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
+            ``'mean'``: the sum of the output will be divided by the number of
+            elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
+            and :attr:`reduce` are in the process of being deprecated, and in the meantime,
+            specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``
+
+    Shape:
+        - Input: :math:`(N, *)` where :math:`*` means any number of additional
+          dimensions
+        - Target: :math:`(N, *)`, same shape as the input
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.experimental as flow
+        >>> import numpy as np
+        >>> flow.enable_eager_execution()
+
+        >>> input = flow.Tensor(
+        ...     [[-0.02557137, 0.03101675, 1.37493674],
+        ...     [0.25599439, -1.08372561, -0.21006816]], dtype=flow.float32)
+        >>> target = flow.Tensor(
+        ...     [[-1.53105064, -0.68137555, 0.5931354],
+        ...     [-0.49158347, 0.93673637, 0.1324141]], dtype=flow.float32)
+        >>> m = flow.nn.MSELoss(reduction="none")
+        >>> out = m(input, target)
+        >>> print(out.numpy())
+        [[2.266468   0.50750285 0.61121327]
+         [0.55887264 4.082267   0.1172941 ]]
+        >>> m = flow.nn.MSELoss(reduction="mean")
+        >>> out = m(input, target)
+        >>> print(out.numpy())
+        [1.3572696]
+        >>> m = flow.nn.MSELoss(reduction="sum")
+        >>> out = m(input, target)
+        >>> print(out.numpy())
+        [8.143618]
+
+    """
+
+    def __init__(
+        self, reduction: str = "mean", size_average: bool = True, reduce: bool = True
+    ) -> None:
+        super().__init__()
+        if size_average is False:
+            raise ValueError("Argument size_average is not supported yet")
+        if reduce is False:
+            raise ValueError("Argument reduce is not supported yet")
+        assert reduction in [
+            "sum",
+            "none",
+            "mean",
+            None,
+        ], "Argument reduction only supports 'sum'/'mean'/'none'/None for now!"
+
+        self.reduction = reduction
+        self.square_op = Square()
+        self.subtract_op = Subtract()
+        self.sum_op = Sum()
+        self.mean_op = Mean()
+
+    def forward(self, input: Tensor, target: Tensor) -> Tensor:
+        squared_difference = self.square_op(self.subtract_op(input, target))
+        if self.reduction == "mean":
+            return self.mean_op(squared_difference)
+        elif self.reduction == "sum":
+            return self.sum_op(squared_difference)
+        else:
+            # No reduction: return the elementwise squared differences.
+            return squared_difference
+
+
 @oneflow_export("nn.MarginRankingLoss")
 @experimental_api
 class MarginRankingLoss(Module):
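Since ``forward`` just composes the already-exported functional ops (``Subtract``, ``Square``, then ``Mean`` or ``Sum``), each reduction mode can be sanity-checked against plain NumPy. A minimal eager-mode sketch (the tensor names and tolerance here are illustrative, not part of the patch):

.. code-block:: python

    import numpy as np
    import oneflow.experimental as flow

    flow.enable_eager_execution()

    x = np.random.randn(2, 3).astype(np.float32)
    y = np.random.randn(2, 3).astype(np.float32)
    input, target = flow.Tensor(x), flow.Tensor(y)

    # 'none' keeps the elementwise squared differences; 'mean'/'sum' reduce them.
    for reduction, np_ref in [
        ("none", np.square(x - y)),
        ("mean", np.mean(np.square(x - y))),
        ("sum", np.sum(np.square(x - y))),
    ]:
        out = flow.nn.MSELoss(reduction=reduction)(input, target)
        assert np.allclose(out.numpy(), np_ref, atol=1e-5)

Building the loss out of existing primitives also means the backward pass falls out of their individual gradients, which is what the new test at the end of this patch exercises.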
diff --git a/oneflow/python/ops/nn_ops.py b/oneflow/python/ops/nn_ops.py
index 6680b371455..b189ee85c68 100644
--- a/oneflow/python/ops/nn_ops.py
+++ b/oneflow/python/ops/nn_ops.py
@@ -3917,6 +3917,7 @@ def bce_with_logits_loss_job(input: tp.Numpy.Placeholder(shape=(2, 3)),
 
 
 @oneflow_export("nn.MSELoss")
+@stable_api
 def mse_loss(
     input: oneflow._oneflow_internal.BlobDesc,
     target: oneflow._oneflow_internal.BlobDesc,
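The gradient check in the new test below relies on the closed form :math:`\partial l_n / \partial x_n = 2(x_n - y_n)`. Because ``_test_mseloss_impl`` calls ``of_out.sum()`` before ``backward()``, the ``'none'`` and ``'sum'`` reductions are expected to produce identical elementwise gradients, while ``'mean'`` scales them by :math:`1/n`. A short NumPy restatement of that expectation (the shape is illustrative):

.. code-block:: python

    import numpy as np

    x = np.random.randn(3, 5)
    y = np.random.randn(3, 5)

    grad_sum = 2 * (x - y)         # also the 'none' gradient once the output is summed
    grad_mean = grad_sum / x.size  # 'mean' divides by the element count n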
+""" +import unittest +from collections import OrderedDict + +import numpy as np + +import oneflow.experimental as flow +from test_util import GenArgList + + +def _np_mseloss(np_input, np_target): + np_mse = np.square(np_target - np_input) + np_mse_mean = np.mean(np_mse) + np_mse_sum = np.sum(np_mse) + + return { + "none": np_mse, + "mean": np_mse_mean, + "sum": np_mse_sum, + } + + +def _np_mseloss_grad(np_input, np_target): + elem_cnt = np_input.size + np_mse_grad_sum = -2 * (np_target - np_input) + np_mse_grad_mean = np_mse_grad_sum / elem_cnt + + return { + "none": np_mse_grad_sum, + "mean": np_mse_grad_mean, + "sum": np_mse_grad_sum, + } + + +def _test_mseloss_impl(test_case, device, shape, reduction): + x = np.random.randn(*shape) + y = np.random.randn(*shape) + input = flow.Tensor( + x, dtype=flow.float32, requires_grad=True, device=flow.device(device) + ) + target = flow.Tensor(y, dtype=flow.float32, device=flow.device(device)) + + loss = flow.nn.MSELoss(reduction=reduction) + loss = loss.to(device) + of_out = loss(input, target) + np_out = _np_mseloss(x, y)[reduction] + test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5)) + + of_out = of_out.sum() + of_out.backward() + np_grad = _np_mseloss_grad(x, y)[reduction] + test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5)) + + +@unittest.skipIf( + not flow.unittest.env.eager_execution_enabled(), + ".numpy() doesn't work in lazy mode", +) +class TestMSELossModule(flow.unittest.TestCase): + def test_mseloss(test_case): + arg_dict = OrderedDict() + arg_dict["test_fun"] = [ + _test_mseloss_impl, + ] + arg_dict["device"] = ["cpu", "cuda"] + arg_dict["shape"] = [ + (3, 5), + (10, 9, 21), + (14, 22, 9, 21), + (3, 2, 4, 16, 5), + (1,), + ] + arg_dict["reduction"] = ["none", "mean", "sum"] + for arg in GenArgList(arg_dict): + arg[0](test_case, *arg[1:]) + + +if __name__ == "__main__": + unittest.main()