From 3a328371981d91210f229bf41428092869fb1d56 Mon Sep 17 00:00:00 2001 From: ShiYongtao <847615435@qq.com> Date: Mon, 7 Jun 2021 14:23:11 +0800 Subject: [PATCH 1/7] add mseloss module --- docs/source/experimental.rst | 1 + oneflow/python/nn/modules/loss.py | 96 +++++++++++++++++++++ oneflow/python/ops/nn_ops.py | 1 + oneflow/python/test/modules/test_mseloss.py | 67 ++++++++++++++ 4 files changed, 165 insertions(+) create mode 100644 oneflow/python/test/modules/test_mseloss.py diff --git a/docs/source/experimental.rst b/docs/source/experimental.rst index 3383d8c5eef..fbcde93007a 100644 --- a/docs/source/experimental.rst +++ b/docs/source/experimental.rst @@ -85,6 +85,7 @@ Experimental features .. autofunction:: oneflow.experimental.nn.Linear .. autofunction:: oneflow.experimental.nn.CrossEntropyLoss .. autofunction:: oneflow.experimental.nn.NLLLoss +.. autofunction:: oneflow.experimental.nn.MSELoss .. autofunction:: oneflow.experimental.masked_fill .. autofunction:: oneflow.experimental.Tensor.masked_fill .. autofunction:: oneflow.experimental.sum diff --git a/oneflow/python/nn/modules/loss.py b/oneflow/python/nn/modules/loss.py index abdc723ac2c..8095162e9d3 100644 --- a/oneflow/python/nn/modules/loss.py +++ b/oneflow/python/nn/modules/loss.py @@ -18,6 +18,7 @@ import oneflow as flow from oneflow.python.oneflow_export import oneflow_export, experimental_api from oneflow.python.nn.module import Module +from oneflow.python.nn.modules.math_ops import Subtract, Square, Sum, Mean @oneflow_export("nn.CrossEntropyLoss") @@ -296,6 +297,101 @@ def forward(self, input, target): return res.mean() +@oneflow_export("nn.MSELoss") +@experimental_api +class MSELoss(Module): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: + https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html?highlight=mseloss#torch.nn.MSELoss + + Creates a criterion that measures the mean squared error (squared L2 norm) between + each element in the input :math:`x` and target :math:`y`. + + The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as: + + .. math:: + \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad + l_n = \left( x_n - y_n \right)^2, + + where :math:`N` is the batch size. If :attr:`reduction` is not ``'none'`` + (default ``'mean'``), then: + + .. math:: + \ell(x, y) = + \begin{cases} + \operatorname{mean}(L), & \text{if reduction} = \text{`mean';}\\ + \operatorname{sum}(L), & \text{if reduction} = \text{`sum'.} + \end{cases} + + :math:`x` and :math:`y` are tensors of arbitrary shapes with a total + of :math:`n` elements each. + + The mean operation still operates over all the elements, and divides by :math:`n`. + + The division by :math:`n` can be avoided if one sets ``reduction = 'sum'``. + + Args: + size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, + the losses are averaged over each loss element in the batch. Note that for + some losses, there are multiple elements per sample. If the field :attr:`size_average` + is set to ``False``, the losses are instead summed for each minibatch. Ignored + when :attr:`reduce` is ``False``. Default: ``True`` + reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the + losses are averaged or summed over observations for each minibatch depending + on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per + batch element instead and ignores :attr:`size_average`. Default: ``True`` + reduction (string, optional): Specifies the reduction to apply to the output: + ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, + ``'mean'``: the sum of the output will be divided by the number of + elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` + and :attr:`reduce` are in the process of being deprecated, and in the meantime, + specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` + + Shape: + - Input: :math:`(N, *)` where :math:`*` means, any number of additional + dimensions + - Target: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + >>> import oneflow.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + """ + + def __init__(self, reduction: str = "mean", size_average=True, reduce=True) -> None: + super().__init__() + if size_average is not None and not size_average: + raise ValueError("Argument size_average is not supported yet") + if reduce is not None and not reduce: + raise ValueError("Argument reduce is not supported yet") + assert reduction in [ + "sum", + "none", + "mean", + None, + ], "only 'sum', 'mean' and None supported by now" + + self.reduction = reduction + self.square_op = Square() + self.subtract_op = Subtract() + self.sum_op = Sum() + self.mean_op = Mean() + + def forward(self, input, target): + mean_squared_difference = self.square_op(self.subtract_op(input, target)) + if self.reduction == "mean": + return self.mean_op(mean_squared_difference) + elif self.reduction == "sum": + return self.sum_op(mean_squared_difference) + else: + # Do no reduction + return mean_squared_difference + + if __name__ == "__main__": import doctest diff --git a/oneflow/python/ops/nn_ops.py b/oneflow/python/ops/nn_ops.py index 44a94feaa07..e0c354421d5 100644 --- a/oneflow/python/ops/nn_ops.py +++ b/oneflow/python/ops/nn_ops.py @@ -3917,6 +3917,7 @@ def bce_with_logits_loss_job(input: tp.Numpy.Placeholder(shape=(2, 3)), @oneflow_export("nn.MSELoss") +@stable_api def mse_loss( input: oneflow._oneflow_internal.BlobDesc, target: oneflow._oneflow_internal.BlobDesc, diff --git a/oneflow/python/test/modules/test_mseloss.py b/oneflow/python/test/modules/test_mseloss.py new file mode 100644 index 00000000000..3e2e9ad68c5 --- /dev/null +++ b/oneflow/python/test/modules/test_mseloss.py @@ -0,0 +1,67 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import unittest +from collections import OrderedDict + +import numpy as np + +import oneflow.experimental as flow +from test_util import GenArgList + + +def np_mseloss(np_input, np_target): + np_mse = np.square(np_target - np_input) + np_mse_mean = np.mean(np_mse) + np_mse_sum = np.sum(np_mse) + + return { + "none": np_mse, + "mean": np_mse_mean, + "sum": np_mse_sum, + } + + +def _test_mseloss(test_case, device, reduction): + x = np.random.randn(2, 3) + y = np.random.randn(2, 3) + input = flow.Tensor(x, dtype=flow.float32, device=flow.device(device)) + target = flow.Tensor(y, dtype=flow.float32, device=flow.device(device)) + + loss = flow.nn.MSELoss(reduction=reduction) + loss = loss.to(device) + of_out = loss(input, target) + np_out = np_mseloss(x, y)[reduction] + test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4)) + + +@unittest.skipIf( + not flow.unittest.env.eager_execution_enabled(), + ".numpy() doesn't work in lazy mode", +) +class TestMSELossModule(flow.unittest.TestCase): + def test_mseloss(test_case): + arg_dict = OrderedDict() + arg_dict["test_fun"] = [ + _test_mseloss, + ] + arg_dict["device"] = ["cpu"] + arg_dict["reduction"] = ["none"] + for arg in GenArgList(arg_dict): + arg[0](test_case, *arg[1:]) + + +if __name__ == "__main__": + unittest.main() From a4a4e8880b698160add4466eb2d69fa7ac883dbb Mon Sep 17 00:00:00 2001 From: ShiYongtao <847615435@qq.com> Date: Mon, 7 Jun 2021 17:37:00 +0800 Subject: [PATCH 2/7] add mseloss testcase --- oneflow/python/nn/modules/loss.py | 21 ++++++ oneflow/python/test/modules/test_mseloss.py | 81 ++++++++++++++++++--- 2 files changed, 92 insertions(+), 10 deletions(-) diff --git a/oneflow/python/nn/modules/loss.py b/oneflow/python/nn/modules/loss.py index 8095162e9d3..e0efb72ae46 100644 --- a/oneflow/python/nn/modules/loss.py +++ b/oneflow/python/nn/modules/loss.py @@ -360,6 +360,27 @@ class MSELoss(Module): >>> import numpy as np >>> flow.enable_eager_execution() + >>> input = flow.Tensor( + ... [[-0.02557137, 0.03101675, 1.37493674], + ... [0.25599439, -1.08372561, -0.21006816]], dtype=flow.float32) + >>> #1111 + >>> target = flow.Tensor( + ... [[-1.53105064, -0.68137555, 0.5931354], + ... [-0.49158347, 0.93673637, 0.1324141]], dtype=flow.float32) + >>> m = flow.nn.MSELoss(reduction="none") + >>> out = m(input, target) + >>> print(out.numpy()) + [[2.266468 0.50750285 0.61121327] + [0.55887264 4.082267 0.1172941 ]] + >>> m = flow.nn.MSELoss(reduction="mean") + >>> out = m(input, target) + >>> print(out.numpy()) + [1.3572696] + >>> m = flow.nn.MSELoss(reduction="sum") + >>> out = m(input, target) + >>> print(out.numpy()) + [8.143618] + """ def __init__(self, reduction: str = "mean", size_average=True, reduce=True) -> None: diff --git a/oneflow/python/test/modules/test_mseloss.py b/oneflow/python/test/modules/test_mseloss.py index 3e2e9ad68c5..81677c67d42 100644 --- a/oneflow/python/test/modules/test_mseloss.py +++ b/oneflow/python/test/modules/test_mseloss.py @@ -22,7 +22,7 @@ from test_util import GenArgList -def np_mseloss(np_input, np_target): +def _np_mseloss(np_input, np_target): np_mse = np.square(np_target - np_input) np_mse_mean = np.mean(np_mse) np_mse_sum = np.sum(np_mse) @@ -34,17 +34,76 @@ def np_mseloss(np_input, np_target): } -def _test_mseloss(test_case, device, reduction): - x = np.random.randn(2, 3) - y = np.random.randn(2, 3) - input = flow.Tensor(x, dtype=flow.float32, device=flow.device(device)) +def _np_mseloss_grad(np_input, np_target): + elem_cnt = np_input.size + np_mse_grad_sum = -2 * (np_target - np_input) + np_mse_grad_mean = np_mse_grad_sum / elem_cnt + + return { + "none": np_mse_grad_sum, + "mean": np_mse_grad_mean, + "sum": np_mse_grad_sum, + } + + +def _test_mseloss_backward(test_case, device, reduction): + x = np.random.randn(3, 5) + y = np.random.randn(3, 5) + input = flow.Tensor( + x, dtype=flow.float32, requires_grad=True, device=flow.device(device) + ) + target = flow.Tensor(y, dtype=flow.float32, device=flow.device(device)) + + loss = flow.nn.MSELoss(reduction=reduction) + loss = loss.to(device) + of_out = loss(input, target) + np_out = _np_mseloss(x, y)[reduction] + test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5)) + + of_out = of_out.sum() + of_out.backward() + np_grad = _np_mseloss_grad(x, y)[reduction] + test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5)) + + +def _test_mseloss_high_dim_input_backward(test_case, device, reduction): + x = np.random.randn(3, 2, 4, 16, 5) + y = np.random.randn(3, 2, 4, 16, 5) + input = flow.Tensor( + x, dtype=flow.float32, requires_grad=True, device=flow.device(device) + ) target = flow.Tensor(y, dtype=flow.float32, device=flow.device(device)) loss = flow.nn.MSELoss(reduction=reduction) loss = loss.to(device) of_out = loss(input, target) - np_out = np_mseloss(x, y)[reduction] - test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4)) + np_out = _np_mseloss(x, y)[reduction] + test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5)) + + of_out = of_out.sum() + of_out.backward() + np_grad = _np_mseloss_grad(x, y)[reduction] + test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5)) + + +def _test_mseloss_one_elem_input_backward(test_case, device, reduction): + x = np.array([0]).astype(np.float) + y = np.array([-1]).astype(np.float) + input = flow.Tensor( + x, dtype=flow.float32, requires_grad=True, device=flow.device(device) + ) + target = flow.Tensor(y, dtype=flow.float32, device=flow.device(device)) + + loss = flow.nn.MSELoss(reduction=reduction) + loss = loss.to(device) + of_out = loss(input, target) + np_out = _np_mseloss(x, y)[reduction] + test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5)) + + of_out = of_out.sum() + of_out.backward() + np_grad = _np_mseloss_grad(x, y)[reduction] + test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5)) @unittest.skipIf( @@ -55,10 +114,12 @@ class TestMSELossModule(flow.unittest.TestCase): def test_mseloss(test_case): arg_dict = OrderedDict() arg_dict["test_fun"] = [ - _test_mseloss, + _test_mseloss_backward, + _test_mseloss_high_dim_input_backward, + _test_mseloss_one_elem_input_backward, ] - arg_dict["device"] = ["cpu"] - arg_dict["reduction"] = ["none"] + arg_dict["device"] = ["cpu", "cuda"] + arg_dict["reduction"] = ["none", "mean", "sum"] for arg in GenArgList(arg_dict): arg[0](test_case, *arg[1:]) From 7da3e82cca9ff3fbcdfe5eb9348f7c18c1aff242 Mon Sep 17 00:00:00 2001 From: ShiYongtao <847615435@qq.com> Date: Tue, 8 Jun 2021 09:45:47 +0800 Subject: [PATCH 3/7] delete debug code --- oneflow/python/nn/modules/loss.py | 1 - 1 file changed, 1 deletion(-) diff --git a/oneflow/python/nn/modules/loss.py b/oneflow/python/nn/modules/loss.py index e0efb72ae46..ef7ec33dd10 100644 --- a/oneflow/python/nn/modules/loss.py +++ b/oneflow/python/nn/modules/loss.py @@ -363,7 +363,6 @@ class MSELoss(Module): >>> input = flow.Tensor( ... [[-0.02557137, 0.03101675, 1.37493674], ... [0.25599439, -1.08372561, -0.21006816]], dtype=flow.float32) - >>> #1111 >>> target = flow.Tensor( ... [[-1.53105064, -0.68137555, 0.5931354], ... [-0.49158347, 0.93673637, 0.1324141]], dtype=flow.float32) From f5982f0339375c2f8bef2bbdc84f43b6d0a8d69e Mon Sep 17 00:00:00 2001 From: ShiYongtao <847615435@qq.com> Date: Tue, 8 Jun 2021 10:25:33 +0800 Subject: [PATCH 4/7] add mseloss testcase --- oneflow/python/nn/modules/loss.py | 2 +- oneflow/python/test/modules/test_mseloss.py | 55 ++++----------------- 2 files changed, 11 insertions(+), 46 deletions(-) diff --git a/oneflow/python/nn/modules/loss.py b/oneflow/python/nn/modules/loss.py index ef7ec33dd10..af790c7fba1 100644 --- a/oneflow/python/nn/modules/loss.py +++ b/oneflow/python/nn/modules/loss.py @@ -393,7 +393,7 @@ def __init__(self, reduction: str = "mean", size_average=True, reduce=True) -> N "none", "mean", None, - ], "only 'sum', 'mean' and None supported by now" + ], "reduction parameter only support 'sum'/'mean'/'none'/None for now!" self.reduction = reduction self.square_op = Square() diff --git a/oneflow/python/test/modules/test_mseloss.py b/oneflow/python/test/modules/test_mseloss.py index 81677c67d42..8621b181b38 100644 --- a/oneflow/python/test/modules/test_mseloss.py +++ b/oneflow/python/test/modules/test_mseloss.py @@ -46,49 +46,9 @@ def _np_mseloss_grad(np_input, np_target): } -def _test_mseloss_backward(test_case, device, reduction): - x = np.random.randn(3, 5) - y = np.random.randn(3, 5) - input = flow.Tensor( - x, dtype=flow.float32, requires_grad=True, device=flow.device(device) - ) - target = flow.Tensor(y, dtype=flow.float32, device=flow.device(device)) - - loss = flow.nn.MSELoss(reduction=reduction) - loss = loss.to(device) - of_out = loss(input, target) - np_out = _np_mseloss(x, y)[reduction] - test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5)) - - of_out = of_out.sum() - of_out.backward() - np_grad = _np_mseloss_grad(x, y)[reduction] - test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5)) - - -def _test_mseloss_high_dim_input_backward(test_case, device, reduction): - x = np.random.randn(3, 2, 4, 16, 5) - y = np.random.randn(3, 2, 4, 16, 5) - input = flow.Tensor( - x, dtype=flow.float32, requires_grad=True, device=flow.device(device) - ) - target = flow.Tensor(y, dtype=flow.float32, device=flow.device(device)) - - loss = flow.nn.MSELoss(reduction=reduction) - loss = loss.to(device) - of_out = loss(input, target) - np_out = _np_mseloss(x, y)[reduction] - test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5)) - - of_out = of_out.sum() - of_out.backward() - np_grad = _np_mseloss_grad(x, y)[reduction] - test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5)) - - -def _test_mseloss_one_elem_input_backward(test_case, device, reduction): - x = np.array([0]).astype(np.float) - y = np.array([-1]).astype(np.float) +def _test_mseloss_backward(test_case, device, shape, reduction): + x = np.random.randn(*shape) + y = np.random.randn(*shape) input = flow.Tensor( x, dtype=flow.float32, requires_grad=True, device=flow.device(device) ) @@ -115,10 +75,15 @@ def test_mseloss(test_case): arg_dict = OrderedDict() arg_dict["test_fun"] = [ _test_mseloss_backward, - _test_mseloss_high_dim_input_backward, - _test_mseloss_one_elem_input_backward, ] arg_dict["device"] = ["cpu", "cuda"] + arg_dict["shape"] = [ + (3, 5), + (10, 9, 21), + (14, 22, 9, 21), + (3, 2, 4, 16, 5), + (1,), + ] arg_dict["reduction"] = ["none", "mean", "sum"] for arg in GenArgList(arg_dict): arg[0](test_case, *arg[1:]) From e54a270468c52c2544612ec7ff0357630f89c612 Mon Sep 17 00:00:00 2001 From: ShiYongtao <847615435@qq.com> Date: Tue, 8 Jun 2021 10:36:21 +0800 Subject: [PATCH 5/7] rename mseloss testcase --- oneflow/python/test/modules/test_mseloss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oneflow/python/test/modules/test_mseloss.py b/oneflow/python/test/modules/test_mseloss.py index 8621b181b38..3f21e55b411 100644 --- a/oneflow/python/test/modules/test_mseloss.py +++ b/oneflow/python/test/modules/test_mseloss.py @@ -46,7 +46,7 @@ def _np_mseloss_grad(np_input, np_target): } -def _test_mseloss_backward(test_case, device, shape, reduction): +def _test_mseloss_impl(test_case, device, shape, reduction): x = np.random.randn(*shape) y = np.random.randn(*shape) input = flow.Tensor( @@ -74,7 +74,7 @@ class TestMSELossModule(flow.unittest.TestCase): def test_mseloss(test_case): arg_dict = OrderedDict() arg_dict["test_fun"] = [ - _test_mseloss_backward, + _test_mseloss_impl, ] arg_dict["device"] = ["cpu", "cuda"] arg_dict["shape"] = [ From 65dcf77baa15d947a11e0c21187ae0bb3270cb26 Mon Sep 17 00:00:00 2001 From: ShiYongtao <847615435@qq.com> Date: Tue, 8 Jun 2021 13:58:52 +0800 Subject: [PATCH 6/7] fix docstring warning --- oneflow/python/nn/modules/loss.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/oneflow/python/nn/modules/loss.py b/oneflow/python/nn/modules/loss.py index af790c7fba1..e2fd600e3ca 100644 --- a/oneflow/python/nn/modules/loss.py +++ b/oneflow/python/nn/modules/loss.py @@ -16,6 +16,7 @@ from typing import Optional import oneflow as flow +from oneflow.python.framework.tensor import Tensor from oneflow.python.oneflow_export import oneflow_export, experimental_api from oneflow.python.nn.module import Module from oneflow.python.nn.modules.math_ops import Subtract, Square, Sum, Mean @@ -302,7 +303,7 @@ def forward(self, input, target): class MSELoss(Module): r"""The interface is consistent with PyTorch. The documentation is referenced from: - https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html?highlight=mseloss#torch.nn.MSELoss + https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html?highlight=mseloss#torch.nn.MSELoss Creates a criterion that measures the mean squared error (squared L2 norm) between each element in the input :math:`x` and target :math:`y`. @@ -382,18 +383,20 @@ class MSELoss(Module): """ - def __init__(self, reduction: str = "mean", size_average=True, reduce=True) -> None: + def __init__( + self, reduction: str = "mean", size_average: bool = True, reduce: bool = True + ) -> None: super().__init__() - if size_average is not None and not size_average: + if size_average is False: raise ValueError("Argument size_average is not supported yet") - if reduce is not None and not reduce: + if reduce is False: raise ValueError("Argument reduce is not supported yet") assert reduction in [ "sum", "none", "mean", None, - ], "reduction parameter only support 'sum'/'mean'/'none'/None for now!" + ], "Argument reduction only support 'sum'/'mean'/'none'/None for now!" self.reduction = reduction self.square_op = Square() @@ -401,7 +404,7 @@ def __init__(self, reduction: str = "mean", size_average=True, reduce=True) -> N self.sum_op = Sum() self.mean_op = Mean() - def forward(self, input, target): + def forward(self, input: Tensor, target: Tensor) -> Tensor: mean_squared_difference = self.square_op(self.subtract_op(input, target)) if self.reduction == "mean": return self.mean_op(mean_squared_difference) From c36e3e164322ad3681c79c026ebecadbb2be6e6a Mon Sep 17 00:00:00 2001 From: ShiYongtao <847615435@qq.com> Date: Tue, 8 Jun 2021 18:50:00 +0800 Subject: [PATCH 7/7] format docstring --- oneflow/python/nn/modules/loss.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/oneflow/python/nn/modules/loss.py b/oneflow/python/nn/modules/loss.py index ca7d54f2d37..2a92f8c6ba3 100644 --- a/oneflow/python/nn/modules/loss.py +++ b/oneflow/python/nn/modules/loss.py @@ -304,24 +304,33 @@ class MSELoss(Module): r"""The interface is consistent with PyTorch. The documentation is referenced from: https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html?highlight=mseloss#torch.nn.MSELoss + Creates a criterion that measures the mean squared error (squared L2 norm) between each element in the input :math:`x` and target :math:`y`. + The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as: + .. math:: \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad l_n = \left( x_n - y_n \right)^2, + where :math:`N` is the batch size. If :attr:`reduction` is not ``'none'`` (default ``'mean'``), then: + .. math:: \ell(x, y) = \begin{cases} \operatorname{mean}(L), & \text{if reduction} = \text{`mean';}\\ \operatorname{sum}(L), & \text{if reduction} = \text{`sum'.} \end{cases} + :math:`x` and :math:`y` are tensors of arbitrary shapes with a total of :math:`n` elements each. + The mean operation still operates over all the elements, and divides by :math:`n`. + The division by :math:`n` can be avoided if one sets ``reduction = 'sum'``. + Args: size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are averaged over each loss element in the batch. Note that for @@ -338,15 +347,20 @@ class MSELoss(Module): elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` and :attr:`reduce` are in the process of being deprecated, and in the meantime, specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` + Shape: - Input: :math:`(N, *)` where :math:`*` means, any number of additional dimensions - Target: :math:`(N, *)`, same shape as the input + For example: + .. code-block:: python + >>> import oneflow.experimental as flow >>> import numpy as np >>> flow.enable_eager_execution() + >>> input = flow.Tensor( ... [[-0.02557137, 0.03101675, 1.37493674], ... [0.25599439, -1.08372561, -0.21006816]], dtype=flow.float32) @@ -366,6 +380,7 @@ class MSELoss(Module): >>> out = m(input, target) >>> print(out.numpy()) [8.143618] + """ def __init__(