@@ -1,6 +1,6 @@
 # This code is part of Qiskit.
 #
-# (C) Copyright IBM 2021, 2022.
+# (C) Copyright IBM 2021, 2023.
 #
 # This code is licensed under the Apache License, Version 2.0. You may
 # obtain a copy of this license in the LICENSE.txt file in the root directory
@@ -11,18 +11,24 @@
 # that they have been altered from the originals.
 
 """A connector to use Qiskit (Quantum) Neural Networks as PyTorch modules."""
+from __future__ import annotations
+
+import sys
+from typing import Tuple, Any, cast
 
-from typing import Tuple, Any, Optional, cast, Union
 import numpy as np
 
 import qiskit_machine_learning.optionals as _optionals
-from ..neural_networks import NeuralNetwork
 from ..exceptions import QiskitMachineLearningError
+from ..neural_networks import NeuralNetwork
 
 if _optionals.HAS_TORCH:
-    from torch import Tensor, sparse_coo_tensor, einsum
+    import torch
+
+    # imports for inheritance and type hints
+    from torch import Tensor
     from torch.autograd import Function
-    from torch.nn import Module, Parameter as TorchParam
+    from torch.nn import Module
else:
 
     class Function:  # type: ignore
@@ -75,6 +81,7 @@ def forward( # type: ignore
 
             Raises:
                 QiskitMachineLearningError: Invalid input data.
+                RuntimeError: If the connector is configured as sparse and the network is not sparse.
             """
 
             # validate input shape
@@ -94,15 +101,30 @@ def forward( # type: ignore
             result = neural_network.forward(
                 input_data.detach().cpu().numpy(), weights.detach().cpu().numpy()
             )
-            if neural_network.sparse and sparse:
-                _optionals.HAS_SPARSE.require_now("COO")
-                # pylint: disable=import-error
-                from sparse import SparseArray, COO
+            if ctx.sparse:
+                if neural_network.sparse:
+                    _optionals.HAS_SPARSE.require_now("SparseArray")
+                    # pylint: disable=import-error
+                    from sparse import SparseArray, COO
 
-                result = cast(COO, cast(SparseArray, result).asformat("coo"))
-                result_tensor = sparse_coo_tensor(result.coords, result.data)
+                    # todo: replace output type from DOK to COO?
+                    result = cast(COO, cast(SparseArray, result).asformat("coo"))
+                    result_tensor = torch.sparse_coo_tensor(result.coords, result.data)
+                else:
+                    raise RuntimeError(
+                        "TorchConnector configured as sparse, the network must be sparse as well"
+                    )
             else:
-                result_tensor = Tensor(result)
+                # connector is dense
+                if neural_network.sparse:
+                    # convert to dense
+                    _optionals.HAS_SPARSE.require_now("SparseArray")
+                    from sparse import SparseArray
+
+                    # cast is required by mypy
+                    result = cast(SparseArray, result).todense()
+                result_tensor = torch.from_numpy(result)
+                result_tensor = result_tensor.to(input_data.dtype)
 
             # if the input was not a batch, then remove the batch-dimension from the result,
             # since the neural network will always treat input as a batch and cast to a
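
Note: the sparse branch above hands a pydata/sparse COO array to PyTorch by reusing its coords/data buffers. A minimal round-trip sketch, assuming the optional sparse package is installed (the shapes and values are illustrative):

import numpy as np
import sparse
import torch

# 2x2 matrix with two non-zeros, built directly in COO format:
# value 1.0 at (0, 1) and value 2.0 at (1, 0)
coo = sparse.COO(np.array([[0, 1], [1, 0]]), np.array([1.0, 2.0]), shape=(2, 2))
t = torch.sparse_coo_tensor(coo.coords, coo.data, size=coo.shape)
print(t.to_dense())  # dense view: [[0., 1.], [2., 0.]]
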
@@ -124,6 +146,8 @@ def backward(ctx: Any, grad_output: Tensor) -> Tuple: # type: ignore
                 grad_output: previous gradient
             Raises:
                 QiskitMachineLearningError: Invalid input data.
+                RuntimeError: If the connector is configured as sparse and the network is not sparse.
+
             Returns:
                 gradients for the first two arguments and None for the others
             """
@@ -132,10 +156,6 @@ def backward(ctx: Any, grad_output: Tensor) -> Tuple: # type: ignore
             input_data, weights = ctx.saved_tensors
             neural_network = ctx.neural_network
 
-            # if sparse output is requested return None, since PyTorch does not support it yet.
-            if neural_network.sparse and ctx.sparse:
-                return None, None, None, None
-
             # validate input shape
             if input_data.shape[-1] != neural_network.num_inputs:
                 raise QiskitMachineLearningError(
@@ -152,46 +172,84 @@ def backward(ctx: Any, grad_output: Tensor) -> Tuple: # type: ignore
                 input_data.detach().cpu().numpy(), weights.detach().cpu().numpy()
             )
             if input_grad is not None:
-                if neural_network.sparse:
-                    input_grad = sparse_coo_tensor(input_grad.coords, input_grad.data)
-
-                    # cast to dense here, since PyTorch does not support sparse output yet.
-                    # this should only happen if the network returns sparse output but the
-                    # connector is configured to return dense output.
-                    input_grad = input_grad.to_dense()  # this should be eventually removed
-                    input_grad = input_grad.to(grad_output.dtype)
+                if ctx.sparse:
+                    if neural_network.sparse:
+                        _optionals.HAS_SPARSE.require_now("Sparse")
+                        import sparse
+                        from sparse import COO
+
+                        grad_output = grad_output.detach().cpu()
+                        grad_coo = COO(grad_output.indices(), grad_output.values())
+
+                        # Takes gradients from previous layer in backward pass (i.e. later layer in
+                        # forward pass) j for each observation i in the batch. Multiplies this with
+                        # the gradient from this point on backwards with respect to each input k.
+                        # Sums over all j to get total gradient of output w.r.t. each input k and
+                        # batch index i. This operation should preserve the batch dimension to be
+                        # able to do back-prop in a batched manner.
+                        # PyTorch does not support sparse einsum, so we rely on Sparse.
+                        # pylint: disable=no-member
+                        input_grad = sparse.einsum("ij,ijk->ik", grad_coo, input_grad)
+
+                        # return sparse gradients
+                        input_grad = torch.sparse_coo_tensor(input_grad.coords, input_grad.data)
+                    else:
+                        # this exception should never happen
+                        raise RuntimeError(
+                            "TorchConnector configured as sparse, "
+                            "the network must be sparse as well"
+                        )
                 else:
-                    input_grad = Tensor(input_grad).to(grad_output.dtype)
-
-                # Takes gradients from previous layer in backward pass (i.e. later layer in forward
-                # pass) j for each observation i in the batch. Multiplies this with the gradient
-                # from this point on backwards with respect to each input k. Sums over all j
-                # to get total gradient of output w.r.t. each input k and batch index i.
-                # This operation should preserve the batch dimension to be able to do back-prop in
-                # a batched manner.
-                input_grad = einsum("ij,ijk->ik", grad_output.detach().cpu(), input_grad)
+                    # connector is dense
+                    if neural_network.sparse:
+                        # convert to dense
+                        input_grad = input_grad.todense()
+                    input_grad = torch.from_numpy(input_grad)
+                    input_grad = input_grad.to(grad_output.dtype)
+                    # same as above
+                    input_grad = torch.einsum("ij,ijk->ik", grad_output.detach().cpu(), input_grad)
 
                 # place the resulting tensor to the device where they were stored
                 input_grad = input_grad.to(input_data.device)
 
             if weights_grad is not None:
-                if neural_network.sparse:
-                    weights_grad = sparse_coo_tensor(weights_grad.coords, weights_grad.data)
-
-                    # cast to dense here, since PyTorch does not support sparse output yet.
-                    # this should only happen if the network returns sparse output but the
-                    # connector is configured to return dense output.
-                    weights_grad = weights_grad.to_dense()  # this should be eventually removed
-                    weights_grad = weights_grad.to(grad_output.dtype)
+                if ctx.sparse:
+                    if neural_network.sparse:
+                        import sparse
+                        from sparse import COO
+
+                        grad_output = grad_output.detach().cpu()
+                        grad_coo = COO(grad_output.indices(), grad_output.values())
+
+                        # Takes gradients from previous layer in backward pass (i.e. later layer in
+                        # forward pass) j for each observation i in the batch. Multiplies this with
+                        # the gradient from this point on backwards with respect to each
+                        # parameter k. Sums over all i and j to get total gradient of output
+                        # w.r.t. each parameter k. The weights' dimension is independent of the
+                        # batch size.
+                        # pylint: disable=no-member
+                        weights_grad = sparse.einsum("ij,ijk->k", grad_coo, weights_grad)
+
+                        # return sparse gradients
+                        weights_grad = torch.sparse_coo_tensor(
+                            weights_grad.coords, weights_grad.data
+                        )
+                    else:
+                        # this exception should never happen
+                        raise RuntimeError(
+                            "TorchConnector configured as sparse, "
+                            "the network must be sparse as well"
+                        )
                 else:
-                    weights_grad = Tensor(weights_grad).to(grad_output.dtype)
-
-                # Takes gradients from previous layer in backward pass (i.e. later layer in forward
-                # pass) j for each observation i in the batch. Multiplies this with the gradient
-                # from this point on backwards with respect to each parameter k. Sums over all i and
-                # j to get total gradient of output w.r.t. each parameter k.
-                # The weights' dimension is independent of the batch size.
-                weights_grad = einsum("ij,ijk->k", grad_output.detach().cpu(), weights_grad)
+                    if neural_network.sparse:
+                        # convert to dense
+                        weights_grad = weights_grad.todense()
+                    weights_grad = torch.from_numpy(weights_grad)
+                    weights_grad = weights_grad.to(grad_output.dtype)
+                    # same as above
+                    weights_grad = torch.einsum(
+                        "ij,ijk->k", grad_output.detach().cpu(), weights_grad
+                    )
 
                 # place the resulting tensor to the device where they were stored
                 weights_grad = weights_grad.to(weights.device)
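
Note: the two einsum contractions above implement the chain rule as a batched vector-Jacobian product. A minimal dense sketch with illustrative shapes (batch i=2, outputs j=3, inputs/weights k=4):

import torch

grad_output = torch.randn(2, 3)  # upstream gradient dL/dy_ij, one row per batch sample i
jacobian = torch.randn(2, 3, 4)  # network Jacobian dy_ij/dx_ik (or dy_ij/dw_k) per sample

# input gradient: sum over outputs j, keep batch dimension i -> shape (2, 4)
input_grad = torch.einsum("ij,ijk->ik", grad_output, jacobian)
# weight gradient: sum over both batch i and outputs j -> shape (4,)
weights_grad = torch.einsum("ij,ijk->k", grad_output, jacobian)
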
@@ -202,8 +260,8 @@ def backward(ctx: Any, grad_output: Tensor) -> Tuple: # type: ignore
     def __init__(
         self,
         neural_network: NeuralNetwork,
-        initial_weights: Optional[Union[np.ndarray, Tensor]] = None,
-        sparse: Optional[bool] = None,
+        initial_weights: np.ndarray | Tensor | None = None,
+        sparse: bool | None = None,
     ):
         """
         Args:
@@ -216,15 +274,29 @@ def __init__(
             sparse: Whether this connector should return sparse output or not. If sparse is set
                 to None, then the setting from the given neural network is used. Note that sparse
                 output is only returned if the underlying neural network also returns sparse output,
-                otherwise it will be dense independent of the setting. Also note that PyTorch
-                currently does not support sparse back propagation, i.e., if sparse is set to True,
-                the backward pass of this module will return None.
+                otherwise an error will be raised. Sparse support requires Python 3.8
+                or higher.
+
+        Raises:
+            QiskitMachineLearningError: If the connector is configured as sparse and the
+                underlying network is not sparse, or if the Python version is below 3.8.
         """
         super().__init__()
         self._neural_network = neural_network
+        if sparse is None:
+            sparse = self._neural_network.sparse
+        if sparse and sys.version_info < (3, 8):
+            raise QiskitMachineLearningError("Sparse is supported on python 3.8+")
+
         self._sparse = sparse
 
-        weight_param = TorchParam(Tensor(neural_network.num_weights))
+        if self._sparse and not self._neural_network.sparse:
+            # connector is sparse while the underlying neural network is not
+            raise QiskitMachineLearningError(
+                "TorchConnector configured as sparse, the network must be sparse as well"
+            )
+
+        weight_param = torch.nn.Parameter(torch.zeros(neural_network.num_weights))
         # Register param. in graph following PyTorch naming convention
         self.register_parameter("weight", weight_param)
         # If `weight_param` is assigned to `self._weights` after registration,
@@ -237,7 +309,7 @@ def __init__(
         if initial_weights is None:
             self._weights.data.uniform_(-1, 1)
         else:
-            self._weights.data = Tensor(initial_weights)
+            self._weights.data = torch.tensor(initial_weights, dtype=torch.float)
 
     @property
     def neural_network(self) -> NeuralNetwork:
@@ -250,11 +322,11 @@ def weight(self) -> Tensor:
         return self._weights
 
     @property
-    def sparse(self) -> Optional[bool]:
+    def sparse(self) -> bool | None:
         """Returns whether this connector returns sparse output or not."""
         return self._sparse
 
-    def forward(self, input_data: Optional[Tensor] = None) -> Tensor:
+    def forward(self, input_data: Tensor | None = None) -> Tensor:
         """Forward pass.
 
         Args:
@@ -263,7 +335,7 @@ def forward(self, input_data: Optional[Tensor] = None) -> Tensor:
         Returns:
             Result of forward pass of this model.
         """
-        input_ = input_data if input_data is not None else Tensor([])
+        input_ = input_data if input_data is not None else torch.zeros(0)
         return TorchConnector._TorchNNFunction.apply(
             input_, self._weights, self._neural_network, self._sparse
         )
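
Note: end to end, the connector is used like any other torch.nn.Module. A minimal usage sketch; the EstimatorQNN construction is illustrative, and any NeuralNetwork with two inputs would work the same way:

import torch
from qiskit.circuit.library import RealAmplitudes, ZZFeatureMap
from qiskit_machine_learning.connectors import TorchConnector
from qiskit_machine_learning.neural_networks import EstimatorQNN

feature_map = ZZFeatureMap(2)
ansatz = RealAmplitudes(2, reps=1)
qnn = EstimatorQNN(
    circuit=feature_map.compose(ansatz),
    input_params=feature_map.parameters,
    weight_params=ansatz.parameters,
)

model = TorchConnector(qnn)       # sparse=None: inherit the network's (dense) setting
output = model(torch.rand(5, 2))  # batched forward pass, shape (5, 1)
output.sum().backward()           # gradients accumulate in model.weight.grad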