Refactor dual cone projections #237

Merged 36 commits on Feb 4, 2025

Commits
aa9897e
Factorize projection onto the dual cone.
PierreQuinton Jan 21, 2025
f534006
Move regularization out of projection onto the dual cone.
PierreQuinton Jan 21, 2025
864d7a4
Improve code coverage
PierreQuinton Jan 21, 2025
b0739fe
Add link to paper in weights_of_projection
PierreQuinton Feb 1, 2025
cd43f65
Rename functions in _dual_cone_utils
PierreQuinton Feb 1, 2025
402024c
Rename lagrangian to lagrangian_multipliers to match the maths
PierreQuinton Feb 1, 2025
a7ae47f
Separate _get_projection_weights into a tensorization of _get_lagrang…
PierreQuinton Feb 1, 2025
895b123
call the cast to numpy (and thus CPU) only once on the weights.
PierreQuinton Feb 1, 2025
25c5666
Improve the tensorization of _get_lagrange_multipliers_array
PierreQuinton Feb 1, 2025
8c52aa5
Improve readability of tensorization by using a for comprehension.
PierreQuinton Feb 1, 2025
3591577
Exchange _compute_normalized_regularized_gramian and _regularize_gramian
PierreQuinton Feb 1, 2025
c631f30
Add comment explaining the goal of regularization
PierreQuinton Feb 1, 2025
4aa0cbd
Rename:
PierreQuinton Feb 1, 2025
193ff9c
Exchange _normalize and _normalize_and_regularize
PierreQuinton Feb 1, 2025
a62c708
rename epsilons in _normalize and _regularize
PierreQuinton Feb 1, 2025
fb07035
Set explicitly the dtype and device of regularization matrix
PierreQuinton Feb 2, 2025
756e8c5
Rename functions & variables, add _to_array
ValerianRey Feb 2, 2025
601bc6e
Fix bug of transposed output
ValerianRey Feb 2, 2025
b3615cd
Delete outdated tests
ValerianRey Feb 2, 2025
c9afb41
Rename functions _normalize etc
ValerianRey Feb 2, 2025
e141600
Improve docstring and naming of test of kkt conditions.
PierreQuinton Feb 2, 2025
9c7eaac
Improve docstring of _get_projection_weights
PierreQuinton Feb 2, 2025
f04581e
Add test of tensorization of _get_projection_weights
PierreQuinton Feb 2, 2025
a5f562a
Clarify the QP problem that is solved, it now matches exactly that of…
PierreQuinton Feb 3, 2025
627adf0
Improve docstring of _get_projection_weight_vector
PierreQuinton Feb 3, 2025
bb411c9
Add changelog entry
PierreQuinton Feb 4, 2025
782aad6
Add no_grad context to the computation of UPGrad and DualProj as we c…
PierreQuinton Feb 4, 2025
ffca7d0
Improve dual cone utility functions
ValerianRey Feb 4, 2025
60a3851
Improve variable names:
ValerianRey Feb 4, 2025
c7a2fe8
Improve docstring of test_solution_weights
ValerianRey Feb 4, 2025
633b286
Remove torch.no_grad in DualProj and UPGrad
ValerianRey Feb 4, 2025
4f3e48a
Add docstring to test_tensorization_shape
ValerianRey Feb 4, 2025
0c224fc
Improve docstring formatting
ValerianRey Feb 4, 2025
c759201
Simplify UPGrad
ValerianRey Feb 4, 2025
c604c80
Add docstring to _regularize
ValerianRey Feb 4, 2025
20aa85c
Improve formatting
ValerianRey Feb 4, 2025
60 changes: 60 additions & 0 deletions src/torchjd/aggregation/_dual_cone_utils.py
@@ -0,0 +1,60 @@
from typing import Literal

import numpy as np
import torch
from qpsolvers import solve_qp
from torch import Tensor


def _get_projection_weights(
    gramian: Tensor, weights: Tensor, solver: Literal["quadprog"]
) -> Tensor:
    r"""
    Computes the weights of the projection of a vector of weights onto the dual cone of a matrix
    whose gramian is provided. Specifically, this solves for $w$ in the problem defined by (5) in
    Proposition 1 of [1] when the gramian is $JJ^\top$ and $v$ is given by ``weights``.
    This is a vectorized version: ``weights`` can also be a tensor whose rows (along the last
    dimension) are weight vectors, which are then projected independently.

    [1] `Jacobian Descent For Multi-Objective Optimization <https://arxiv.org/pdf/2406.16232>`_.
    """
    lagrange_multipliers = _get_lagrange_multipliers(gramian, weights, solver)
    return lagrange_multipliers + weights


def _get_lagrange_multipliers(
    gramian: Tensor, weights: Tensor, solver: Literal["quadprog"]
) -> Tensor:
    weight_matrix = _to_array(weights.reshape([-1, weights.shape[-1]]))
    gramian_array = _to_array(gramian)

    lagrange_multiplier_vectors = [
        _get_lagrange_multiplier_vector(gramian_array, weight_vector, solver)
        for weight_vector in weight_matrix
    ]

    lagrange_multiplier_matrix = np.stack(lagrange_multiplier_vectors)
    lagrange_multipliers = (
        torch.from_numpy(lagrange_multiplier_matrix)
        .to(device=gramian.device, dtype=gramian.dtype)
        .reshape(weights.shape)
    )
    return lagrange_multipliers


def _get_lagrange_multiplier_vector(
    gramian: np.ndarray, weight_vector: np.ndarray, solver: Literal["quadprog"]
) -> np.ndarray:
    """
    Solves the dual of the projection of a vector of weights onto the dual cone of the matrix J
    whose gramian is given.
    """
    dimension = gramian.shape[0]
    P = gramian
    q = gramian @ weight_vector
    G = -np.eye(dimension)
    h = np.zeros(dimension)
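    # In qpsolvers' standard form, `solve_qp` minimizes 0.5 * x^T P x + q^T x subject to
    # G x <= h. With P set to the gramian, q = P @ weight_vector, G = -I and h = 0, the
    # solution is the vector of Lagrange multipliers of the projection problem;
    # `_get_projection_weights` then recovers the projection weights as weights + multipliers.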
    return solve_qp(P, q, G, h, solver=solver)


def _to_array(tensor: Tensor) -> np.ndarray:
    return tensor.cpu().detach().numpy().astype(np.float64)
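Note for reviewers (not part of the diff): a minimal sketch of how the vectorized helper behaves, assuming this branch is installed along with qpsolvers and quadprog:

import torch

from torchjd.aggregation._dual_cone_utils import _get_projection_weights

matrix = torch.randn(5, 7)  # J: 5 objectives, 7 parameters
gramian = matrix @ matrix.T  # G = J J^T, positive semi-definite
weights = torch.rand(3, 5)  # three weight vectors, batched along the first dimension

# Each of the 3 rows is projected independently; the output has the same shape as `weights`.
projection_weights = _get_projection_weights(gramian, weights, "quadprog")
assert projection_weights.shape == (3, 5)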
18 changes: 16 additions & 2 deletions src/torchjd/aggregation/_gramian_utils.py
@@ -10,7 +10,12 @@ def _compute_gramian(matrix: Tensor) -> Tensor:
     return matrix @ matrix.T


-def _compute_normalized_gramian(matrix: Tensor, norm_eps: float) -> Tensor:
+def _compute_regularized_normalized_gramian(matrix: Tensor, norm_eps: float, reg_eps: float):
+    normalized_gramian = _compute_normalized_gramian(matrix, norm_eps)
+    return _regularize(normalized_gramian, reg_eps)
+
+
+def _compute_normalized_gramian(matrix: Tensor, eps: float) -> Tensor:
r"""
Computes :math:`\frac{1}{\sigma_\max^2} J J^T` for an input matrix :math:`J`, where
:math:`{\sigma_\max^2}` is :math:`J`'s largest singular value.
@@ -35,11 +40,20 @@ def _compute_normalized_gramian(matrix: Tensor, norm_eps: float) -> Tensor:
             "issue on https://github.com/TorchJD/torchjd/issues and paste this error message in it."
         ) from error
     max_singular_value = torch.max(singular_values)
-    if max_singular_value < norm_eps:
+    if max_singular_value < eps:
         scaled_singular_values = torch.zeros_like(singular_values)
     else:
         scaled_singular_values = singular_values / max_singular_value
     normalized_gramian = (
         left_unitary_matrix @ torch.diag(scaled_singular_values**2) @ left_unitary_matrix.T
     )
     return normalized_gramian


+def _regularize(gramian: Tensor, eps: float) -> Tensor:
+    # Because of numerical errors, `gramian` might have slightly negative eigenvalue(s).
+    # Adding a regularization term which is a small proportion of the identity matrix
+    # ensures that the gramian is positive definite.
+    regularization_matrix = eps * torch.eye(
+        gramian.shape[0], dtype=gramian.dtype, device=gramian.device
+    )
+    return gramian + regularization_matrix
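Note for reviewers (illustrative sketch, not part of the diff): a gramian whose smallest eigenvalue has drifted slightly below zero becomes positive definite again once `eps * I` is added, since every eigenvalue is shifted up by exactly `eps`:

import torch

# Hypothetical gramian with a slightly negative eigenvalue caused by numerical error.
gramian = torch.tensor([[1.0, 0.0], [0.0, -1e-8]])
eps = 1e-4

# What _regularize does: add a small multiple of the identity.
regularized = gramian + eps * torch.eye(2, dtype=gramian.dtype, device=gramian.device)
assert torch.linalg.eigvalsh(regularized).min() > 0  # now positive definite, safe for quadprog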
30 changes: 4 additions & 26 deletions src/torchjd/aggregation/dualproj.py
@@ -1,11 +1,9 @@
 from typing import Literal
 
-import numpy as np
-import torch
-from qpsolvers import solve_qp
 from torch import Tensor
 
-from ._gramian_utils import _compute_normalized_gramian
+from ._dual_cone_utils import _get_projection_weights
+from ._gramian_utils import _compute_regularized_normalized_gramian
 from ._pref_vector_utils import (
     _check_pref_vector,
     _pref_vector_to_str_suffix,
@@ -107,25 +105,5 @@

     def forward(self, matrix: Tensor) -> Tensor:
         weights = self.weighting(matrix)
-        weights_array = weights.cpu().detach().numpy().astype(np.float64)
-
-        gramian = _compute_normalized_gramian(matrix, self.norm_eps)
-        gramian_array = gramian.cpu().detach().numpy().astype(np.float64)
-        dimension = gramian.shape[0]
-
-        # Because of numerical errors, `gramian_array` might have slightly negative eigenvalue(s),
-        # which makes quadprog misbehave. Adding a regularization term which is a small proportion
-        # of the identity matrix ensures that the gramian is positive definite.
-        regularization_array = self.reg_eps * np.eye(dimension)
-        regularized_gramian_array = gramian_array + regularization_array
-
-        P = regularized_gramian_array
-        q = regularized_gramian_array @ weights_array
-        G = -np.eye(dimension)
-        h = np.zeros(dimension)
-
-        projection_weights_array = solve_qp(P, q, G, h, solver=self.solver)
-        projection_weights = torch.from_numpy(projection_weights_array).to(
-            device=matrix.device, dtype=matrix.dtype
-        )
-        return projection_weights + weights
+        gramian = _compute_regularized_normalized_gramian(matrix, self.norm_eps, self.reg_eps)
+        return _get_projection_weights(gramian, weights, self.solver)
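Note for reviewers (not part of the diff): a short usage sketch of the refactored aggregator, assuming the public DualProj export and the example style used in torchjd's docs:

import torch

from torchjd.aggregation import DualProj

aggregator = DualProj()
jacobian = torch.tensor([[-4.0, 1.0, 1.0], [6.0, 1.0, 1.0]])  # two conflicting gradient rows

# Aggregate by projecting the weighted combination of the rows onto the dual cone of the
# rows of the jacobian: the result should conflict with none of the original gradients.
aggregated = aggregator(jacobian)
assert bool((jacobian @ aggregated >= -1e-6).all())  # non-conflicting, up to tolerance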
41 changes: 6 additions & 35 deletions src/torchjd/aggregation/upgrad.py
@@ -1,11 +1,10 @@
 from typing import Literal
 
-import numpy as np
 import torch
-from qpsolvers import solve_qp
 from torch import Tensor
 
-from ._gramian_utils import _compute_normalized_gramian
+from ._dual_cone_utils import _get_projection_weights
+from ._gramian_utils import _compute_regularized_normalized_gramian
 from ._pref_vector_utils import (
     _check_pref_vector,
     _pref_vector_to_str_suffix,
@@ -102,36 +101,8 @@

     def forward(self, matrix: Tensor) -> Tensor:
         weights = self.weighting(matrix)
-        lagrangian = self._compute_lagrangian(matrix, weights)
-        lagrangian_weights = torch.sum(lagrangian, dim=0)
-        result_weights = lagrangian_weights + weights
-        return result_weights
-
-    def _compute_lagrangian(self, matrix: Tensor, weights: Tensor) -> Tensor:
-        gramian = _compute_normalized_gramian(matrix, self.norm_eps)
-        gramian_array = gramian.cpu().detach().numpy().astype(np.float64)
-        dimension = gramian.shape[0]
-
-        regularization_array = self.reg_eps * np.eye(dimension)
-        regularized_gramian_array = gramian_array + regularization_array
-
-        P = regularized_gramian_array
-        G = -np.eye(dimension)
-        h = np.zeros(dimension)
-
-        lagrangian_rows = []
-        for i in range(dimension):
-            weight = weights[i].item()
-            if weight <= 0.0:
-                # In this case, the solution to the quadratic program is always 0,
-                # so we don't need to run solve_qp.
-                lagrangian_rows.append(np.zeros([dimension]))
-            else:
-                q = weight * regularized_gramian_array[i, :]
-                lagrangian_rows.append(solve_qp(P, q, G, h, solver=self.solver))
-
-        lagrangian_array = np.stack(lagrangian_rows)
-        lagrangian = torch.from_numpy(lagrangian_array).to(
-            device=gramian.device, dtype=gramian.dtype
+        gramian = _compute_regularized_normalized_gramian(matrix, self.norm_eps, self.reg_eps)
+        projection_weights_matrix = _get_projection_weights(
+            gramian, torch.diag(weights), self.solver
         )
-        return lagrangian
+        return torch.sum(projection_weights_matrix, dim=0)
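Note for reviewers (not part of the diff): row i of torch.diag(weights) is w_i * e_i, so the batched call projects each weighted basis vector onto the dual cone separately, and summing the resulting rows yields the same aggregation weights as the removed per-index loop. A sketch of the equivalence, assuming this branch is installed:

import torch

from torchjd.aggregation._dual_cone_utils import _get_projection_weights

matrix = torch.randn(4, 10)
gramian = matrix @ matrix.T
weights = torch.rand(4)

# Batched: all rows of diag(weights) are projected at once.
batched = _get_projection_weights(gramian, torch.diag(weights), "quadprog")

# Row by row: project each w_i * e_i separately, as the old per-index loop did.
rows = [
    _get_projection_weights(gramian, weights[i] * torch.eye(4)[i], "quadprog")
    for i in range(4)
]
assert torch.allclose(batched, torch.stack(rows), atol=1e-6)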
53 changes: 53 additions & 0 deletions tests/unit/aggregation/test_dual_cone_utils.py
@@ -0,0 +1,53 @@
import torch
from pytest import mark
from torch.testing import assert_close

from torchjd.aggregation._dual_cone_utils import _get_projection_weights


@mark.parametrize("shape", [(5, 7), (9, 37), (2, 14), (32, 114), (50, 100)])
def test_solution_weights(shape: tuple[int, int]):
r"""
Tests that `_get_projection_weights` returns valid weights corresponding to the projection onto
the dual cone of a matrix with the specified shape.

Validation is performed by verifying that the solution satisfies the `KKT conditions
<https://en.wikipedia.org/wiki/Karush%E2%80%93Kuhn%E2%80%93Tucker_conditions>`_ for the
quadratic program that projects vectors onto the dual cone of a matrix.
Specifically, the solution should satisfy the equivalent set of conditions described in Lemma 4
of [1].

Let:
- `u` be a vector of weights,
- `G` a positive semi-definite matrix,
- Consider the quadratic problem of minimizing `v^\top G v` subject to `u \preceq v`.

Then `w` is a solution if and only if it satisfies the following three conditions:
1. **Dual feasibility:** `u \preceq w`
2. **Primal feasibility:** `0 \preceq G w`
3. **Complementary slackness:** `u^\top G w = w^\top G w`

Reference:
[1] `Jacobian Descent For Multi-Objective Optimization <https://arxiv.org/pdf/2406.16232>`_.
"""
    matrix = torch.randn(shape)
    weights = torch.rand(shape[0])

    gramian = matrix @ matrix.T

    projection_weights = _get_projection_weights(gramian, weights, "quadprog")
    dual_gap = projection_weights - weights

    # Dual feasibility
    dual_gap_positive_part = dual_gap[dual_gap >= 0.0]
    assert_close(dual_gap_positive_part.norm(), dual_gap.norm(), atol=1e-05, rtol=0)

    primal_gap = gramian @ projection_weights

    # Primal feasibility
    primal_gap_positive_part = primal_gap[primal_gap >= 0]
    assert_close(primal_gap_positive_part.norm(), primal_gap.norm(), atol=1e-04, rtol=0)

    # Complementary slackness
    slackness = dual_gap @ primal_gap
    assert_close(slackness, torch.zeros_like(slackness), atol=3e-03, rtol=0)
28 changes: 0 additions & 28 deletions tests/unit/aggregation/test_upgrad.py
@@ -1,10 +1,7 @@
 import torch
 from pytest import mark
-from torch.testing import assert_close
 
 from torchjd.aggregation import UPGrad
-from torchjd.aggregation.mean import _MeanWeighting
-from torchjd.aggregation.upgrad import _UPGradWrapper
 
 from ._property_testers import (
     ExpectedStructureProperty,
@@ -18,31 +15,6 @@ class TestUPGrad(ExpectedStructureProperty, NonConflictingProperty, PermutationI
     pass


-@mark.parametrize("shape", [(5, 7), (9, 37), (2, 14), (32, 114), (50, 100)])
-def test_upgrad_lagrangian_satisfies_kkt_conditions(shape: tuple[int, int]):
-    matrix = torch.randn(shape)
-    weights = torch.rand(shape[0])
-
-    gramian = matrix @ matrix.T
-
-    W = _UPGradWrapper(_MeanWeighting(), norm_eps=0.0001, reg_eps=0.0, solver="quadprog")
-
-    lagrange_multiplier = W._compute_lagrangian(matrix, weights)
-
-    positive_lagrange_multiplier = lagrange_multiplier[lagrange_multiplier >= 0]
-    assert_close(
-        positive_lagrange_multiplier.norm(), lagrange_multiplier.norm(), atol=1e-05, rtol=0
-    )
-
-    constraint = gramian @ (torch.diag(weights) + lagrange_multiplier.T)
-
-    positive_constraint = constraint[constraint >= 0]
-    assert_close(positive_constraint.norm(), constraint.norm(), atol=1e-04, rtol=0)
-
-    slackness = torch.trace(lagrange_multiplier @ constraint)
-    assert_close(slackness, torch.zeros_like(slackness), atol=3e-03, rtol=0)
-
-
 def test_representations():
     A = UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="quadprog")
     assert repr(A) == "UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='quadprog')"