diff --git a/docs/source/examples/basic_usage.rst b/docs/source/examples/basic_usage.rst
index bd0d7451..9e4393ec 100644
--- a/docs/source/examples/basic_usage.rst
+++ b/docs/source/examples/basic_usage.rst
@@ -1,12 +1,14 @@
 Basic Usage
 ===========
 
+This example shows how to use TorchJD to perform an iteration of Jacobian descent on a regression
+model with two objectives. In this example, a batch of inputs is forwarded through the model and two
+corresponding batches of labels are used to compute two losses. These losses are then backwarded
+through the model. The obtained Jacobian matrix, consisting of the gradients of the two losses with
+respect to the parameters, is then aggregated using :doc:`UPGrad <../docs/aggregation/upgrad>`, and
+the parameters are updated using the resulting aggregation.
+
 
-This example shows how to use TorchJD to perform an iteration of Jacobian Descent on a regression
-model. In this example, a batch of inputs is forwarded through the model and the corresponding batch
-of labels is used to compute a batch of losses. These losses are then backwarded through the model.
-The obtained Jacobian matrix, consisting of the gradients of the losses, is then aggregated using
-UPGrad, and the parameters are updated using the resulting aggregation.
 
 Import several classes from ``torch`` and ``torchjd``:
 
@@ -19,7 +21,7 @@ Import several classes from ``torch`` and ``torchjd``:
 
 Define the model and the optimizer, as usual:
 
->>> model = Sequential(Linear(10, 5), ReLU(), Linear(5, 1))
+>>> model = Sequential(Linear(10, 5), ReLU(), Linear(5, 2))
 >>> optimizer = SGD(model.parameters(), lr=0.1)
 
 Define the aggregator that will be used to combine the Jacobian matrix:
@@ -33,20 +35,19 @@ negatively affected by the update.
 Now that everything is defined, we can train the model. Define the input and the associated target:
 
 >>> input = torch.randn(16, 10)  # Batch of 16 input random vectors of length 10
->>> target = input.sum(dim=1, keepdim=True)  # Batch of 16 targets
+>>> target1 = torch.randn(16)  # First batch of 16 targets
+>>> target2 = torch.randn(16)  # Second batch of 16 targets
 
-Here, we generate fake data in which each target is equal to the sum of its corresponding input
-vector, for the sake of the example.
+Here, we generate fake inputs and labels for the sake of the example.
 
 We can now compute the losses associated to each element of the batch.
 
->>> loss_fn = MSELoss(reduction='none')
+>>> loss_fn = MSELoss()
 >>> output = model(input)
->>> losses = loss_fn(output, target)
-
-Note that setting ``reduction='none'`` is necessary to obtain the element-wise loss vector.
+>>> loss1 = loss_fn(output[:, 0], target1)
+>>> loss2 = loss_fn(output[:, 1], target2)
 
-The last steps are similar to gradient descent-based optimization.
+The last steps are similar to gradient descent-based optimization, but using the two losses.
 
 Reset the ``.grad`` field of each model parameter:
 
@@ -54,7 +55,7 @@ Reset the ``.grad`` field of each model parameter:
 
 Perform the Jacobian descent backward pass:
 
->>> torchjd.backward(losses, model.parameters(), A)
+>>> torchjd.backward([loss1, loss2], model.parameters(), A)
 
 This will populate the ``.grad`` field of each model parameter with the corresponding aggregated
 Jacobian matrix.
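Read top to bottom, the updated doctest performs a single iteration of Jacobian descent on the two
objectives. As a rough illustration of how the same calls could be repeated over several iterations,
here is a minimal sketch; the loop length and the reuse of the fixed fake batch are arbitrary
choices made for illustration, and it uses only the APIs already shown above (``UPGrad``,
``torchjd.backward``, ``SGD``).

    import torch
    from torch.nn import Linear, MSELoss, ReLU, Sequential
    from torch.optim import SGD

    import torchjd
    from torchjd.aggregation import UPGrad

    model = Sequential(Linear(10, 5), ReLU(), Linear(5, 2))
    optimizer = SGD(model.parameters(), lr=0.1)
    A = UPGrad()
    loss_fn = MSELoss()

    input = torch.randn(16, 10)  # Batch of 16 random input vectors of length 10
    target1 = torch.randn(16)  # First batch of 16 targets
    target2 = torch.randn(16)  # Second batch of 16 targets

    for _ in range(10):  # Arbitrary number of iterations, for illustration only
        output = model(input)
        loss1 = loss_fn(output[:, 0], target1)  # Scalar loss for the first objective
        loss2 = loss_fn(output[:, 1], target2)  # Scalar loss for the second objective

        optimizer.zero_grad()
        # The two losses are backwarded together; UPGrad aggregates the resulting
        # 2-row Jacobian into one update direction per parameter, stored in .grad.
        torchjd.backward([loss1, loss2], model.parameters(), A)
        optimizer.step()

The corresponding test in ``tests/doc/test_rst.py`` is updated to match, as shown below.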
diff --git a/tests/doc/test_rst.py b/tests/doc/test_rst.py
index 30bfd03a..1515ea7a 100644
--- a/tests/doc/test_rst.py
+++ b/tests/doc/test_rst.py
@@ -6,20 +6,21 @@ def test_basic_usage():
     import torchjd
     from torchjd.aggregation import UPGrad
 
-    model = Sequential(Linear(10, 5), ReLU(), Linear(5, 1))
+    model = Sequential(Linear(10, 5), ReLU(), Linear(5, 2))
     optimizer = SGD(model.parameters(), lr=0.1)
     A = UPGrad()
-
     input = torch.randn(16, 10)  # Batch of 16 input random vectors of length 10
-    target = input.sum(dim=1, keepdim=True)  # Batch of 16 targets
+    target1 = torch.randn(16)  # First batch of 16 targets
+    target2 = torch.randn(16)  # Second batch of 16 targets
 
-    loss_fn = MSELoss(reduction="none")
+    loss_fn = MSELoss()
     output = model(input)
-    losses = loss_fn(output, target)
+    loss1 = loss_fn(output[:, 0], target1)
+    loss2 = loss_fn(output[:, 1], target2)
 
     optimizer.zero_grad()
-    torchjd.backward(losses, model.parameters(), A)
+    torchjd.backward([loss1, loss2], model.parameters(), A)
     optimizer.step()
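If one wanted this test to check more than the absence of exceptions, a possible follow-up would be
to assert that ``torchjd.backward`` populated the ``.grad`` fields as ``basic_usage.rst`` describes.
The sketch below is hypothetical (the test name is made up and it is not part of this patch); it
relies only on the behaviour stated in the documentation above.

    import torch
    from torch.nn import Linear, MSELoss, ReLU, Sequential

    import torchjd
    from torchjd.aggregation import UPGrad


    def test_backward_populates_grad():  # Hypothetical extra check, name made up
        model = Sequential(Linear(10, 5), ReLU(), Linear(5, 2))
        A = UPGrad()

        input = torch.randn(16, 10)
        target1 = torch.randn(16)
        target2 = torch.randn(16)

        loss_fn = MSELoss()
        output = model(input)
        loss1 = loss_fn(output[:, 0], target1)
        loss2 = loss_fn(output[:, 1], target2)

        torchjd.backward([loss1, loss2], model.parameters(), A)

        # basic_usage.rst states that backward fills .grad with the aggregated Jacobian,
        # so every parameter should now carry an update direction of its own shape.
        for param in model.parameters():
            assert param.grad is not None
            assert param.grad.shape == param.shape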