
Commit

Fix unit test failure.
qlzh727 committed Sep 19, 2023
1 parent 95498c0 commit 9d8b15a
Showing 3 changed files with 36 additions and 16 deletions.
18 changes: 13 additions & 5 deletions keras_core/backend/jax/rnn.py
@@ -1,8 +1,10 @@
+import contextlib
+
 import tree
 from jax import lax
 from jax import numpy as jnp
 
-from keras_core.backend.common.stateless_scope import StatelessScope
+from keras_core.backend.common import stateless_scope
 from keras_core.utils.nest import pack_sequence_as

@@ -181,10 +183,16 @@ def _step(states, current_input):

     scan_xs = inputs
 
-    with StatelessScope():
-        # We must use a stateless scope because `scan` will involve
-        # JAX tracing -- any variable update at this stage would
-        # be a leak.
+    # We must use a stateless scope because `scan` will involve
+    # JAX tracing -- any variable update at this stage would
+    # be a leak.
+    if stateless_scope.in_stateless_scope():
+        # Leverage the parent scope.
+        scope = contextlib.nullcontext()
+    else:
+        scope = stateless_scope.StatelessScope()
+
+    with scope:
         new_states, outputs = lax.scan(
             f=_step,
             init=initial_states,
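The control flow added above is a small reusable pattern: open a fresh stateless scope only when no enclosing one exists, and otherwise fall back to contextlib.nullcontext() so the parent scope stays in charge. Below is a minimal self-contained sketch of the pattern; Scope, in_scope, and run_traced_step are illustrative stand-ins, not keras_core's actual API.

import contextlib

_ACTIVE_SCOPE = None  # records the innermost open scope, if any


class Scope:
    """Toy stand-in for a stateless scope."""

    def __enter__(self):
        global _ACTIVE_SCOPE
        self._previous = _ACTIVE_SCOPE
        _ACTIVE_SCOPE = self
        return self

    def __exit__(self, *exc):
        global _ACTIVE_SCOPE
        _ACTIVE_SCOPE = self._previous


def in_scope():
    return _ACTIVE_SCOPE is not None


def run_traced_step():
    # Reuse the parent scope if one is active; nullcontext() is a no-op
    # context manager, so nothing is opened or torn down twice.
    scope = contextlib.nullcontext() if in_scope() else Scope()
    with scope:
        pass  # stand-in for the lax.scan(...) call in the diff


run_traced_step()      # no parent scope: opens and closes its own
with Scope():
    run_traced_step()  # parent scope active: nullcontext leaves it alone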
32 changes: 21 additions & 11 deletions keras_core/backend/jax/trainer.py
@@ -44,7 +44,14 @@ def compute_loss_and_updates(
             return_losses=True,
             **kwargs,
         )
-        loss = self.compute_loss(x, y, y_pred, sample_weight, allow_empty=True)
+
+        trainable_mapping = zip(self.trainable_variables, trainable_variables)
+        with backend.StatelessScope(state_mapping=trainable_mapping):
+            # Note that this is needed for the regularization loss, which
+            # needs the latest value of the trainable/non-trainable variables.
+            loss = self.compute_loss(
+                x, y, y_pred, sample_weight, allow_empty=True
+            )
         if losses:
             loss += ops.sum(losses)
         unscaled_loss = loss
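Moving compute_loss under a StatelessScope makes sense once you recall how a functional JAX train step works: the up-to-date weights travel as explicit arguments, while the values cached on the variable objects are stale, so any code that reads variables during loss computation (regularizers in particular) must be redirected to the fresh values. Here is a self-contained toy of that redirection, using hypothetical Variable and StatelessScope classes rather than keras_core's real ones:

class Variable:
    def __init__(self, value):
        self.value = value  # the "stale" value cached on the object


class StatelessScope:
    """Maps variable objects to fresh per-step values while active."""

    current = None

    def __init__(self, state_mapping=()):
        self._mapping = {id(v): value for v, value in state_mapping}

    def __enter__(self):
        StatelessScope.current = self
        return self

    def __exit__(self, *exc):
        StatelessScope.current = None

    def get_current_value(self, variable):
        return self._mapping.get(id(variable), variable.value)


def read(variable):
    # Resolve through the active scope when there is one.
    scope = StatelessScope.current
    return scope.get_current_value(variable) if scope else variable.value


w = Variable(1.0)  # the object still holds the pre-step value
with StatelessScope(state_mapping=[(w, 2.0)]):
    assert read(w) == 2.0  # a regularizer here sees the fresh value
assert read(w) == 1.0      # outside the scope, the cached value is back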
@@ -577,8 +584,9 @@ def evaluate(
         ]
         metrics_variables = [v.value for v in self.metrics_variables]
 
-        self._purge_model_variables(trainable_variables=False,
-                                    optimizer_variables=False)
+        self._purge_model_variables(
+            trainable_variables=False, optimizer_variables=False
+        )
         for step, data in epoch_iterator.enumerate_epoch(return_type="np"):
             callbacks.on_test_batch_begin(step)

@@ -911,19 +919,21 @@ def _enforce_jax_state_sharding(
             metrics_variables,
         )
 
-    def _purge_model_variables(self,
-                               trainable_variables=True,
-                               non_trainable_variables=True,
-                               optimizer_variables=True,
-                               metric_variables=True):
+    def _purge_model_variables(
+        self,
+        trainable_variables=True,
+        non_trainable_variables=True,
+        optimizer_variables=True,
+        metric_variables=True,
+    ):
         """Remove all the model variables to save memory.
 
         During JAX training, the training function is stateless, so we have
         to pass the model weights in and out over and over; meanwhile, the
         copies of the weights attached to the KerasVariable objects are still
         occupying extra memory. We remove those variables at the beginning of
         the epoch (for better memory utilization) and reattach the values to
         the variables at the end of the epoch, via `jax_state_sync()`.
         """
         if trainable_variables:
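The docstring above describes a purge-and-reattach cycle: while the stateless training loop carries the real state, the copies cached on the variable objects can be dropped at the start of the epoch and written back at the end via `jax_state_sync()`. A rough self-contained sketch of that lifecycle, with hypothetical helpers in place of the trainer's actual methods:

class Variable:
    def __init__(self, value):
        self.value = value  # the copy that occupies extra memory


def purge(variables):
    for v in variables:
        v.value = None  # drop the cached copy; the loop owns the state


def state_sync(variables, final_values):
    for v, value in zip(variables, final_values):
        v.value = value  # reattach the final values after the epoch


weights = [Variable(1), Variable(2)]
state = [v.value for v in weights]  # state threaded through the loop
purge(weights)                      # epoch begins: free the copies
state = [s + 1 for s in state]      # stand-in for the training updates
state_sync(weights, state)          # epoch ends: write the state back
assert weights[0].value == 2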
2 changes: 2 additions & 0 deletions keras_core/layers/layer.py
@@ -1040,6 +1040,8 @@ def losses(self):
             losses.extend(layer._get_own_losses())
         weight_regularization_losses = []
         for v in self.trainable_weights:
+            if backend.in_stateless_scope():
+                v = backend.get_stateless_scope().get_current_value(v)
             regularizer = getattr(v, "regularizer", None)
             if regularizer:
                 weight_regularization_losses.append(regularizer(v))
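The two added lines matter because a weight's regularizer should be evaluated against the value the current step is actually using, not the stale one cached on the variable object. A toy illustration of the difference, with a plain dict standing in for the stateless-scope lookup:

class Variable:
    def __init__(self, value):
        self.value = value


def l2(value, rate=0.01):
    # A toy L2 penalty standing in for a real regularizer.
    return rate * value * value


w = Variable(10.0)           # cached, pre-step value
scope_values = {id(w): 0.1}  # fresh value the step is applying

stale_penalty = l2(w.value)                           # 1.0 -- wrong
fresh_penalty = l2(scope_values.get(id(w), w.value))  # 0.0001 -- right
assert fresh_penalty < stale_penalty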
