From 8d41a766c1536bcc1410d6e268f9c97c486acf1c Mon Sep 17 00:00:00 2001
From: Vahid Reza Khazaie <vrkh1996@gmail.com>
Date: Wed, 30 Oct 2024 10:38:03 -0400
Subject: [PATCH] revise ijepa trainer class

---
 mmlearn/tasks/ijepa_pretraining.py | 52 ++++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 6 deletions(-)

diff --git a/mmlearn/tasks/ijepa_pretraining.py b/mmlearn/tasks/ijepa_pretraining.py
index 6e4c5d8..6b3b716 100644
--- a/mmlearn/tasks/ijepa_pretraining.py
+++ b/mmlearn/tasks/ijepa_pretraining.py
@@ -36,6 +36,8 @@ class IJEPA(L.LightningModule):
         Initial momentum for EMA of target encoder, by default 0.996.
     ema_decay_end : float, optional
         Final momentum for EMA of target encoder, by default 1.0.
+    ema_anneal_end_step : int, optional
+        Number of steps to anneal EMA momentum to `ema_decay_end`, by default 1000.
     loss_fn : Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]], optional
         Loss function to use, by default None.
     compute_validation_loss : bool, optional
@@ -55,6 +57,7 @@ def __init__(
         lr_scheduler: Optional[Any] = None,
         ema_decay: float = 0.996,
         ema_decay_end: float = 1.0,
+        ema_anneal_end_step: int = 1000,
         loss_fn: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
         compute_validation_loss: bool = True,
         compute_test_loss: bool = True,
@@ -76,30 +79,35 @@ def __init__(
         self.encoder = encoder
         self.predictor = predictor
 
-        self.ema = ExponentialMovingAverage(encoder, ema_decay, ema_decay_end, 1000)
+        self.ema = ExponentialMovingAverage(
+            encoder,
+            ema_decay,
+            ema_decay_end,
+            ema_anneal_end_step,
+            device_id=self.device,
+        )
 
     def training_step(self, batch: Dict[str, Any], batch_idx: int) -> torch.Tensor:
         """Perform a single training step."""
-        return self._shared_step(batch, batch_idx, step_type="train", is_training=True)
+        return self._shared_step(batch, batch_idx, step_type="train")
 
     def validation_step(
         self, batch: Dict[str, Any], batch_idx: int
     ) -> Optional[torch.Tensor]:
         """Run a single validation step."""
-        return self._shared_step(batch, batch_idx, step_type="val", is_training=False)
+        return self._shared_step(batch, batch_idx, step_type="val")
 
     def test_step(
         self, batch: Dict[str, Any], batch_idx: int
     ) -> Optional[torch.Tensor]:
         """Run a single test step."""
-        return self._shared_step(batch, batch_idx, step_type="test", is_training=False)
+        return self._shared_step(batch, batch_idx, step_type="test")
 
     def _shared_step(
         self,
         batch: Dict[str, Any],
         batch_idx: int,
         step_type: str,
-        is_training: bool = False,
     ) -> Optional[torch.Tensor]:
         images = batch[Modalities.RGB.name]
 
@@ -135,7 +143,7 @@ def _shared_step(
             sync_dist=True,
         )
 
-        if is_training:
+        if step_type == "train":
             # EMA update of target encoder
             self.ema.step(self.encoder)
 
@@ -240,3 +248,35 @@ def on_test_epoch_start(self) -> None:
     def on_test_epoch_end(self) -> None:
         """Actions at the end of the test epoch."""
         self._on_eval_epoch_end("test")
+
+    def _on_eval_epoch_start(self, step_type: str) -> None:
+        """Log a "<step_type>/start" marker if that phase's loss flag is set.
+
+        Parameters
+        ----------
+        step_type : str
+            Evaluation phase being started; "val" and "test" are recognized.
+        """
+        # Each evaluation phase is gated by its own loss-computation flag.
+        val_enabled = step_type == "val" and self.compute_validation_loss
+        test_enabled = step_type == "test" and self.compute_test_loss
+        if not (val_enabled or test_enabled):
+            return
+
+        self.log(f"{step_type}/start", 1, prog_bar=True, sync_dist=True)
+
+    def _on_eval_epoch_end(self, step_type: str) -> None:
+        """Log a "<step_type>/end" marker if that phase's loss flag is set.
+
+        Parameters
+        ----------
+        step_type : str
+            Evaluation phase being finished; "val" and "test" are recognized.
+        """
+        # Each evaluation phase is gated by its own loss-computation flag.
+        val_enabled = step_type == "val" and self.compute_validation_loss
+        test_enabled = step_type == "test" and self.compute_test_loss
+        if not (val_enabled or test_enabled):
+            return
+
+        self.log(f"{step_type}/end", 1, prog_bar=True, sync_dist=True)