From c7fd876a7656fd24ed3ccca59deab395dc4cd46d Mon Sep 17 00:00:00 2001
From: Alessandro Sordoni
Date: Mon, 5 Aug 2024 12:10:10 -0700
Subject: [PATCH] remove length printing

---
 mttl/models/expert_model.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/mttl/models/expert_model.py b/mttl/models/expert_model.py
index 9f48409ed..d22e01fa3 100644
--- a/mttl/models/expert_model.py
+++ b/mttl/models/expert_model.py
@@ -83,7 +83,6 @@ def forward(self, batch, reduction="mean"):
         input_ids = batch["input_ids"]
         labels = batch["labels"]
 
-        print(input_ids.shape[-1])
         outputs = self.model.forward(input_ids, attention_mask=batch["attention_mask"])
 
         # calculate loss, could also be done inside of the model
@@ -153,6 +152,18 @@ def training_step(self, batch, _):
             f"{self._log_pref}train/total_loss", total_loss, on_step=True, prog_bar=True
         )
 
+        # get peak and avg memory
+        peak_memory = torch.cuda.max_memory_allocated() / 1024**3
+        memory = torch.cuda.memory_allocated() / 1024**3
+
+        self.log(
+            f"{self._log_pref}train/peak_memory",
+            peak_memory,
+            on_step=True,
+            prog_bar=True,
+        )
+        self.log(f"{self._log_pref}train/memory", memory, on_step=True, prog_bar=True)
+
         for i, pg in enumerate(self.optimizers().optimizer.param_groups):
             self.log(f"train/lr_{i}", pg["lr"])
         return total_loss
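
Note (not part of the patch): below is a minimal standalone sketch of the memory-logging pattern the second hunk introduces. It assumes a PyTorch Lightning module with a `_log_pref` attribute, as in mttl's expert_model.py; the dummy loss and the `pytorch_lightning` import path are stand-ins for the surrounding mttl code, not the actual implementation. One caveat worth noting: torch.cuda.max_memory_allocated() reports the peak since process start (or since the last torch.cuda.reset_peak_memory_stats() call), so the logged peak is cumulative across steps unless the counter is reset.

    import torch
    import pytorch_lightning as pl  # assumption: import path may differ in mttl


    class MemoryLoggingModule(pl.LightningModule):
        """Illustrative only; mirrors the logging added to training_step in the patch."""

        _log_pref = ""

        def training_step(self, batch, batch_idx):
            # Dummy loss so the sketch stands alone; the real module computes a
            # loss from the underlying model's outputs.
            total_loss = batch["input_ids"].float().mean()

            # Convert bytes to GiB. max_memory_allocated() is the peak since the
            # last reset_peak_memory_stats() call; memory_allocated() is the
            # current tensor allocation on the default CUDA device.
            peak_memory = torch.cuda.max_memory_allocated() / 1024**3
            memory = torch.cuda.memory_allocated() / 1024**3

            self.log(
                f"{self._log_pref}train/peak_memory",
                peak_memory,
                on_step=True,
                prog_bar=True,
            )
            self.log(f"{self._log_pref}train/memory", memory, on_step=True, prog_bar=True)
            return total_loss

If a per-step peak is wanted instead of a cumulative one, torch.cuda.reset_peak_memory_stats() could be called at the start of each step before reading max_memory_allocated().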