From c7fd876a7656fd24ed3ccca59deab395dc4cd46d Mon Sep 17 00:00:00 2001
From: Alessandro Sordoni
Date: Mon, 5 Aug 2024 12:10:10 -0700
Subject: [PATCH] remove length printing

---
 mttl/models/expert_model.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/mttl/models/expert_model.py b/mttl/models/expert_model.py
index 9f48409ed..d22e01fa3 100644
--- a/mttl/models/expert_model.py
+++ b/mttl/models/expert_model.py
@@ -83,7 +83,6 @@ def forward(self, batch, reduction="mean"):
         input_ids = batch["input_ids"]
         labels = batch["labels"]
 
-        print(input_ids.shape[-1])
         outputs = self.model.forward(input_ids, attention_mask=batch["attention_mask"])
 
         # calculate loss, could also be done inside of the model
@@ -153,6 +152,18 @@ def training_step(self, batch, _):
             f"{self._log_pref}train/total_loss", total_loss, on_step=True, prog_bar=True
         )
 
+        # get peak and avg memory
+        peak_memory = torch.cuda.max_memory_allocated() / 1024**3
+        memory = torch.cuda.memory_allocated() / 1024**3
+
+        self.log(
+            f"{self._log_pref}train/peak_memory",
+            peak_memory,
+            on_step=True,
+            prog_bar=True,
+        )
+        self.log(f"{self._log_pref}train/memory", memory, on_step=True, prog_bar=True)
+
         for i, pg in enumerate(self.optimizers().optimizer.param_groups):
             self.log(f"train/lr_{i}", pg["lr"])
         return total_loss
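
Note (not part of the patch): below is a minimal standalone sketch of the memory-logging pattern the second hunk introduces. It assumes a PyTorch Lightning module with a `_log_pref` attribute, as in mttl's expert_model.py; the dummy loss and the `pytorch_lightning` import path are stand-ins for the surrounding mttl code, not the actual implementation. One caveat worth noting: torch.cuda.max_memory_allocated() reports the peak since process start (or since the last torch.cuda.reset_peak_memory_stats() call), so the logged peak is cumulative across steps unless the counter is reset.

    import torch
    import pytorch_lightning as pl  # assumption: import path may differ in mttl


    class MemoryLoggingModule(pl.LightningModule):
        """Illustrative only; mirrors the logging added to training_step in the patch."""

        _log_pref = ""

        def training_step(self, batch, batch_idx):
            # Dummy loss so the sketch stands alone; the real module computes a
            # loss from the underlying model's outputs.
            total_loss = batch["input_ids"].float().mean()

            # Convert bytes to GiB. max_memory_allocated() is the peak since the
            # last reset_peak_memory_stats() call; memory_allocated() is the
            # current tensor allocation on the default CUDA device.
            peak_memory = torch.cuda.max_memory_allocated() / 1024**3
            memory = torch.cuda.memory_allocated() / 1024**3

            self.log(
                f"{self._log_pref}train/peak_memory",
                peak_memory,
                on_step=True,
                prog_bar=True,
            )
            self.log(f"{self._log_pref}train/memory", memory, on_step=True, prog_bar=True)
            return total_loss

If a per-step peak is wanted instead of a cumulative one, torch.cuda.reset_peak_memory_stats() could be called at the start of each step before reading max_memory_allocated().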