diff --git a/server/lorax_server/models/flash_distilbert.py b/server/lorax_server/models/flash_distilbert.py
index c2b9ff7bd..d6cda8a72 100644
--- a/server/lorax_server/models/flash_distilbert.py
+++ b/server/lorax_server/models/flash_distilbert.py
@@ -129,7 +129,8 @@ def supports_classification(self) -> bool:
     def warmup(self, batch: FlashEmbeddingClassificationBatch, max_new_tokens: int) -> int | None:
         # Note: This is meant to 1) preallocate the memory by doing a forward pass
         # and then just returning the max seqlen since for embeddings we are never generating
-        _ = self.embed(batch)
+        # TODO: (magdy) add the forward pass and debug this
+        # _ = self.embed(batch)
         return batch.max_s

     def generate_token(self, batch: FlashEmbeddingClassificationBatch) -> None:
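
For context, a minimal sketch (not part of this diff) of how the warmup forward pass could be restored once the TODO is resolved. The error handling below is an assumption modeled on typical warmup code that guards against allocation failures; only `self.embed(batch)` and `batch.max_s` come from the diff itself:

    # Assumes `import torch` at module level and that self.embed(batch) is the
    # forward pass being preallocated, as in the line commented out above.
    def warmup(self, batch: FlashEmbeddingClassificationBatch, max_new_tokens: int) -> int | None:
        # Preallocate memory with a single forward pass; embeddings never
        # generate tokens, so the max sequence length is returned directly.
        try:
            _ = self.embed(batch)
        except torch.cuda.OutOfMemoryError as e:
            # Hypothetical guard: surface warmup OOM with the batch size that caused it.
            raise RuntimeError(
                f"Not enough memory to warm up embedding batch (max_s={batch.max_s})"
            ) from e
        return batch.max_s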