Minor Bug Fixes - LLaMa Embedding (#12146)
* Minor Bug Fixes - LLaMa Embedding

Signed-off-by: Sam Oluwalana <[email protected]>

* Apply isort and black reformatting

Signed-off-by: artbataev <[email protected]>

* Add type checking

Signed-off-by: Sam Oluwalana <[email protected]>

---------

Signed-off-by: Sam Oluwalana <[email protected]>
Signed-off-by: artbataev <[email protected]>
Co-authored-by: artbataev <[email protected]>
soluwalana and artbataev authored Feb 12, 2025
1 parent d977f42 commit a682ea9
Showing 2 changed files with 5 additions and 2 deletions.
nemo/collections/llm/gpt/model/hf_llama_embedding.py (2 changes: 1 addition & 1 deletion)

@@ -156,7 +156,7 @@ def forward(

        loss = None
        if labels is not None:
-            labels = labels.to(logits.device)
+            labels = labels.to(pooled_logits.device)
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
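The fix above moves labels onto the device of pooled_logits, the tensor that actually enters the loss computation, rather than the full per-token logits. Below is a minimal, self-contained sketch of the same pattern; the toy module, shapes, and last-token pooling are illustrative assumptions, not the NeMo implementation.

import torch
import torch.nn as nn

class ToySequenceClassifier(nn.Module):
    """Toy classifier that pools per-token logits before computing the loss (illustrative only)."""

    def __init__(self, hidden_size: int = 16, num_labels: int = 3):
        super().__init__()
        self.score = nn.Linear(hidden_size, num_labels)

    def forward(self, hidden_states, labels=None):
        logits = self.score(hidden_states)   # (batch, seq_len, num_labels)
        pooled_logits = logits[:, -1, :]     # (batch, num_labels), last-token pooling
        loss = None
        if labels is not None:
            # Align labels with the tensor used in the loss, mirroring the change
            # from logits.device to pooled_logits.device in the diff above.
            labels = labels.to(pooled_logits.device)
            loss = nn.CrossEntropyLoss()(pooled_logits, labels)
        return loss, pooled_logits

model = ToySequenceClassifier()
loss, pooled = model(torch.randn(2, 5, 16), labels=torch.tensor([0, 2]))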
nemo/collections/llm/gpt/model/llama_embedding.py (5 changes: 4 additions & 1 deletion)

@@ -31,12 +31,15 @@
from nemo.collections.llm.utils import Config
from nemo.lightning import OptimizerModule, io
from nemo.lightning.pytorch.utils import dtype_from_hf
from nemo.utils import logging
from nemo.utils.import_utils import safe_import

if TYPE_CHECKING:
    from megatron.core.models.gpt.gpt_model import GPTModel as MCoreGPTModel

    from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec
    from nemo.collections.llm.gpt.model.hf_llama_embedding import LlamaBidirectionalModel

_, HAVE_TE = safe_import("transformer_engine")


@@ -271,7 +274,7 @@ class LlamaEmbeddingExporter(io.ModelConnector[LlamaEmbeddingModel, "LlamaBidire
    Note that NV Embedding LLama uses customized LlamaBidirectionalConfig config.
    """

-    def init(self, dtype=torch.bfloat16) -> "LlamaForCausalLM":
+    def init(self, dtype=torch.bfloat16) -> "LlamaBidirectionalModel":
        from transformers.modeling_utils import no_init_weights

        from nemo.collections.llm.gpt.model.hf_llama_embedding import LlamaBidirectionalModel
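Taken together, the TYPE_CHECKING-only import added above and the corrected return annotation on init follow a standard Python pattern: the class name is visible to static type checkers through the guarded import, the quoted annotation stays an unevaluated forward reference at runtime, and the real import happens lazily inside the method. A generic, runnable sketch of that pattern, using Decimal as a stand-in class rather than the NeMo types:

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by type checkers such as mypy or pyright; never executed at
    # runtime, so a heavy or circular dependency here adds no import cost.
    from decimal import Decimal  # stand-in for LlamaBidirectionalModel

class Exporter:
    def init(self, cents: int = 250) -> "Decimal":
        # The quoted annotation is a forward reference and is not evaluated at
        # runtime; the concrete class is imported where it is actually used.
        from decimal import Decimal
        return Decimal(cents) / 100

price = Exporter().init()  # Decimal('2.5')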
