diff --git a/docs/source/onnxruntime/usage_guides/models.mdx b/docs/source/onnxruntime/usage_guides/models.mdx
index 1292e755c06..905e6632c05 100644
--- a/docs/source/onnxruntime/usage_guides/models.mdx
+++ b/docs/source/onnxruntime/usage_guides/models.mdx
@@ -13,11 +13,11 @@ Once your model was [exported to the ONNX format](https://huggingface.co/docs/op
 
 ```diff
   from transformers import AutoTokenizer, pipeline
-- from transformers import AutoModelForQuestionAnswering
-+ from optimum.onnxruntime import ORTModelForQuestionAnswering
+- from transformers import AutoModelForCausalLM
++ from optimum.onnxruntime import ORTModelForCausalLM
 
-- model = AutoModelForQuestionAnswering.from_pretrained("meta-llama/Llama-3.2-1B) # PyTorch checkpoint
-+ model = ORTModelForQuestionAnswering.from_pretrained("onnx-community/Llama-3.2-1B", subfolder="onnx") # ONNX checkpoint
+- model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B") # PyTorch checkpoint
++ model = ORTModelForCausalLM.from_pretrained("onnx-community/Llama-3.2-1B", subfolder="onnx") # ONNX checkpoint
 
   tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
   pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
@@ -74,4 +74,4 @@ You can also call `push_to_hub` directly on your model to upload it to the [Hub]
 
 # Push the onnx model to HF Hub
 >>> model.push_to_hub(output_dir, repository_id="my-onnx-repo") # doctest: +SKIP
-```
\ No newline at end of file
+```
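
For context, a minimal runnable sketch of how the updated example reads end to end once this patch is applied. The model, tokenizer, and `subfolder="onnx"` arguments come from the diff above; the prompt string, `max_new_tokens`, and the `print` call are illustrative additions, not part of the patch.

```python
# Sketch of the patched usage example; prompt and generation settings are illustrative.
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForCausalLM

# Load the ONNX checkpoint (the onnx-community repo keeps the weights in an "onnx" subfolder)
model = ORTModelForCausalLM.from_pretrained("onnx-community/Llama-3.2-1B", subfolder="onnx")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")

# ORT models drop into the regular transformers pipeline API
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(pipe("ONNX Runtime is", max_new_tokens=20)[0]["generated_text"])
```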