diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 69e3596591..00ef684475 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -18969,7 +18969,7 @@ "inference" ], "summary": "Perform chat completion inference\n", - "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face`, or `elastic` service, use the Chat completion inference API.", "operationId": "inference-chat-completion-unified", "parameters": [ { @@ -19008,12 +19008,12 @@ }, "PostChatCompletionRequestExample2": { "summary": "A chat completion task with tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { "summary": "A chat completion task with tools and tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current 
price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } } @@ -20305,7 +20305,7 @@ "inference" ], "summary": "Create a Hugging Face inference endpoint", - "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n ##Required authorization\n* Cluster privileges: `manage_inference`", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for the `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose the OpenAI API. Hugging Face supports both serverless and dedicated endpoints for `Text Generation`. When creating a dedicated endpoint, select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and that the URL includes the `/v1/chat/completions` path. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`\n ##Required authorization\n* Cluster privileges: `manage_inference`", "operationId": "inference-put-hugging-face", "parameters": [ { @@ -84507,6 +84507,8 @@ "inference._types.HuggingFaceTaskType": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding" ] }, @@ -84530,7 +84532,14 @@ "$ref": "#/components/schemas/inference._types.RateLimitSetting" }, "url": { - "description": "The URL endpoint to use for the requests.", + "externalDocs": { + "url": "https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms" + }, + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (see the linked external documentation for details). 
The endpoint URL for the request must include `/v1/chat/completions`.\nIf the model supports the OpenAI Chat Completion schema, a toggle should appear in the interface. Enabling this toggle doesn't change any model behavior; it reveals the full endpoint URL needed (which should include `/v1/chat/completions`) when configuring the inference endpoint in Elasticsearch. If the model doesn't support this schema, the toggle may not be shown.", "type": "string" }, + "model_id": { + "description": "The name of the Hugging Face model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models, particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included. Otherwise, the request will fail.", "type": "string" } }, @@ -84565,7 +84574,9 @@ "inference._types.TaskTypeHuggingFace": { "type": "string", "enum": [ - "text_embedding" + "text_embedding", + "chat_completion", + "completion" ] }, "inference._types.JinaAITaskType": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 8ece9d0693..1d04cd8937 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -10330,7 +10330,7 @@ "inference" ], "summary": "Perform chat completion inference\n", - "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face`, or `elastic` service, use the Chat completion inference API.", "operationId": "inference-chat-completion-unified", "parameters": [ { @@ -10369,12 +10369,12 @@ }, "PostChatCompletionRequestExample2": { "summary": "A chat completion task with tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the 
weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { "summary": "A chat completion task with tools and tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } } @@ -11666,7 +11666,7 @@ "inference" ], "summary": "Create a Hugging Face inference endpoint", - "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n ##Required authorization\n* Cluster privileges: `manage_inference`", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for the `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose the OpenAI API. Hugging Face supports both serverless and dedicated endpoints for `Text Generation`. 
When creating a dedicated endpoint, select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and that the URL includes the `/v1/chat/completions` path. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`\n ##Required authorization\n* Cluster privileges: `manage_inference`", "operationId": "inference-put-hugging-face", "parameters": [ { @@ -54081,6 +54081,8 @@ "inference._types.HuggingFaceTaskType": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding" ] }, @@ -54104,7 +54106,14 @@ "$ref": "#/components/schemas/inference._types.RateLimitSetting" }, "url": { - "description": "The URL endpoint to use for the requests.", + "externalDocs": { + "url": "https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms" + }, + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (see the linked external documentation for details). The endpoint URL for the request must include `/v1/chat/completions`.\nIf the model supports the OpenAI Chat Completion schema, a toggle should appear in the interface. Enabling this toggle doesn't change any model behavior; it reveals the full endpoint URL needed (which should include `/v1/chat/completions`) when configuring the inference endpoint in Elasticsearch. If the model doesn't support this schema, the toggle may not be shown.", + "type": "string" + }, + "model_id": { + "description": "The name of the Hugging Face model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models, particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included. 
Otherwise, the request will fail.", "type": "string" } }, @@ -54139,7 +54148,9 @@ "inference._types.TaskTypeHuggingFace": { "type": "string", "enum": [ - "text_embedding" + "text_embedding", + "chat_completion", + "completion" ] }, "inference._types.JinaAITaskType": { diff --git a/output/schema/schema.json b/output/schema/schema.json index e09a0f7340..14031c72e8 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9337,7 +9337,7 @@ "visibility": "public" } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face`, or `elastic` service, use the Chat completion inference API.", "docId": "inference-api-chat-completion", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-unified-inference", "name": "inference.chat_completion_unified", @@ -10065,7 +10065,7 @@ "visibility": "public" } }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must 
support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for the `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose the OpenAI API. Hugging Face supports both serverless and dedicated endpoints for `Text Generation`. When creating a dedicated endpoint, select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and that the URL includes the `/v1/chat/completions` path. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "docId": "inference-api-put-huggingface", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face", "name": "inference.put_hugging_face", @@ -156249,7 +156249,7 @@ } }, { - "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000 for all supported tasks.\nHugging Face does not publish a universal rate limit; actual limits may vary.\nIt is recommended to adjust this value based on the capacity and limits of your specific deployment environment.", "name": "rate_limit", "required": false, "type": { @@ -156261,7 +156261,9 @@ } }, { - "description": "The URL endpoint to use for the requests.", + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (see the linked external documentation for details). The endpoint URL for the request must include `/v1/chat/completions`.\nIf the model supports the OpenAI Chat Completion schema, a toggle should appear in the interface. Enabling this toggle doesn't change any model behavior; it reveals the full endpoint URL needed (which should include `/v1/chat/completions`) when configuring the inference endpoint in Elasticsearch. If the model doesn't support this schema, the toggle may not be shown.", + "extDocId": "huggingface-chat-completion-interface", + "extDocUrl": "https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms", "name": "url", "required": true, "type": { "kind": "instance_of", "type": { "name": "string", "namespace": "_builtins" } } + }, + { + "description": "The name of the Hugging Face model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models, particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included. 
Otherwise, the request will fail.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } ], - "specLocation": "inference/_types/CommonTypes.ts#L877-L898" + "specLocation": "inference/_types/CommonTypes.ts#L877-L909" }, { "kind": "enum", @@ -156286,11 +156300,17 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L904-L906" + "specLocation": "inference/_types/CommonTypes.ts#L917-L919" }, { "kind": "enum", "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, { "name": "text_embedding" } @@ -156299,7 +156319,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L900-L902" + "specLocation": "inference/_types/CommonTypes.ts#L911-L915" }, { "kind": "interface", @@ -157262,7 +157282,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L908-L937" + "specLocation": "inference/_types/CommonTypes.ts#L921-L950" }, { "kind": "enum", @@ -157275,7 +157295,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L967-L969" + "specLocation": "inference/_types/CommonTypes.ts#L980-L982" }, { "kind": "enum", @@ -157294,7 +157314,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L971-L975" + "specLocation": "inference/_types/CommonTypes.ts#L984-L988" }, { "kind": "interface", @@ -157340,7 +157360,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L939-L960" + "specLocation": "inference/_types/CommonTypes.ts#L952-L973" }, { "kind": "enum", @@ -157356,7 +157376,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L962-L965" + "specLocation": "inference/_types/CommonTypes.ts#L975-L978" }, { "kind": "enum", @@ -157378,7 +157398,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L977-L982" + "specLocation": "inference/_types/CommonTypes.ts#L990-L995" }, { "kind": "interface", @@ -157536,7 +157556,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L984-L1011" + "specLocation": "inference/_types/CommonTypes.ts#L997-L1024" }, { "kind": "enum", @@ -157549,7 +157569,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1019-L1021" + "specLocation": "inference/_types/CommonTypes.ts#L1032-L1034" }, { "kind": "enum", @@ -157568,7 +157588,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1013-L1017" + "specLocation": "inference/_types/CommonTypes.ts#L1026-L1030" }, { "kind": "interface", @@ -157655,7 +157675,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1023-L1065" + "specLocation": "inference/_types/CommonTypes.ts#L1036-L1078" }, { "kind": "enum", @@ -157668,7 +157688,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1081-L1083" + "specLocation": "inference/_types/CommonTypes.ts#L1094-L1096" }, { "kind": "interface", @@ -157690,7 +157710,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1067-L1073" + "specLocation": "inference/_types/CommonTypes.ts#L1080-L1086" }, { "kind": "enum", @@ -157709,7 +157729,7 @@ "name": 
"OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1075-L1079" + "specLocation": "inference/_types/CommonTypes.ts#L1088-L1092" }, { "kind": "interface", @@ -158208,13 +158228,19 @@ "members": [ { "name": "text_embedding" + }, + { + "name": "chat_completion" + }, + { + "name": "completion" } ], "name": { "name": "TaskTypeHuggingFace", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L88-L90" + "specLocation": "inference/_types/TaskType.ts#L88-L92" }, { "kind": "enum", @@ -158249,7 +158275,7 @@ "name": "TaskTypeMistral", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L92-L96" + "specLocation": "inference/_types/TaskType.ts#L94-L98" }, { "kind": "enum", @@ -158268,7 +158294,7 @@ "name": "TaskTypeOpenAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L98-L102" + "specLocation": "inference/_types/TaskType.ts#L100-L104" }, { "kind": "enum", @@ -158284,7 +158310,7 @@ "name": "TaskTypeVoyageAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L104-L107" + "specLocation": "inference/_types/TaskType.ts#L106-L109" }, { "kind": "enum", @@ -158297,7 +158323,7 @@ "name": "TaskTypeWatsonx", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L109-L111" + "specLocation": "inference/_types/TaskType.ts#L111-L113" }, { "kind": "interface", @@ -158543,7 +158569,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1085-L1116" + "specLocation": "inference/_types/CommonTypes.ts#L1098-L1129" }, { "kind": "enum", @@ -158556,7 +158582,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1149-L1151" + "specLocation": "inference/_types/CommonTypes.ts#L1162-L1164" }, { "kind": "interface", @@ -158616,7 +158642,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1118-L1142" + "specLocation": "inference/_types/CommonTypes.ts#L1131-L1155" }, { "kind": "enum", @@ -158632,7 +158658,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1144-L1147" + "specLocation": "inference/_types/CommonTypes.ts#L1157-L1160" }, { "kind": "interface", @@ -158720,7 +158746,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1153-L1190" + "specLocation": "inference/_types/CommonTypes.ts#L1166-L1203" }, { "kind": "enum", @@ -158733,7 +158759,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1196-L1198" + "specLocation": "inference/_types/CommonTypes.ts#L1209-L1211" }, { "kind": "enum", @@ -158746,7 +158772,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1192-L1194" + "specLocation": "inference/_types/CommonTypes.ts#L1205-L1207" }, { "kind": "request", @@ -158764,7 +158790,7 @@ } } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and 
capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face`, or `elastic` service, use the Chat completion inference API.", "examples": { "PostChatCompletionRequestExample1": { "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion on the example question with streaming.", "method_request": "POST _inference/chat_completion/openai-completion/_stream", "summary": "A chat completion task", "value": "{\n \"model\": \"gpt-4o\",\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": \"What is Elastic?\"\n }\n ]\n}" }, "PostChatCompletionRequestExample2": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "method_request": "POST _inference/chat_completion/openai-completion/_stream", "summary": "A chat completion task with tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "method_request": "POST _inference/chat_completion/openai-completion/_stream", "summary": "A chat completion task with tools and tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" @@ -160749,7 +160775,7 @@ } ] }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an 
inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for the `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose the OpenAI API. Hugging Face supports both serverless and dedicated endpoints for `Text Generation`. When creating a dedicated endpoint, select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and that the URL includes the `/v1/chat/completions` path. 
Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "examples": { "PutHuggingFaceRequestExample1": { "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", @@ -160795,7 +160821,7 @@ } ], "query": [], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L85" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L97" }, { "kind": "response", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 43858166d9..b84d027513 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13666,11 +13666,12 @@ export interface InferenceHuggingFaceServiceSettings { api_key: string rate_limit?: InferenceRateLimitSetting url: string + model_id?: string } export type InferenceHuggingFaceServiceType = 'hugging_face' -export type InferenceHuggingFaceTaskType = 'text_embedding' +export type InferenceHuggingFaceTaskType = 'chat_completion' | 'completion' | 'text_embedding' export interface InferenceInferenceChunkingSettings { max_chunk_size?: integer @@ -13899,7 +13900,7 @@ export type InferenceTaskTypeGoogleAIStudio = 'text_embedding' | 'completion' export type InferenceTaskTypeGoogleVertexAI = 'text_embedding' | 'rerank' -export type InferenceTaskTypeHuggingFace = 'text_embedding' +export type InferenceTaskTypeHuggingFace = 'text_embedding' | 'chat_completion' | 'completion' export type InferenceTaskTypeJinaAi = 'text_embedding' | 'rerank' diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index d190c3013d..c256b7d4c5 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -260,6 +260,7 @@ grok,https://www.elastic.co/docs/explore-analyze/scripting/grok grok-processor,https://www.elastic.co/docs/reference/enrich-processor/grok-processor gsub-processor,https://www.elastic.co/docs/reference/enrich-processor/gsub-processor health-api,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-health-report +huggingface-chat-completion-interface,https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms huggingface-tokens,https://huggingface.co/settings/tokens ilm-delete-lifecycle,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-delete-lifecycle ilm-explain-lifecycle,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-explain-lifecycle diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 772d12a74d..ce6eeec3ff 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -888,16 +888,29 @@ export class HuggingFaceServiceSettings { api_key: string /** * This setting helps to minimize the number of rate limit errors returned from Hugging Face. - * By default, the `hugging_face` service sets the number of requests allowed per minute to 3000. + * By default, the `hugging_face` service sets the number of requests allowed per minute to 3000 for all supported tasks. + * Hugging Face does not publish a universal rate limit; actual limits may vary. + * It is recommended to adjust this value based on the capacity and limits of your specific deployment environment. 
*/ rate_limit?: RateLimitSetting /** * The URL endpoint to use for the requests. + * For `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (see the linked external documentation for details). The endpoint URL for the request must include `/v1/chat/completions`. + * If the model supports the OpenAI Chat Completion schema, a toggle should appear in the interface. Enabling this toggle doesn't change any model behavior; it reveals the full endpoint URL needed (which should include `/v1/chat/completions`) when configuring the inference endpoint in Elasticsearch. If the model doesn't support this schema, the toggle may not be shown. + * @ext_doc_id huggingface-chat-completion-interface */ url: string + /** + * The name of the Hugging Face model to use for the inference task. + * For `completion` and `chat_completion` tasks, this field is optional but may be required for certain models, particularly when using serverless inference endpoints. + * For the `text_embedding` task, this field should not be included. Otherwise, the request will fail. + */ + model_id?: string } export enum HuggingFaceTaskType { + chat_completion, + completion, text_embedding } diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index 0e2a3807ea..d7dd35d589 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -86,7 +86,9 @@ export enum TaskTypeGoogleVertexAI { } export enum TaskTypeHuggingFace { - text_embedding + text_embedding, + chat_completion, + completion } export enum TaskTypeMistral { diff --git a/specification/inference/chat_completion_unified/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts index 03591ec226..6602d9448d 100644 --- a/specification/inference/chat_completion_unified/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -30,7 +30,7 @@ import { RequestChatCompletion } from '@inference/_types/CommonTypes' * NOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming. * The Chat completion inference API and the Stream inference API differ in their response structure and capabilities. * The Chat completion inference API provides more comprehensive customization options through more fields and function calling support. - * If you use the `openai` service or the `elastic` service, use the Chat completion inference API. + * If you use the `openai`, `hugging_face`, or `elastic` service, use the Chat completion inference API. * @rest_spec_name inference.chat_completion_unified * @availability stack since=8.18.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public diff --git a/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample2.yaml b/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample2.yaml index 37b54591fc..848372aa5e 100644 --- a/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample2.yaml +++ b/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample2.yaml @@ -1,5 +1,5 @@ summary: A chat completion task with tool_calls -description: Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`. 
+description: Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`. method_request: 'POST _inference/chat_completion/openai-completion/_stream' # type: "request" value: |- diff --git a/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample3.yaml b/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample3.yaml index 1eec86dddf..3ed58d2f9d 100644 --- a/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample3.yaml +++ b/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample3.yaml @@ -1,5 +1,5 @@ summary: A chat completion task with tools and tool_calls -description: Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`. +description: Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`. method_request: 'POST _inference/chat_completion/openai-completion/_stream' # type: "request" value: |- diff --git a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts index 9c2b4855a7..6b4ebea972 100644 --- a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts +++ b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts @@ -30,12 +30,15 @@ import { InferenceChunkingSettings } from '@inference/_types/Services' * Create a Hugging Face inference endpoint. * * Create an inference endpoint to perform an inference task with the `hugging_face` service. + * Supported tasks include `text_embedding`, `completion`, and `chat_completion`. * - * You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL. - * Select the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section. - * Create the endpoint and copy the URL after the endpoint initialization has been finished. - * - * The following models are recommended for the Hugging Face service: + * To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint. + * Select a model that supports the task you intend to use. + * + * For Elastic's `text_embedding` task: + * The selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section. + * After the endpoint has initialized, copy the generated endpoint URL. + * Recommended models for the `text_embedding` task: * * * `all-MiniLM-L6-v2` * * `all-MiniLM-L12-v2` @@ -44,6 +47,15 @@ import { InferenceChunkingSettings } from '@inference/_types/Services' * * `e5-small-v2` * * `multilingual-e5-base` * * `multilingual-e5-small` + * + * For Elastic's `chat_completion` and `completion` tasks: + * The selected model must support the `Text Generation` task and expose the OpenAI API. Hugging Face supports both serverless and dedicated endpoints for `Text Generation`. When creating a dedicated endpoint, select the `Text Generation` task. + * After the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and that the URL includes the `/v1/chat/completions` path. 
Then, copy the full endpoint URL for use. + * Recommended models for `chat_completion` and `completion` tasks: + * + * * `Mistral-7B-Instruct-v0.2` + * * `QwQ-32B` + * * `Phi-3-mini-128k-instruct` * @rest_spec_name inference.put_hugging_face * @availability stack since=8.12.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public
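For illustration, a minimal sketch of the workflow the updated descriptions walk through, in the same Console style as the request examples above. The endpoint name `hugging-face-chat`, the host placeholder, and the choice of `QwQ-32B` as `model_id` are hypothetical stand-ins; only the `service` and `service_settings` field names (`api_key`, `url`, optional `model_id`), the requirement that the URL include `/v1/chat/completions`, and the recommended model list come from the schema changes in this diff:

PUT _inference/chat_completion/hugging-face-chat
{
  "service": "hugging_face",
  "service_settings": {
    "api_key": "<huggingface-access-token>",
    "url": "https://<your-endpoint>.endpoints.huggingface.cloud/v1/chat/completions",
    "model_id": "QwQ-32B"
  }
}

Because the `chat_completion` task type is only available within the _stream API and only supports streaming, the endpoint would then be called as:

POST _inference/chat_completion/hugging-face-chat/_stream
{
  "messages": [
    {
      "role": "user",
      "content": "What is Elastic?"
    }
  ]
}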