diff --git a/docs/deployment/cloud/outscale.mdx b/docs/deployment/cloud/outscale.mdx
new file mode 100644
index 0000000..3503e69
--- /dev/null
+++ b/docs/deployment/cloud/outscale.mdx
@@ -0,0 +1,181 @@
+---
+id: outscale
+title: Outscale
+sidebar_position: 3.26
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+## Introduction
+
+Mistral AI models are available on the Outscale platform as managed deployments.
+Through the Outscale marketplace, you can subscribe to a Mistral service that will,
+on your behalf, provision a virtual machine with a GPU and then deploy the model on it.
+
+As of today, the following models are available:
+
+- Mistral Small (2409)
+- Codestral
+
+For more details, visit the [models](../../../getting-started/models/models_overview) page.
+
+## Getting started
+
+The following sections outline the steps to query a Mistral model on the Outscale platform.
+
+### Deploying the model
+
+Follow the steps described in the
+[Outscale documentation](https://docs.outscale.com/en/userguide/Subscribing-To-a-Mistral-Service-and-Deploying-it.html) to deploy a service
+with the model of your choice.
+
+### Querying the model (chat completion)
+
+Deployed models expose a REST API that you can query using Mistral's SDK or plain HTTP calls.
+To run the examples below, you will need to set the following environment variables:
+
+- `OUTSCALE_SERVER_URL`: the URL of the VM hosting your Mistral model
+- `OUTSCALE_MODEL_NAME`: the name of the model to query (e.g. `small`, `codestral`)
+
+<Tabs>
+  <TabItem value="curl" label="cURL" default>
+    ```bash
+    echo $OUTSCALE_SERVER_URL/v1/chat/completions
+    echo $OUTSCALE_MODEL_NAME
+    curl --location $OUTSCALE_SERVER_URL/v1/chat/completions \
+      --header "Content-Type: application/json" \
+      --header "Accept: application/json" \
+      --data '{
+        "model": "'"$OUTSCALE_MODEL_NAME"'",
+        "temperature": 0,
+        "messages": [
+          {"role": "user", "content": "Who is the best French painter? Answer in one short sentence."}
+        ],
+        "stream": false
+      }'
+    ```
+  </TabItem>
+  <TabItem value="python" label="Python">
+    ```python
+    import os
+    from mistralai import Mistral
+
+    client = Mistral(server_url=os.environ["OUTSCALE_SERVER_URL"])
+
+    resp = client.chat.complete(
+        model=os.environ["OUTSCALE_MODEL_NAME"],
+        messages=[
+            {
+                "role": "user",
+                "content": "Who is the best French painter? Answer in one short sentence.",
+            }
+        ],
+        temperature=0,
+    )
+
+    print(resp.choices[0].message.content)
+    ```
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+    ```typescript
+    import { Mistral } from "@mistralai/mistralai";
+
+    const client = new Mistral({
+        serverURL: process.env.OUTSCALE_SERVER_URL || ""
+    });
+
+    const modelName = process.env.OUTSCALE_MODEL_NAME || "";
+
+    async function chatCompletion(user_msg: string) {
+        const resp = await client.chat.complete({
+            model: modelName,
+            messages: [
+                {
+                    content: user_msg,
+                    role: "user",
+                },
+            ],
+        });
+        if (resp.choices && resp.choices.length > 0) {
+            console.log(resp.choices[0]);
+        }
+    }
+
+    chatCompletion("Who is the best French painter? Answer in one short sentence.");
+    ```
+  </TabItem>
+</Tabs>
+
+### Querying the model (FIM completion)
+
+Codestral can be queried using an additional completion mode called fill-in-the-middle (FIM).
+For more information, see the
+[code generation section](../../../capabilities/code_generation/#fill-in-the-middle-endpoint).
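+
+As with the chat completion examples above, the FIM examples below assume that both
+environment variables are set in your shell. A minimal sketch, using placeholder values
+that you should replace with your own VM address and model name:
+
+```bash
+# Placeholder values for illustration only; substitute your own deployment details.
+export OUTSCALE_SERVER_URL="https://your-vm-ip-or-hostname"
+export OUTSCALE_MODEL_NAME="codestral"
+```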
+
+<Tabs>
+  <TabItem value="curl" label="cURL" default>
+    ```bash
+    curl --location $OUTSCALE_SERVER_URL/v1/fim/completions \
+      --header "Content-Type: application/json" \
+      --header "Accept: application/json" \
+      --data '{
+        "model": "'"$OUTSCALE_MODEL_NAME"'",
+        "prompt": "def count_words_in_file(file_path: str) -> int:",
+        "suffix": "return n_words",
+        "stream": false
+      }'
+    ```
+  </TabItem>
+  <TabItem value="python" label="Python">
+    ```python
+    import os
+    from mistralai import Mistral
+
+    client = Mistral(server_url=os.environ["OUTSCALE_SERVER_URL"])
+
+    resp = client.fim.complete(
+        model=os.environ["OUTSCALE_MODEL_NAME"],
+        prompt="def count_words_in_file(file_path: str) -> int:",
+        suffix="return n_words"
+    )
+
+    print(resp.choices[0].message.content)
+    ```
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+    ```typescript
+    import { Mistral } from "@mistralai/mistralai";
+
+    const client = new Mistral({
+        serverURL: process.env.OUTSCALE_SERVER_URL || ""
+    });
+
+    const modelName = "codestral";
+
+    async function fimCompletion(prompt: string, suffix: string) {
+        const resp = await client.fim.complete({
+            model: modelName,
+            prompt: prompt,
+            suffix: suffix
+        });
+        if (resp.choices && resp.choices.length > 0) {
+            console.log(resp.choices[0]);
+        }
+    }
+
+    fimCompletion("def count_words_in_file(file_path: str) -> int:",
+        "return n_words");
+    ```
+  </TabItem>
+</Tabs>
+
+## Going further
+
+For more information and examples, you can check:
+
+- The [Outscale documentation](https://docs.outscale.com/en/userguide/Subscribing-To-a-Mistral-Service-and-Deploying-it.html)
+  explaining how to subscribe to a Mistral service and deploy it.
diff --git a/docs/deployment/cloud/overview.mdx b/docs/deployment/cloud/overview.mdx
index 3f39b4c..a5e8125 100644
--- a/docs/deployment/cloud/overview.mdx
+++ b/docs/deployment/cloud/overview.mdx
@@ -12,4 +12,5 @@ In particular, Mistral's optimized commercial models are available on:
 - [Google Cloud Vertex AI Model Garden](../vertex)
 - [Snowflake Cortex](../sfcortex)
 - [IBM watsonx](../ibm-watsonx)
+- [Outscale](../outscale)
 
diff --git a/docs/guides/tokenization.mdx b/docs/guides/tokenization.mdx
index d477bdd..46d48b4 100644
--- a/docs/guides/tokenization.mdx
+++ b/docs/guides/tokenization.mdx
@@ -30,10 +30,10 @@ Note that we are still iterating on the tokenizer. Things may change and this is
 
 We have released three versions of our tokenizers powering different sets of models.
 
-- v1: `open-mistral-7b`, `open-mixtral-8x7b`, `mistral-embed`
-- v2: `mistral-small-latest`, `mistral-large-latest`
-- v3: `open-mixtral-8x22b`
-- v3 (tekken): `open-mistral-nemo`
+- v1: `mistral-embed`, `open-mixtral-8x7b`
+- v2: `mistral-small-2402` (deprecated), `mistral-large-2402`
+- v3: `open-mixtral-8x22b`, `mistral-large-latest`, `mistral-small-latest`, `open-mistral-7b`
+- v3 (tekken): `open-mistral-nemo`, `ministral-8b-latest`
 
 This guide will focus on our latest v3 (tekken) tokenizer and v3 tokenizer.
diff --git a/openapi.yaml b/openapi.yaml
index 6a34919..58488d5 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -1986,8 +1986,8 @@ components:
           maximum: 1.5
           minimum: 0
           title: Temperature
-          default: 0.7
-          description: "What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both."
+          default: 0.3
+          description: "What sampling temperature to use; we recommend between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both."
         top_p:
           type: number
           maximum: 1
@@ -2002,13 +2002,6 @@ components:
             - type: "null"
           title: Max Tokens
           description: "The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length."
-        min_tokens:
-          anyOf:
-            - type: integer
-              minimum: 0
-            - type: "null"
-          title: Min Tokens
-          description: The minimum number of tokens to generate in the completion.
         stream:
           type: boolean
           title: Stream
@@ -2399,13 +2392,6 @@ components:
            - type: "null"
           title: Max Tokens
           description: "The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length."
-        min_tokens:
-          anyOf:
-            - type: integer
-              minimum: 0
-            - type: "null"
-          title: Min Tokens
-          description: The minimum number of tokens to generate in the completion.
         stream:
           type: boolean
           title: Stream
diff --git a/version.txt b/version.txt
index 38b5ada..c8fe2be 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.93
+v0.0.15