Update version to v0.0.93
GitHub Actions committed Oct 25, 2024
1 parent bfaa003 commit e5f312b
Showing 5 changed files with 189 additions and 21 deletions.
181 changes: 181 additions & 0 deletions docs/deployment/cloud/outscale.mdx
@@ -0,0 +1,181 @@
---
id: outscale
title: Outscale
sidebar_position: 3.26
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

## Introduction

Mistral AI models are available on the Outscale platform as managed deployments.
Through the Outscale marketplace, you can subscribe to a Mistral service that will,
on your behalf, provision a virtual machine and a GPU, and then deploy the model on it.


As of today, the following models are available:

- Mistral Small (2409)
- Codestral

For more details, visit the [models](../../../getting-started/models/models_overview) page.

## Getting started

The following sections outline the steps to query a Mistral model on the Outscale platform.

### Deploying the model

Follow the steps described in the
[Outscale documentation](https://docs.outscale.com/en/userguide/Subscribing-To-a-Mistral-Service-and-Deploying-it.html) to deploy a service
with the model of your choice.

### Querying the model (chat completion)

Deployed models expose a REST API that you can query using Mistral's SDK or plain HTTP calls.
To run the examples below, you will need to set the following environment variables (an example shell setup follows the list):

- `OUTSCALE_SERVER_URL`: the URL of the VM hosting your Mistral model
- `OUTSCALE_MODEL_NAME`: the name of the model to query (e.g. `small`, `codestral`)

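A minimal sketch of that setup, using placeholder values; substitute the URL of your own VM and the name of the model you actually deployed:

```bash
# Placeholder values: point these at your own deployment
export OUTSCALE_SERVER_URL="https://<your-vm-address>"
export OUTSCALE_MODEL_NAME="small"
```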

<Tabs>
<TabItem value="curl" label="cURL">
```bash
curl --location $OUTSCALE_SERVER_URL/v1/chat/completions \
--header "Content-Type: application/json" \
--header "Accept: application/json" \
--data '{
"model": "'"$OUTSCALE_MODEL_NAME"'",
"temperature": 0,
"messages": [
{"role": "user", "content": "Who is the best French painter? Answer in one short sentence."}
],
"stream": false
}'
```
</TabItem>
<TabItem value="python" label="Python">
```python
import os
from mistralai import Mistral

client = Mistral(server_url=os.environ["OUTSCALE_SERVER_URL"])

resp = client.chat.complete(
model=os.environ["OUTSCALE_MODEL_NAME"],
messages=[
{
"role": "user",
"content": "Who is the best French painter? Answer in one short sentence.",
}
],
temperature=0
)

print(resp.choices[0].message.content)
```
</TabItem>
<TabItem value="ts" label="TypeScript">
```typescript
import { Mistral } from "@mistralai/mistralai";

const client = new Mistral({
serverURL: process.env.OUTSCALE_SERVER_URL || ""
});

const modelName = process.env.OUTSCALE_MODEL_NAME || "";

async function chatCompletion(user_msg: string) {
const resp = await client.chat.complete({
model: modelName,
messages: [
{
content: user_msg,
role: "user",
},
],
});
if (resp.choices && resp.choices.length > 0) {
console.log(resp.choices[0]);
}
}

chatCompletion("Who is the best French painter? Answer in one short sentence.");
```
</TabItem>
</Tabs>
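
The same chat completions endpoint can also stream its answer. Below is a sketch of a streamed request; it assumes the deployment honours the `stream` flag shown above, and the only changes from the earlier cURL call are `"stream": true` and an `Accept` header suited to server-sent events.

```bash
# Same request as before, but streamed: the reply arrives as server-sent events.
curl --location $OUTSCALE_SERVER_URL/v1/chat/completions \
  --header "Content-Type: application/json" \
  --header "Accept: text/event-stream" \
  --data '{
    "model": "'"$OUTSCALE_MODEL_NAME"'",
    "temperature": 0,
    "messages": [
      {"role": "user", "content": "Who is the best French painter? Answer in one short sentence."}
    ],
    "stream": true
  }'
```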

### Querying the model (FIM completion)

Codestral can be queried using an additional completion mode called fill-in-the-middle (FIM).
For more information, see the
[code generation section](../../../capabilities/code_generation/#fill-in-the-middle-endpoint).


<Tabs>
<TabItem value="curl" label="cURL">
```bash
curl --location $OUTSCALE_SERVER_URL/v1/fim/completions \
--header "Content-Type: application/json" \
--header "Accept: application/json" \
--data '{
"model": "'"$OUTSCALE_MODEL_NAME"'",
"prompt": "def count_words_in_file(file_path: str) -> int:",
"suffix": "return n_words",
"stream": false
}'
```
</TabItem>
<TabItem value="python" label="Python">
```python
import os
from mistralai import Mistral

client = Mistral(server_url=os.environ["OUTSCALE_SERVER_URL"])

resp = client.fim.complete(
    model=os.environ["OUTSCALE_MODEL_NAME"],
prompt="def count_words_in_file(file_path: str) -> int:",
suffix="return n_words"
)

print(resp.choices[0].message.content)
```
</TabItem>
<TabItem value="ts" label="TypeScript">
```typescript
import { Mistral } from "@mistralai/mistralai";

const client = new Mistral({
serverURL: process.env.OUTSCALE_SERVER_URL || ""
});

const modelName = "codestral";

async function fimCompletion(prompt: string, suffix: string) {
const resp = await client.fim.complete({
model: modelName,
prompt: prompt,
suffix: suffix
});
if (resp.choices && resp.choices.length > 0) {
console.log(resp.choices[0]);
}
}

fimCompletion("def count_words_in_file(file_path: str) -> int:",
"return n_words");
```
</TabItem>
</Tabs>

## Going further

For more information and examples, you can check:

- The [Outscale documentation](https://docs.outscale.com/en/userguide/Subscribing-To-a-Mistral-Service-and-Deploying-it.html)
explaining how to subscribe to a Mistral service and deploy it.
1 change: 1 addition & 0 deletions docs/deployment/cloud/overview.mdx
@@ -12,4 +12,5 @@ In particular, Mistral's optimized commercial models are available on:
- [Google Cloud Vertex AI Model Garden](../vertex)
- [Snowflake Cortex](../sfcortex)
- [IBM watsonx](../ibm-watsonx)
- [Outscale](../outscale)

8 changes: 4 additions & 4 deletions docs/guides/tokenization.mdx
@@ -30,10 +30,10 @@ Note that we are still iterating on the tokenizer. Things may change and this is

We have released three versions of our tokenizers powering different sets of models.

- v1: `open-mistral-7b`, `open-mixtral-8x7b`, `mistral-embed`
- v2: `mistral-small-latest`, `mistral-large-latest`
- v3: `open-mixtral-8x22b`
- v3 (tekken): `open-mistral-nemo`
- v1: `mistral-embed`, `open-mixtral-8x7b`
- v2: `mistral-small-2402` (deprecated), `mistral-large-2402`
- v3: `open-mixtral-8x22b`, `mistral-large-latest`, `mistral-small-latest`, `open-mistral-7b`
- v3 (tekken): `open-mistral-nemo`, `ministral-8b-latest`

This guide will focus on our latest v3 (tekken) tokenizer and v3 tokenizer.

18 changes: 2 additions & 16 deletions openapi.yaml
@@ -1986,8 +1986,8 @@ components:
maximum: 1.5
minimum: 0
title: Temperature
default: 0.7
description: "What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both."
default: 0.3
description: "What sampling temperature to use, we recommend between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both."
top_p:
type: number
maximum: 1
@@ -2002,13 +2002,6 @@
- type: "null"
title: Max Tokens
description: "The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length."
min_tokens:
anyOf:
- type: integer
minimum: 0
- type: "null"
title: Min Tokens
description: The minimum number of tokens to generate in the completion.
stream:
type: boolean
title: Stream
@@ -2399,13 +2392,6 @@ components:
- type: "null"
title: Max Tokens
description: "The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length."
min_tokens:
anyOf:
- type: integer
minimum: 0
- type: "null"
title: Min Tokens
description: The minimum number of tokens to generate in the completion.
stream:
type: boolean
title: Stream
2 changes: 1 addition & 1 deletion version.txt
@@ -1 +1 @@
-v0.0.15
+v0.0.93
