From 99237c7f9598d1f6d1f9838d3fdd50674ac2be0e Mon Sep 17 00:00:00 2001
From: GitHub Actions
Date: Wed, 24 Jul 2024 15:28:35 +0000
Subject: [PATCH] Update version to v0.0.63

---
 docs/capabilities/finetuning.mdx            |   2 +-
 docs/deployment/cloud/overview.mdx          |   1 +
 docs/deployment/cloud/vertex.mdx            | 252 ++++++++++++++++++++
 docs/getting-started/Open-weight-models.mdx |  26 +-
 docs/getting-started/changelog.mdx          |   4 +
 docs/getting-started/introduction.mdx       |  26 +-
 docs/getting-started/models.mdx             |  47 ++--
 version.txt                                 |   2 +-
 8 files changed, 308 insertions(+), 52 deletions(-)
 create mode 100644 docs/deployment/cloud/vertex.mdx

diff --git a/docs/capabilities/finetuning.mdx b/docs/capabilities/finetuning.mdx
index c34460b..9cde21f 100644
--- a/docs/capabilities/finetuning.mdx
+++ b/docs/capabilities/finetuning.mdx
@@ -222,7 +222,7 @@ curl https://api.mistral.ai/v1/files \
 ## Create a fine-tuning job
 The next step is to create a fine-tuning job.
-- model: the specific model you would like to fine-tune. The choices are `open-mistral-7b` (v0.3) and `mistral-small-latest` (`mistral-small-2402`).
+- model: the specific model you would like to fine-tune. The choices are `open-mistral-7b` (v0.3), `mistral-small-latest` (`mistral-small-2402`), `codestral-latest` (`codestral-2405`), `open-mistral-nemo`, and `mistral-large-latest` (`mistral-large-2407`).
 - training_files: a collection of training file IDs, which can consist of a single file or multiple files
 - validation_files: a collection of validation file IDs, which can consist of a single file or multiple files
 - hyperparameters: two adjustable hyperparameters, "training_step" and "learning_rate", that users can modify.
diff --git a/docs/deployment/cloud/overview.mdx b/docs/deployment/cloud/overview.mdx
index aeb4b3b..da389a1 100644
--- a/docs/deployment/cloud/overview.mdx
+++ b/docs/deployment/cloud/overview.mdx
@@ -9,5 +9,6 @@ In particular, Mistral's optimized commercial models are available on:
 
 - [Azure AI](../azure)
 - [AWS Bedrock](../aws)
+- [Google Cloud Vertex AI Model Garden](../vertex)
 - Snowflake Cortex
 
diff --git a/docs/deployment/cloud/vertex.mdx b/docs/deployment/cloud/vertex.mdx
new file mode 100644
index 0000000..17d8d7d
--- /dev/null
+++ b/docs/deployment/cloud/vertex.mdx
@@ -0,0 +1,252 @@
+---
+id: vertex
+title: Vertex AI
+sidebar_position: 3.23
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+
+You can deploy the following Mistral AI models from Google Cloud Vertex AI's Model Garden:
+
+- Mistral NeMo
+- Codestral (instruct and FIM modes)
+- Mistral Large
+
+## Prerequisites
+
+To query these models, you will need:
+
+- Access to a Google Cloud Project with the Vertex AI API enabled
+- The relevant IAM permissions to enable the model and query endpoints, through the following roles:
+    - [Vertex AI User IAM role](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.user).
+    - Consumer Procurement Entitlement Manager role
+
+On the client side, you will also need:
+- The `gcloud` CLI to authenticate against the Google Cloud APIs; please refer to
+[this page](https://cloud.google.com/docs/authentication/provide-credentials-adc#google-idp)
+for more details.
+- A Python virtual environment with the `mistralai-google-cloud` client package installed.
+- The following environment variables properly set up:
+    - `GOOGLE_PROJECT_ID`: a Google Cloud Project ID with the Vertex AI API enabled
+    - `GOOGLE_REGION`: a Google Cloud region where Mistral models are available
+      (e.g. `europe-west4`)
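+
+As a quick sketch, that client-side setup might look like this on a Unix-like shell (the project ID below is a placeholder; replace both values with your own):
+
+```bash
+# Placeholder values -- substitute your own project and region.
+export GOOGLE_PROJECT_ID="my-project-id"
+export GOOGLE_REGION="europe-west4"
+
+# Obtain application-default credentials for the Google Cloud APIs.
+gcloud auth application-default login
+```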
+
+## Querying the models (instruct mode)
+
+<Tabs>
+  <TabItem value="python" label="Python">
+
+    ```python
+    import httpx
+    import google.auth
+    from google.auth.transport.requests import Request
+    import os
+
+
+    def get_credentials() -> str:
+        credentials, project_id = google.auth.default(
+            scopes=["https://www.googleapis.com/auth/cloud-platform"]
+        )
+        credentials.refresh(Request())
+        return credentials.token
+
+
+    def build_endpoint_url(
+        region: str,
+        project_id: str,
+        model_name: str,
+        model_version: str,
+        streaming: bool = False,
+    ) -> str:
+        base_url = f"https://{region}-aiplatform.googleapis.com/v1/"
+        project_fragment = f"projects/{project_id}"
+        location_fragment = f"locations/{region}"
+        specifier = "streamRawPredict" if streaming else "rawPredict"
+        model_fragment = f"publishers/mistralai/models/{model_name}@{model_version}"
+        url = f"{base_url}{'/'.join([project_fragment, location_fragment, model_fragment])}:{specifier}"
+        return url
+
+
+    # Retrieve Google Cloud Project ID and Region from environment variables
+    project_id = os.environ.get("GOOGLE_PROJECT_ID")
+    region = os.environ.get("GOOGLE_REGION")
+
+    # Retrieve Google Cloud credentials.
+    access_token = get_credentials()
+
+    model = "mistral-nemo" # Replace with the model you want to use
+    model_version = "2407" # Replace with the model version you want to use
+    is_streamed = False # Change to True to stream token responses
+
+    # Build URL
+    url = build_endpoint_url(
+        project_id=project_id,
+        region=region,
+        model_name=model,
+        model_version=model_version,
+        streaming=is_streamed
+    )
+
+    # Define query headers
+    headers = {
+        "Authorization": f"Bearer {access_token}",
+        "Accept": "application/json",
+    }
+
+    # Define POST payload
+    data = {
+        "model": model,
+        "messages": [{"role": "user", "content": "Who is the best French painter?"}],
+        "stream": is_streamed,
+    }
+    # Make the call
+    with httpx.Client() as client:
+        resp = client.post(url, json=data, headers=headers, timeout=None)
+        print(resp.text)
+
+    ```
+
+  </TabItem>
+
+  <TabItem value="curl" label="cURL">
+
+    ```bash
+    MODEL="mistral-nemo"
+    MODEL_VERSION="2407"
+
+    url="https://$GOOGLE_REGION-aiplatform.googleapis.com/v1/projects/$GOOGLE_PROJECT_ID/locations/$GOOGLE_REGION/publishers/mistralai/models/$MODEL@$MODEL_VERSION:rawPredict"
+
+    curl \
+      -X POST \
+      -H "Authorization: Bearer $(gcloud auth print-access-token)" \
+      -H "Content-Type: application/json" \
+      $url \
+      --data '{
+        "model": "'"$MODEL"'",
+        "temperature": 0,
+        "messages": [
+          {"role": "user", "content": "What is the best French cheese?"}
+        ]
+      }'
+
+    ```
+
+  </TabItem>
+
+</Tabs>
+
+## Querying Codestral in FIM mode
+
+<Tabs>
+  <TabItem value="python" label="Python">
+
+    ```python
+    import httpx
+    import google.auth
+    from google.auth.transport.requests import Request
+    import os
+
+
+    def get_credentials() -> str:
+        credentials, project_id = google.auth.default(
+            scopes=["https://www.googleapis.com/auth/cloud-platform"]
+        )
+        credentials.refresh(Request())
+        return credentials.token
+
+
+    def build_endpoint_url(
+        region: str,
+        project_id: str,
+        model_name: str,
+        model_version: str,
+        streaming: bool = False,
+    ) -> str:
+        base_url = f"https://{region}-aiplatform.googleapis.com/v1/"
+        project_fragment = f"projects/{project_id}"
+        location_fragment = f"locations/{region}"
+        specifier = "streamRawPredict" if streaming else "rawPredict"
+        model_fragment = f"publishers/mistralai/models/{model_name}@{model_version}"
+        url = f"{base_url}{'/'.join([project_fragment, location_fragment, model_fragment])}:{specifier}"
+        return url
+
+
+    # Retrieve Google Cloud Project ID and Region from environment variables
+    project_id = os.environ.get("GOOGLE_PROJECT_ID")
+    region = os.environ.get("GOOGLE_REGION")
+
+    # Retrieve Google Cloud credentials.
+    access_token = get_credentials()
+
+    model = "codestral"
+    model_version = "2405"
+    is_streamed = False # Change to True to stream token responses
+
+    # Build URL
+    url = build_endpoint_url(
+        project_id=project_id,
+        region=region,
+        model_name=model,
+        model_version=model_version,
+        streaming=is_streamed
+    )
+
+    # Define query headers
+    headers = {
+        "Authorization": f"Bearer {access_token}",
+        "Accept": "application/json",
+    }
+
+    # Define POST payload
+    data = {
+        "model": model,
+        "prompt": "def count_words_in_file(file_path: str) -> int:",
+        "suffix": "return n_words"
+    }
+    # Make the call
+    with httpx.Client() as client:
+        resp = client.post(url, json=data, headers=headers, timeout=None)
+        print(resp.text)
+
+    ```
+
+  </TabItem>
+
+  <TabItem value="curl" label="cURL">
+
+    ```bash
+    MODEL="codestral"
+    MODEL_VERSION="2405"
+
+    url="https://$GOOGLE_REGION-aiplatform.googleapis.com/v1/projects/$GOOGLE_PROJECT_ID/locations/$GOOGLE_REGION/publishers/mistralai/models/$MODEL@$MODEL_VERSION:rawPredict"
+
+    curl \
+      -X POST \
+      -H "Authorization: Bearer $(gcloud auth print-access-token)" \
+      -H "Content-Type: application/json" \
+      $url \
+      --data '{
+        "model":"'"$MODEL"'",
+        "prompt": "def count_words_in_file(file_path: str) -> int:",
+        "suffix": "return n_words"
+      }'
+
+    ```
+
+  </TabItem>
+
+</Tabs>
+
+## Going further
+
+For more information and examples, you can check:
+
+- The Google Cloud [Partner Models](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/mistral)
+  documentation page.
+- The Vertex Model Cards for [Mistral Large](https://console.cloud.google.com/vertex-ai/publishers/mistralai/model-garden/mistral-large),
+  [Mistral-NeMo](https://console.cloud.google.com/vertex-ai/publishers/mistralai/model-garden/mistral-nemo) and
+  [Codestral](https://console.cloud.google.com/vertex-ai/publishers/mistralai/model-garden/codestral).
+- The [Getting Started Colab Notebook](https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/generative_ai/mistralai_intro.ipynb)
+  for Mistral models on Vertex, along with the [source file on GitHub](https://github.com/GoogleCloudPlatform/vertex-ai-samples/tree/main/notebooks/official/generative_ai/mistralai_intro.ipynb).
+
diff --git a/docs/getting-started/Open-weight-models.mdx b/docs/getting-started/Open-weight-models.mdx
index 5723123..e7e7eec 100644
--- a/docs/getting-started/Open-weight-models.mdx
+++ b/docs/getting-started/Open-weight-models.mdx
@@ -4,22 +4,12 @@ title: Open-weight models
 sidebar_position: 1.4
 ---
 
-We open-source both pre-trained models and fine-tuned models. These models are not tuned for safety as we want to empower users to test and refine moderation based on their use cases. For safer models, follow our [guardrailing tutorial](/capabilities/guardrailing).
-
-| Model | Available Open-weight|Available via API| Description | Max Tokens| API Endpoints|
-|--------------------|:--------------------:|:--------------------:|:--------------------:|:--------------------:|:--------------------:|
-| Mistral 7B | :heavy_check_mark:&#13;
Apache2 |:heavy_check_mark: |The first dense model released by Mistral AI, perfect for experimentation, customization, and quick iteration. At the time of the release, it matched the capabilities of models up to 30B parameters. Learn more on our [blog post](https://mistral.ai/news/announcing-mistral-7b/)| 32k | `open-mistral-7b`| -| Mixtral 8x7B |:heavy_check_mark:
Apache2 | :heavy_check_mark: |A sparse mixture of experts model. As such, it leverages up to 45B parameters but only uses about 12B during inference, leading to better inference throughput at the cost of more vRAM. Learn more on the dedicated [blog post](https://mistral.ai/news/mixtral-of-experts/)| 32k | `open-mixtral-8x7b`| -| Mixtral 8x22B |:heavy_check_mark:
Apache2 | :heavy_check_mark: |A bigger sparse mixture of experts model. As such, it leverages up to 141B parameters but only uses about 39B during inference, leading to better inference throughput at the cost of more vRAM. Learn more on the dedicated [blog post](https://mistral.ai/news/mixtral-8x22b/)| 64k | `open-mixtral-8x22b`| -| Codestral |:heavy_check_mark:
MNPL|:heavy_check_mark: | A cutting-edge generative model that has been specifically designed and optimized for code generation tasks, including fill-in-the-middle and code completion | 32k | `codestral-latest`| -| Codestral Mamba | :heavy_check_mark:
Apache2 | :heavy_check_mark: | A Mamba 2 language model specialized in code generation. Learn more on our [blog post](https://mistral.ai/news/codestral-mamba/) | 256k | `open-codestral-mamba`| -| Mathstral | :heavy_check_mark:
Apache2 | | A math-specific 7B model designed for math reasoning and scientific tasks. Learn more on our [blog post](https://mistral.ai/news/mathstral/) | 32k | NA| -| Mistral NeMo | :heavy_check_mark:
Apache2 | :heavy_check_mark: | A 12B model built with the partnership with Nvidia. It is easy to use and a drop-in replacement in any system using Mistral 7B that it supersedes. Learn more on our [blog post](https://mistral.ai/news/mistral-nemo/) | 128k | `open-mistral-nemo`| +We open-source both pre-trained models and instruction-tuned models. These models are not tuned for safety as we want to empower users to test and refine moderation based on their use cases. For safer models, follow our [guardrailing tutorial](/capabilities/guardrailing). ## License - Mistral 7B, Mixtral 8x7B, Mixtral 8x22B, Codestral Mamba, Mathstral, and Mistral NeMo are under [Apache 2 License](https://choosealicense.com/licenses/apache-2.0/), which permits their use without any constraints. - Codestral is under [Mistral AI Non-Production (MNPL) License](https://mistral.ai/licences/MNPL-0.1.md). - +- Mistral Large is under [Mistral Research License](https://mistral.ai/licenses/MRL-0.1.md). ## Downloading @@ -37,10 +27,11 @@ We open-source both pre-trained models and fine-tuned models. These models are n | Mixtral-8x22B-Instruct-v0.1/
Mixtral-8x22B-Instruct-v0.3 | [Hugging Face](https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1)
[raw_weights](https://models.mistralcdn.com/mixtral-8x22b-v0-3/mixtral-8x22B-Instruct-v0.3.tar) (md5sum: `471a02a6902706a2f1e44a693813855b`)|- 32768 vocabulary size | | Mixtral-8x22B-v0.3 | [raw_weights](https://models.mistralcdn.com/mixtral-8x22b-v0-3/mixtral-8x22B-v0.3.tar) (md5sum: `a2fa75117174f87d1197e3a4eb50371a`) | - 32768 vocabulary size
- Supports v3 Tokenizer | | Codestral-22B-v0.1 | [Hugging Face](https://huggingface.co/mistralai/Codestral-22B-v0.1)
[raw_weights](https://models.mistralcdn.com/codestral-22b-v0-1/codestral-22B-v0.1.tar) (md5sum: `1ea95d474a1d374b1d1b20a8e0159de3`) | - 32768 vocabulary size
- Supports v3 Tokenizer | -| Codestral-Mamba-7B-v0.1 | [Hugging Face](https://huggingface.co/mistralai/mamba-codestral-7B-v0.1)
[raw_weights](https://models.mistralcdn.com/codestral-mamba-7b-v0-1/codestral-mamba-7B-v0.1.tar)(md5sum: `d3993e4024d1395910c55db0d11db163`) | - 32768 vocabulary size
- Supports v3 Tokenizer | -| Mathstral-7B-v0.1 | [Hugging Face](https://huggingface.co/mistralai/mathstral-7B-v0.1)
[raw_weights](https://models.mistralcdn.com/mathstral-7b-v0-1/mathstral-7B-v0.1.tar)(md5sum: `5f05443e94489c261462794b1016f10b`) | - 32768 vocabulary size
- Supports v3 Tokenizer | -| Mistral-NeMo-Base-2407 | [Hugging Face](https://huggingface.co/mistralai/Mistral-Nemo-Base-2407)
[raw_weights](https://models.mistralcdn.com/mistral-nemo-2407/mistral-nemo-base-2407.tar)(md5sum: `c5d079ac4b55fc1ae35f51f0a3c0eb83`) | - 131k vocabulary size
- Supports tekken.json tokenizer | -| Mistral-NeMo-Instruct-2407 | [Hugging Face](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407)
[raw_weights](https://models.mistralcdn.com/mistral-nemo-2407/mistral-nemo-instruct-2407.tar)(md5sum: `296fbdf911cb88e6f0be74cd04827fe7`) | - 131k vocabulary size
- Supports tekken.json tokenizer
- Supports function calling | +| Codestral-Mamba-7B-v0.1 | [Hugging Face](https://huggingface.co/mistralai/mamba-codestral-7B-v0.1)
[raw_weights](https://models.mistralcdn.com/codestral-mamba-7b-v0-1/codestral-mamba-7B-v0.1.tar) (md5sum: `d3993e4024d1395910c55db0d11db163`) | - 32768 vocabulary size
- Supports v3 Tokenizer | +| Mathstral-7B-v0.1 | [Hugging Face](https://huggingface.co/mistralai/mathstral-7B-v0.1)
[raw_weights](https://models.mistralcdn.com/mathstral-7b-v0-1/mathstral-7B-v0.1.tar) (md5sum: `5f05443e94489c261462794b1016f10b`) | - 32768 vocabulary size
- Supports v3 Tokenizer | +| Mistral-NeMo-Base-2407 | [Hugging Face](https://huggingface.co/mistralai/Mistral-Nemo-Base-2407)
[raw_weights](https://models.mistralcdn.com/mistral-nemo-2407/mistral-nemo-base-2407.tar) (md5sum: `c5d079ac4b55fc1ae35f51f0a3c0eb83`) | - 131k vocabulary size
- Supports tekken.json tokenizer | +| Mistral-NeMo-Instruct-2407 | [Hugging Face](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407)
[raw_weights](https://models.mistralcdn.com/mistral-nemo-2407/mistral-nemo-instruct-2407.tar) (md5sum: `296fbdf911cb88e6f0be74cd04827fe7`) | - 131k vocabulary size
- Supports tekken.json tokenizer
- Supports function calling | +| Mistral-Large-Instruct-2407 | [Hugging Face](https://huggingface.co/mistralai/Mistral-Large-Instruct-2407)
[raw_weights](https://models.mistralcdn.com/mistral-large-2407/mistral-large-instruct-2407.tar) (md5sum: `fc602155f9e39151fba81fcaab2fa7c4`) | - 32768 vocabulary size&#13;
- Supports v3 Tokenizer&#13;
- Supports function calling |
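+
+As an illustrative sketch, you can verify and unpack one of the raw-weight archives after downloading it (shown here for Mathstral, using its md5sum from the table above; adjust the URL, file name, and checksum for other models):
+
+```bash
+wget https://models.mistralcdn.com/mathstral-7b-v0-1/mathstral-7B-v0.1.tar
+echo "5f05443e94489c261462794b1016f10b  mathstral-7B-v0.1.tar" | md5sum -c -
+mkdir -p mathstral-7B-v0.1 && tar -xf mathstral-7B-v0.1.tar -C mathstral-7B-v0.1
+```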
 
 ## Sizes
 
@@ -53,7 +44,8 @@ We open-source both pre-trained models and fine-tuned models. These models are n
 | Codestral-22B-v0.1 | 22.2B | 22.2B | 60 |
 | Codestral-Mamba-7B-v0.1 | 7.3B | 7.3B | 16 |
 | Mathstral-7B-v0.1 | 7.3B | 7.3B | 16 |
-| Mistral-NeMo-12B-v0.1 | 12B | 12B | 28 - bf16&#13;
16 - fp8 | +| Mistral-NeMo-Instruct-2407 | 12B | 12B | 28 - bf16
16 - fp8 |
+| Mistral-Large-Instruct-2407 | 123B | 123B | 228 |
 
 ## How to run?
 Check out [mistral-inference](https://github.com/mistralai/mistral-inference/), a Python package for running our models. You can install `mistral-inference` by
diff --git a/docs/getting-started/changelog.mdx b/docs/getting-started/changelog.mdx
index 30c14ee..e59edb0 100644
--- a/docs/getting-started/changelog.mdx
+++ b/docs/getting-started/changelog.mdx
@@ -6,6 +6,10 @@ sidebar_position: 1.8
 
 This is the list of changes to the Mistral API.
 
+July 24, 2024
+- We released Mistral Large 2 (`mistral-large-2407`).
+- We added fine-tuning support for Codestral, Mistral NeMo and Mistral Large. Now the model choices for fine-tuning are `open-mistral-7b` (v0.3), `mistral-small-latest` (`mistral-small-2402`), `codestral-latest` (`codestral-2405`), `open-mistral-nemo`, and `mistral-large-latest` (`mistral-large-2407`).
+
 July 18, 2024
 - We released Mistral NeMo (`open-mistral-nemo`).
 
diff --git a/docs/getting-started/introduction.mdx b/docs/getting-started/introduction.mdx
index 9404e97..cef065b 100644
--- a/docs/getting-started/introduction.mdx
+++ b/docs/getting-started/introduction.mdx
@@ -10,26 +10,25 @@ Mistral AI is a research lab building the best open source models in the world.
 
 ## Mistral AI Large Language Models (LLMs)
 
-We release both open source and commercial models, driving innovation and convenience for our developer community. Our models are state-of-the-art for their multilingual, code generation, maths, and advanced reasoning capabilities.
+We release state-of-the-art generalist models, specialized models, and research models, driving innovation and convenience for our developer community. Our models are state-of-the-art for their multilingual, code generation, maths, and advanced reasoning capabilities.
-### Open Source
+### State-of-the-art generalist models
+- Mistral Large, our top-tier reasoning model for high-complexity tasks, with the latest version, v2, released [July 2024](https://mistral.ai/news/mistral-large-2407/)
+- Mistral NeMo, our best multilingual open source model, released [July 2024](https://mistral.ai/news/mistral-nemo/)
+
+### Specialized models
+- Codestral, our cutting-edge language model for coding, released [May 2024](https://mistral.ai/news/codestral/)
+- Mistral Embeddings, our state-of-the-art semantic model for extracting representations of text extracts
+
+### Research models
 - Mistral 7b, our first dense model released [September 2023](https://mistral.ai/news/announcing-mistral-7b/)
 - Mixtral 8x7b, our first sparse mixture-of-experts released [December 2023](https://mistral.ai/news/mixtral-of-experts/)
 - Mixtral 8x22b, our best open source model to date released [April 2024](https://mistral.ai/news/mixtral-8x22b/)
 - Mathstral 7b, our first math open source model released [July 2024](https://mistral.ai/news/mathstral/)
-- Codestral Mamba 7b, our first mamba 2 open source model released [July 2024](https://mistral.ai/news/codestral-mamba/)
-- Mistral NeMo 7b, our best multilingual open source model released [July 2024](https://mistral.ai/news/mistral-nemo/)
-
-### Commercial
-
-- Mistral Small, our cost-efficient reasoning model for low-latency workloads
-- Mistral Medium, useful for intermediate tasks that require moderate reasoning; please note that this model will be deprecated in the coming months
-- Mistral Large, our top-tier reasoning model for high-complexity tasks
-- Mistral Embeddings, our state-of-the-art semantic for extracting representation of text extracts
-- Codestral, our cutting-edge language model for coding
+- Codestral Mamba, our first mamba 2 open source model, released [July 2024](https://mistral.ai/news/codestral-mamba/)
 
-For our commercial models, we are always improving and iteratively deploying them. Keep up to date on our model versioning [here](/getting-started/models/#api-versioning).
+Learn more about our models [here](/getting-started/models/).
 
 ## Explore the Mistral AI APIs
 
@@ -39,5 +38,6 @@ The [Mistral AI APIs](https://console.mistral.ai/) empower LLM applications via:
 - [Text generation](/capabilities/completion), enables streaming and provides the ability to display partial model results in real-time
 - [Code generation](/capabilities/code_generation), enpowers code generation tasks, including fill-in-the-middle and code completion
 - [Embeddings](/capabilities/embeddings), useful for RAG where it represents the meaning of text as a list of numbers
 - [Function calling](/capabilities/function_calling), enables Mistral models to connect to external tools
+- [Fine-tuning](/capabilities/finetuning), enables developers to create customized and specialized models
 - [JSON mode](/capabilities/json_mode), enables developers to set the response format to json_object
 - [Guardrailing](/capabilities/guardrailing), enables developers to enforce policies at the system level of Mistral models
diff --git a/docs/getting-started/models.mdx b/docs/getting-started/models.mdx
index d19698b..d11897f 100644
--- a/docs/getting-started/models.mdx
+++ b/docs/getting-started/models.mdx
@@ -6,24 +6,31 @@ sidebar_position: 1.3
 
 ## Overview
 
-Mistral provides two types of models: open-weights models (Mistral 7B, Mixtral 8x7B, Mixtral 8x22B) and optimized commercial models (Mistral Small, Mistral Medium, Mistral Large, and Mistral Embeddings).
-- The open-weights models are highly efficient and available under a fully permissive Apache 2 license. &#13;
-They are ideal for customization, such as fine-tuning, due to their portability, control, and fast performance. -- On the other hand, the optimized commercial models are designed for high performance and are available through flexible deployment options. +Mistral provides three types of models: state-of-the-art generalist models, specialized models, and research models. + +- **State-of-the-art generalist models** + +| Model | Available Open-weight|Available via API| Description | Max Tokens| API Endpoints| +|--------------------|:--------------------:|:--------------------:|:--------------------:|:--------------------:|:--------------------:| +| Mistral Large |:heavy_check_mark:
[Mistral Research License](https://mistral.ai/licenses/MRL-0.1.md)| :heavy_check_mark: |Our flagship model with state-of-the-art reasoning, knowledge, and coding capabilities. It's ideal for complex tasks that require large reasoning capabilities or are highly specialized (Synthetic Text Generation, Code Generation, RAG, or Agents). Learn more on our [blog post](https://mistral.ai/news/mistral-large-2407/)| 128k | `mistral-large-latest`| +| Mistral NeMo | :heavy_check_mark:
Apache2 | :heavy_check_mark: | A 12B model built in partnership with Nvidia. It is easy to use and a drop-in replacement in any system using Mistral 7B that it supersedes. Learn more on our [blog post](https://mistral.ai/news/mistral-nemo/) | 128k | `open-mistral-nemo`|
+
+- **Specialized models**
+
+| Model | Available Open-weight|Available via API| Description | Max Tokens| API Endpoints|
+|--------------------|:--------------------:|:--------------------:|:--------------------:|:--------------------:|:--------------------:|
+| Codestral |:heavy_check_mark:&#13;
[Mistral AI Non-Production License](https://mistral.ai/licenses/MNPL-0.1.md)|:heavy_check_mark: | A cutting-edge generative model that has been specifically designed and optimized for code generation tasks, including fill-in-the-middle and code completion. Learn more on our [blog post](https://mistral.ai/news/codestral/) | 32k | `codestral-latest`| +| Mistral Embeddings ||:heavy_check_mark: | A model that converts text into numerical vectors of embeddings in 1024 dimensions. Embedding models enable retrieval and retrieval-augmented generation applications. It achieves a retrieval score of 55.26 on MTEB | 8k | `mistral-embed`| + +- **Research models** | Model | Available Open-weight|Available via API| Description | Max Tokens| API Endpoints| |--------------------|:--------------------:|:--------------------:|:--------------------:|:--------------------:|:--------------------:| | Mistral 7B | :heavy_check_mark:
Apache2 |:heavy_check_mark: |The first dense model released by Mistral AI, perfect for experimentation, customization, and quick iteration. At the time of the release, it matched the capabilities of models up to 30B parameters. Learn more on our [blog post](https://mistral.ai/news/announcing-mistral-7b/)| 32k | `open-mistral-7b`| | Mixtral 8x7B |:heavy_check_mark:
Apache2 | :heavy_check_mark: |A sparse mixture of experts model. As such, it leverages up to 45B parameters but only uses about 12B during inference, leading to better inference throughput at the cost of more vRAM. Learn more on the dedicated [blog post](https://mistral.ai/news/mixtral-of-experts/)| 32k | `open-mixtral-8x7b`| | Mixtral 8x22B |:heavy_check_mark:
Apache2 | :heavy_check_mark: |A bigger sparse mixture of experts model. As such, it leverages up to 141B parameters but only uses about 39B during inference, leading to better inference throughput at the cost of more vRAM. Learn more on the dedicated [blog post](https://mistral.ai/news/mixtral-8x22b/)| 64k | `open-mixtral-8x22b`| -| Mistral Small ||:heavy_check_mark: |Suitable for simple tasks that one can do in bulk (Classification, Customer Support, or Text Generation)| 32k | `mistral-small-latest`| -| Mistral Medium
(will be deprecated in the coming months) ||:heavy_check_mark: |Ideal for intermediate tasks that require moderate reasoning (Data extraction, Summarizing a Document, Writing emails, Writing a Job Description, or Writing Product Descriptions)| 32k | `mistral-medium-latest`| -| Mistral Large || :heavy_check_mark: |Our flagship model that's ideal for complex tasks that require large reasoning capabilities or are highly specialized (Synthetic Text Generation, Code Generation, RAG, or Agents). Learn more on our [blog post](https://mistral.ai/news/mistral-large/)| 32k | `mistral-large-latest`| -| Mistral Embeddings ||:heavy_check_mark: | A model that converts text into numerical vectors of embeddings in 1024 dimensions. Embedding models enable retrieval and retrieval-augmented generation applications. It achieves a retrieval score of 55.26 on MTEB | 8k | `mistral-embed`| -| Codestral |:heavy_check_mark:
MNPL|:heavy_check_mark: | A cutting-edge generative model that has been specifically designed and optimized for code generation tasks, including fill-in-the-middle and code completion | 32k | `codestral-latest`| -| Codestral Mamba | :heavy_check_mark:
Apache2 | :heavy_check_mark: | A Mamba 2 language model specialized in code generation. Learn more on our [blog post](https://mistral.ai/news/codestral-mamba/) | 256k | `open-codestral-mamba`| | Mathstral | :heavy_check_mark:
Apache2 | | A math-specific 7B model designed for math reasoning and scientific tasks. Learn more on our [blog post](https://mistral.ai/news/mathstral/) | 32k | NA| -| Mistral NeMo | :heavy_check_mark:
Apache2 | :heavy_check_mark: | A 12B model built with the partnership with Nvidia. It is easy to use and a drop-in replacement in any system using Mistral 7B that it supersedes. Learn more on our [blog post](https://mistral.ai/news/mistral-nemo/) | 128k | `open-mistral-nemo`| +| Codestral Mamba | :heavy_check_mark:
Apache2 | :heavy_check_mark: | A Mamba 2 language model specialized in code generation. Learn more on our [blog post](https://mistral.ai/news/codestral-mamba/) | 256k | `open-codestral-mamba`|
 
 ## Pricing
 
@@ -38,11 +45,11 @@ Additionally, be prepared for the deprecation of certain endpoints in the coming
 Here are the details of the available versions:
 - `open-mistral-nemo`: currently points to `open-mistral-nemo-2407`.
-- `mistral-small-latest`: currently points to `mistral-small-2402`.
+- `mistral-small-latest`: currently points to `mistral-small-2402`. Mistral Small will be deprecated shortly.
 - `mistral-medium-latest`: currently points to `mistral-medium-2312`.
 The previous `mistral-medium` has been dated and tagged as `mistral-medium-2312`. Mistral Medium will be deprecated shortly.
-- `mistral-large-latest`: currently points to `mistral-large-2402`.
+- `mistral-large-latest`: currently points to `mistral-large-2407`. `mistral-large-2402` will be deprecated shortly.
 - `codestral-latest`: currently points to `codestral-2405`.
 
 ## Benchmarks results
 
@@ -55,7 +62,7 @@ You can find the benchmark results in the following blog posts:
 or outperforms GPT3.5 on most standard benchmarks. It handles English, French, Italian, German and Spanish, and shows strong performance in code generation.
 - [Mixtral 8x22B](https://mistral.ai/news/mixtral-8x22b/): our most performant open model. It handles English, French, Italian, German, Spanish and performs strongly on code-related tasks. Natively handles function calling.
-- [Mistral Large](https://mistral.ai/news/mistral-large/): a cutting-edge text generation model with top-tier reasoning capabilities.
+- [Mistral Large](https://mistral.ai/news/mistral-large-2407/): a cutting-edge text generation model with top-tier reasoning capabilities.
 It can be used for complex multilingual reasoning tasks, including text understanding, transformation, and code generation.
 - [Codestral](https://mistral.ai/news/codestral/): as a 22B model, Codestral sets a new standard on the performance/latency space for code generation compared to previous models used for coding.
 - [Codestral-Mamba](https://mistral.ai/news/codestral-mamba/): we have trained this model with advanced code and reasoning capabilities, enabling the model to have a strong performance on par with SOTA transformer-based models.
 
@@ -78,10 +85,10 @@ Today, Mistral models are behind many LLM applications at scale. Here is a brief
 
 When selecting a model, it is essential to evaluate the performance, and cost trade-offs. Depending on what’s most important for your application, your choice may differ significantly. Note that the models will be updated over time, the information we share below only reflect the current state of the models.
 
-In general, the larger the model, the better the performance. For instance, when looking at the popular benchmark MMLU (Massive Multitask Language Understanding), the performance ranking of Mistral’s models is as follows: Mistral Large > Mistral 8x22B > Mistral Small > Mixtral 8x7B > Mistral 7B. Notably, Mistral Large is currently outperforming all other four models across almost all benchmarks. 
-
-drawing
-
+In general, the larger the model, the better the performance. For instance, when looking at the popular benchmark MMLU (Massive Multitask Language Understanding), the performance ranking of Mistral’s models is as follows:
+- Mistral Large (84.0%) > Mixtral 8x22B (77.8%) > Mistral Small (72.2%) > Mixtral 8x7B (70.6%) > Mistral NeMo (68%) > Mistral 7B (62.5%).
+Notably, Mistral Large is currently outperforming all of the other models across almost all benchmarks.
 
 In addition to the benchmarks mentioned above, you can also refer to various other independent benchmarks, such as https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard and https://artificialanalysis.ai/, to gain more insight into the performance and speed of different large language models. By considering the performance, speed, and cost details, hopefully you can find the best model that suits your application's needs.
 
 As a general rule, if you are new to using Large Language Models, you can always
 
 ### Use cases
 
-### Mistral Small: Simple tasks that one can do in bulk
-Mistral Small is the ideal choice for simpe tasks that one can do in builk - like Classification, Customer Support, or Text Generation. It offers excellent performance at an affordable price point. For instance, it can be effectively used for a classification task to classify if an email is spam or not:
+### Mistral 7B: Simple tasks that one can do in bulk
+Mistral 7B is the ideal choice for simple tasks that one can do in bulk - like Classification, Customer Support, or Text Generation. It offers excellent performance at an affordable price point. For instance, it can be effectively used for a classification task to classify if an email is spam or not:
 
 **Prompt:**
 ```
 Classify the following email to determine if it is spam or not. Only respond with the exact words "Spam" or "Not spam". 
 
 🎉 Urgent! You've Won a $1,000,000 Cash Prize! 💰 To claim your prize, please click on the link below: https://bit.ly/claim-your-prize
 ```
 
-Mistral Small, Mistral 8x22B, and Mistral Large all can accurately classify this email correctly as “Spam”. Mistral Small is capable to provide the correct classification as the larger models. So it is the most efficient and affordable choice for this kind of tasks.
+All of our models can correctly classify this email as “Spam”. Mistral 7B provides the same correct classification as the larger models, so it is the most efficient and affordable choice for this kind of task.
 
 ### Mistral 8x22B: Intermediate tasks that require language transformation
 Mistral 8x22B is the ideal for intermediate tasks that require moderate reasoning - like Data extraction, Summarizing a Document, Writing a Job Description, or Writing Product Descriptions. Mistral 8x22B strikes a balance between performance and capability, making it suitable for a wide range of tasks that only require language transformaion. For example, Mistral 8x22B can write an email:
diff --git a/version.txt b/version.txt
index 4f5e8e3..c8fe2be 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.62
+v0.0.63