From e7fc45e24f66f30e028e4de333b59574110a1a62 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 25 Sep 2024 12:56:25 +0200 Subject: [PATCH 01/11] version bump --- assets/misc/meilisearch-collection-postman.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/misc/meilisearch-collection-postman.json b/assets/misc/meilisearch-collection-postman.json index 2bcb5b7e8..17cd48325 100644 --- a/assets/misc/meilisearch-collection-postman.json +++ b/assets/misc/meilisearch-collection-postman.json @@ -1,7 +1,7 @@ { "info": { "_postman_id": "719caa45-6643-4393-9b84-e8bc6a70d074", - "name": "Meilisearch v1.10", + "name": "Meilisearch v1.11", "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", "_exporter_id": "8898306" }, From 461aa0bea73aac510d3dd2f724408cb2af2489df Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 9 Oct 2024 12:54:51 +0200 Subject: [PATCH 02/11] Bump postman collection version (#3021) --- assets/misc/meilisearch-collection-postman.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/misc/meilisearch-collection-postman.json b/assets/misc/meilisearch-collection-postman.json index 17cd48325..ac16f5d1f 100644 --- a/assets/misc/meilisearch-collection-postman.json +++ b/assets/misc/meilisearch-collection-postman.json @@ -1,9 +1,9 @@ { "info": { - "_postman_id": "719caa45-6643-4393-9b84-e8bc6a70d074", + "_postman_id": "cc6bb097-033d-4f65-8704-f10e4e4b10d0", "name": "Meilisearch v1.11", "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", - "_exporter_id": "8898306" + "_exporter_id": "25294324" }, "item": [ { From 85da89e723e6af50696ec1792d5bfe995444322d Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 9 Oct 2024 12:55:08 +0200 Subject: [PATCH 03/11] bump docker version (#3020) --- guides/docker.mdx | 16 ++++++++-------- .../self_hosted/install_meilisearch_locally.mdx | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff 
--git a/guides/docker.mdx b/guides/docker.mdx index bd14bb8ab..c03eed619 100644 --- a/guides/docker.mdx +++ b/guides/docker.mdx @@ -14,7 +14,7 @@ Docker is a tool that bundles applications into containers. Docker containers en Docker containers are distributed in images. To use Meilisearch, use the `docker pull` command to download a Meilisearch image: ```sh -docker pull getmeili/meilisearch:v1.10 +docker pull getmeili/meilisearch:v1.11 ``` Meilisearch deploys a new Docker image with every release of the engine. Each image is tagged with the corresponding Meilisearch version, indicated in the above example by the text following the `:` symbol. You can see [the full list of available Meilisearch Docker images](https://hub.docker.com/r/getmeili/meilisearch/tags#!) on Docker Hub. @@ -31,7 +31,7 @@ After completing the previous step, use `docker run` to launch the Meilisearch i docker run -it --rm \ -p 7700:7700 \ -v $(pwd)/meili_data:/meili_data \ - getmeili/meilisearch:v1.10 + getmeili/meilisearch:v1.11 ``` ### Configure Meilisearch @@ -47,7 +47,7 @@ docker run -it --rm \ -p 7700:7700 \ -e MEILI_MASTER_KEY='MASTER_KEY'\ -v $(pwd)/meili_data:/meili_data \ - getmeili/meilisearch:v1.10 + getmeili/meilisearch:v1.11 ``` #### Passing instance options with CLI arguments @@ -58,7 +58,7 @@ If you want to pass command-line arguments to Meilisearch with Docker, you must docker run -it --rm \ -p 7700:7700 \ -v $(pwd)/meili_data:/meili_data \ - getmeili/meilisearch:v1.10 \ + getmeili/meilisearch:v1.11 \ meilisearch --master-key="MASTER_KEY" ``` @@ -76,7 +76,7 @@ To keep your data intact between reboots, specify a dedicated volume by running docker run -it --rm \ -p 7700:7700 \ -v $(pwd)/meili_data:/meili_data \ - getmeili/meilisearch:v1.10 + getmeili/meilisearch:v1.11 ``` The example above uses `$(pwd)/meili_data`, which is a directory in the host machine. 
Depending on your OS, mounting volumes from the host to the container might result in performance loss and is only recommended when developing your application. @@ -91,7 +91,7 @@ To import a dump, use Meilisearch's `--import-dump` command-line option and spec docker run -it --rm \ -p 7700:7700 \ -v $(pwd)/meili_data:/meili_data \ - getmeili/meilisearch:v1.10 \ + getmeili/meilisearch:v1.11 \ meilisearch --import-dump /meili_data/dumps/20200813-042312213.dump ``` @@ -111,7 +111,7 @@ To generate a Meilisearch snapshot with Docker, launch Meilisearch with `--sched docker run -it --rm \ -p 7700:7700 \ -v $(pwd)/meili_data:/meili_data \ - getmeili/meilisearch:v1.10 \ + getmeili/meilisearch:v1.11 \ meilisearch --schedule-snapshot --snapshot-dir /meili_data/snapshots ``` @@ -123,7 +123,7 @@ To import a snapshot, launch Meilisearch with the `--import-snapshot` option: docker run -it --rm \ -p 7700:7700 \ -v $(pwd)/meili_data:/meili_data \ - getmeili/meilisearch:v1.10 \ + getmeili/meilisearch:v1.11 \ meilisearch --import-snapshot /meili_data/snapshots/data.ms.snapshot ``` diff --git a/learn/self_hosted/install_meilisearch_locally.mdx b/learn/self_hosted/install_meilisearch_locally.mdx index b295fbe1f..fb0125f03 100644 --- a/learn/self_hosted/install_meilisearch_locally.mdx +++ b/learn/self_hosted/install_meilisearch_locally.mdx @@ -54,14 +54,14 @@ These commands launch the **latest stable release** of Meilisearch. 
```bash # Fetch the latest version of Meilisearch image from DockerHub -docker pull getmeili/meilisearch:v1.10 +docker pull getmeili/meilisearch:v1.11 # Launch Meilisearch in development mode with a master key docker run -it --rm \ -p 7700:7700 \ -e MEILI_ENV='development' \ -v $(pwd)/meili_data:/meili_data \ - getmeili/meilisearch:v1.10 + getmeili/meilisearch:v1.11 # Use ${pwd} instead of $(pwd) in PowerShell ``` From a3072d49eb9a733cdf1ab49195a3f45eee094e61 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 9 Oct 2024 13:02:48 +0200 Subject: [PATCH 04/11] v1.11: AI-powered search updates (#3011) --------- Co-authored-by: Louis Dureuil --- .code-samples.meilisearch.yaml | 4 +-- .../getting_started_with_ai_search.mdx | 6 ++-- reference/api/search.mdx | 8 ++++- reference/api/settings.mdx | 35 ++++++++++++++++--- 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/.code-samples.meilisearch.yaml b/.code-samples.meilisearch.yaml index 927a27968..8d4fe3615 100644 --- a/.code-samples.meilisearch.yaml +++ b/.code-samples.meilisearch.yaml @@ -1242,7 +1242,7 @@ search_parameter_guide_hybrid_1: |- "q": "kitchen utensils", "hybrid": { "semanticRatio": 0.9, - "embedder": "default" + "embedder": "EMBEDDER_NAME" } }' search_parameter_guide_vector_1: |- @@ -1321,7 +1321,7 @@ search_parameter_reference_retrieve_vectors_1: |- "q": "kitchen utensils", "retrieveVectors": true, "hybrid": { - "embedder": "default" + "embedder": "EMBEDDER_NAME" } }' search_parameter_reference_distinct_1: |- diff --git a/learn/ai_powered_search/getting_started_with_ai_search.mdx b/learn/ai_powered_search/getting_started_with_ai_search.mdx index 954ee0bd0..726382f87 100644 --- a/learn/ai_powered_search/getting_started_with_ai_search.mdx +++ b/learn/ai_powered_search/getting_started_with_ai_search.mdx @@ -50,7 +50,7 @@ curl \ Next, you must generate vector embeddings for all documents in your dataset. 
Embeddings are mathematical representations of the meanings of words and sentences in your documents. Meilisearch relies on external providers to generate these embeddings. Use OpenAI for this tutorial. -Use the `embedders` index setting of the [update `/settings` endpoint](/reference/api/settings?utm_campaign=vector-search&utm_source=docs&utm_medium=vector-search-guide) to configure a default [OpenAI](https://platform.openai.com/) embedder: +Use the `embedders` index setting of the [update `/settings` endpoint](/reference/api/settings?utm_campaign=vector-search&utm_source=docs&utm_medium=vector-search-guide) to configure an [OpenAI](https://platform.openai.com/) embedder: ```sh curl \ @@ -58,7 +58,7 @@ curl \ -H 'Content-Type: application/json' \ --data-binary '{ "embedders": { - "default": { + "openai": { "source": "openAi", "apiKey": "OPEN_AI_API_KEY", "model": "text-embedding-3-small", @@ -91,7 +91,7 @@ curl \ --data-binary '{ "q": "kitchen utensils made of wood", "hybrid": { - "embedder": "default", + "embedder": "openai", "semanticRatio": 0.7 } }' diff --git a/reference/api/search.mdx b/reference/api/search.mdx index af9ac67f8..464487421 100644 --- a/reference/api/search.mdx +++ b/reference/api/search.mdx @@ -1181,7 +1181,7 @@ Configures Meilisearch to return search results based on a query's meaning and c `hybrid` must be an object. It accepts two fields: `embedder` and `semanticRatio`. -`embedder` must be a string indicating an embedder configured with the `/settings` endpoint. If you don't specify an embedder and your index contains a single embedder, Meilisearch uses it by default. If an index contains multiple embedders, Meilisearch will use the embedder named `default`. +`embedder` must be a string indicating an embedder configured with the `/settings` endpoint. It is mandatory to specify a valid embedder when performing AI-powered searches. 
`semanticRatio` must be a number between `0.0` and `1.0` indicating the proportion between keyword and semantic search results. `0.0` causes Meilisearch to only return keyword results. `1.0` causes Meilisearch to only return meaning-based results. Defaults to `0.5`. @@ -1205,6 +1205,12 @@ Use a custom vector to perform a search query. Must be an array of numbers corre `vector` dimensions must match the dimensions of the embedder. + +If a query does not specify `q`, but contains both `vector` and `hybrid.semanticRatio` bigger than `0`, Meilisearch performs a pure semantic search. + +If `q` is missing and `semanticRatio` is explicitly set to `0`, Meilisearch performs a placeholder search without any vector search results. + + #### Example diff --git a/reference/api/settings.mdx b/reference/api/settings.mdx index dfaba3a7e..427dedf15 100644 --- a/reference/api/settings.mdx +++ b/reference/api/settings.mdx @@ -2183,12 +2183,14 @@ These embedder objects may contain the following fields: | **`url`** | String | `http://localhost:11434/api/embeddings` | The URL Meilisearch contacts when querying the embedder | | **`apiKey`** | String | Empty | Authentication token Meilisearch should send with each request to the embedder. 
If not present, Meilisearch will attempt to read it from environment variables | | **`model`** | String | Empty | The model your embedder uses when generating vectors | -| **`documentTemplate`** | String | `{% for field in fields %}{{field.name}}: {{field.value}}\n{% endfor %}` | Template defining the data Meilisearch sends the embedder | +| **`documentTemplate`** | String | `{% for field in fields %} {% if field.is_searchable and not field.value == nil %}{{ field.name }}: {{ field.value }} {% endif %} {% endfor %}` | Template defining the data Meilisearch sends to the embedder | +| **`documentTemplateMaxBytes`** | Integer | `400` | Maximum allowed size of rendered document template | | **`dimensions`** | Integer | Empty | Number of dimensions in the chosen model. If not supplied, Meilisearch tries to infer this value | | **`revision`** | String | Empty | Model revision hash | | **`distribution`** | Object | Empty | Describes the natural distribution of search results. Must contain two fields, `mean` and `sigma`, each containing a numeric value between `0` and `1` | | **`request`** | Object | Empty | A JSON value representing the request Meilisearch makes to the remote embedder | | **`response`** | Object | Empty | A JSON value representing the request Meilisearch expects from the remote embedder | +| **`binaryQuantized`** | Boolean | Empty | Once set to `true`, irreversibly converts all vector dimensions to 1-bit values | ### Get embedder settings @@ -2242,6 +2244,7 @@ Partially update the embedder settings for an index. When this setting is update "apiKey": , "model": , "documentTemplate": , + "documentTemplateMaxBytes": , "dimensions": , "revision": , "distribution": { @@ -2250,7 +2253,8 @@ Partially update the embedder settings for an index. 
When this setting is update }, "request": { … }, "response": { … }, - "headers": { … } + "headers": { … }, + "binaryQuantized": } } ``` @@ -2295,7 +2299,7 @@ This field is incompatible with `huggingFace` and `userProvided` embedders. The model your embedder uses when generating vectors. These are the officially supported models Meilisearch supports: -- `openAi`: `openai-text-embedding-ada-002`, `text-embedding-3-small`, and `text-embedding-3-large` +- `openAi`: `text-embedding-3-small`, `text-embedding-3-large`, `openai-text-embedding-ada-002` - `huggingFace`: `BAAI/bge-base-en-v1.5` Other models, such as [HuggingFace's BERT models](https://huggingface.co/models?other=bert) or those provided by Ollama and REST embedders may also be compatible with Meilisearch. @@ -2313,12 +2317,25 @@ This field is incompatible with `rest` and `userProvided` embedders. You may use the following context values: - `{{doc.FIELD}}`: `doc` stands for the document itself. `FIELD` must correspond to an attribute present on all documents value will be replaced by the value of that field in the input document -- `{{fields}}`: a list of all the `field`s appearing in any document in the index. Each `field` object in this list has two properties: `name` and `value`. If a `field` does not exist in a document, `value` is `nil` +- `{{fields}}`: a list of all the `field`s appearing in any document in the index. Each `field` object in this list has the following properties: + - `name`: the field's attribute + - `value`: the field's value + - `is_searchable`: whether the field is present in the searchable attributes list -For best results, build short templates that only contain highly relevant data. If working with a long field, consider [truncating it](https://shopify.github.io/liquid/filters/truncatewords/). If you do not manually set it, `documentTemplate` will include all document fields. This may lead to suboptimal performance and relevancy. 
+If a `field` does not exist in a document, its `value` is `nil`. + +For best results, build short templates that only contain highly relevant data. If working with a long field, consider [truncating it](https://shopify.github.io/liquid/filters/truncatewords/). If you do not manually set it, `documentTemplate` will include all searchable and non-null document fields. This may lead to suboptimal performance and relevancy. This field is optional but strongly encouraged for all embedders. +##### `documentTemplateMaxBytes` + +The maximum size of a rendered document template. Longer texts are truncated to fit the configured limit. + +`documentTemplateMaxBytes` must be an integer. It defaults to `400`. + +This field is optional for all embedders. + ##### `dimensions` Number of dimensions in the chosen model. If not supplied, Meilisearch tries to infer this value. @@ -2460,6 +2477,14 @@ This field is optional when using the `rest` embedder. This field is incompatible with all other embedders. +##### `binaryQuantized` + +When set to `true`, compresses vectors by representing each of its dimensions with 1-bit values. This reduces relevancy of semantic search results, but greatly reduces database size. + + +**Activating `binaryQuantized` is irreversible.** Once enabled, Meilisearch converts all vectors and discards all vector data that does fit within 1-bit. The only way to recover the vectors' original values is to re-vectorize the whole index in a new embedder. 
+ + #### Example From 578a2af94993e49903a147debd035d8321503697 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 9 Oct 2024 13:03:17 +0200 Subject: [PATCH 05/11] v1.11: Federated search facets (#3012) --------- Co-authored-by: Louis Dureuil --- reference/api/multi_search.mdx | 73 ++++++++++++++++++++++++++++++++ reference/errors/error_codes.mdx | 28 ++++++++++++ 2 files changed, 101 insertions(+) diff --git a/reference/api/multi_search.mdx b/reference/api/multi_search.mdx index f7565e363..a5731c48f 100644 --- a/reference/api/multi_search.mdx +++ b/reference/api/multi_search.mdx @@ -34,9 +34,82 @@ Use `federation` to receive a single list with all search results from all speci | :--------------------------------------------------------------------------- | :--------------- | :------------ | :-------------------------------------------------- | | **[`offset`](/reference/api/search#offset)** | Integer | `0` | Number of documents to skip | | **[`limit`](/reference/api/search#limit)** | Integer | `20` | Maximum number of documents returned | +| **[`facetsByIndex`](/#facetsbyindex)** | Object of arrays | `null` | Display facet information for the specified indexes | +| **[`mergeFacets`](/#mergefacets)** | Object | `null` | Display facet information for the specified indexes | If `federation` is missing or `null`, Meilisearch returns a list of multiple search result objects, with each item from the list corresponding to a search query in the request. +##### `facetsByIndex` + +`facetsByIndex` must be an object. Its keys must correspond to indexes in your Meilisearch project. Each key must be associated with an array of attributes in the filterable attributes list of that index: + +```json +"facetsByIndex": { + "INDEX_A": ["ATTRIBUTE_X", "ATTRIBUTE_Y"], + "INDEX_B": ["ATTRIBUTE_Z"] +} +``` + +When you specify `facetsByIndex`, multi-search responses include an extra `facetsByIndex` field. 
The response's `facetsByIndex` is an object with one field for each queried index: + +```json +{ + "hits": [ … ], + … + "facetsByIndex": { + "INDEX_A": { + "distribution": { + "ATTRIBUTE_X": { + "KEY": , + "KEY": , + … + }, + "ATTRIBUTE_Y": { + "KEY": , + … + } + }, + "stats": { + "KEY": { + "min": , + "max": + } + } + }, + "INDEX_B": { + … + } + } +} +``` + +##### `mergeFacets` + +`mergeFacets` must be an object and may contain the following fields: + +- `maxValuesPerFacet`: must be an integer. When specified, indicates the maximum number of returned values for a single facet. Defaults to the value assigned to [the `maxValuesPerFacet` index setting](/reference/api/settings#faceting) + +When both `facetsByIndex` and `mergeFacets` are present and not null, facet information included in multi-search responses is merged across all queried indexes. Instead of `facetsByIndex`, the response includes two extra fields: `facetDistribution` and `facetStats`: + +```json +{ + "hits": [ … ], + … + "facetDistribution": { + "ATTRIBUTE": { + "VALUE": , + "VALUE": + } + }, + "facetStats": { + "ATTRIBUTE": { + "min": , + "max": + } + } +} +``` + ##### Merge algorithm for federated searches Federated search's merged results are returned in decreasing ranking score. To obtain the final list of results, Meilisearch compares with the following procedure: diff --git a/reference/errors/error_codes.mdx b/reference/errors/error_codes.mdx index ae5076c9f..cf3cb53ee 100644 --- a/reference/errors/error_codes.mdx +++ b/reference/errors/error_codes.mdx @@ -213,6 +213,34 @@ A multi-search query contains a negative value for `federated.weight`. Two or more queries in a multi-search request have incompatible results. +## `invalid_multi_search_facets` + +`federation.facetsByIndex.` contains a value that is not in the filterable attributes list. + +## `invalid_multi_search_sort_facet_values_by` + +`federation.mergeFacets.sortFacetValuesBy` is not a string or doesn’t have one of the allowed values. 
+ +## `invalid_multi_search_query_facets` + +A query in the queries array contains `facets` when federation is present and non-`null`. + +## `invalid_multi_search_merge_facets` + +`federation.mergeFacets` is not an object or contains unexpected fields. + +## `invalid_multi_search_max_values_per_facet` + +`federation.mergeFacets.maxValuesPerFacet` is not a positive integer. + +## `invalid_multi_search_facet_order` + +Two or more indexes have a different `faceting.sortFacetValuesBy` for the same requested facet. + +## `invalid_multi_search_facets_by_index` + +`facetsByIndex` is not an object or contains unknown fields. + ## `invalid_search_attributes_to_crop` The [`attributesToCrop`](/reference/api/search#attributes-to-crop) parameter is invalid. It should be an array of strings, a string, or set to `null`. From 452c18d720a21a2366ffc6a998a9c06ce0e11a35 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 9 Oct 2024 14:09:06 +0200 Subject: [PATCH 06/11] v1.11: Add `STARTS WITH` to filter expression reference (#3019) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --------- Co-authored-by: Clément Renault --- .../filter_expression_reference.mdx | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/learn/filtering_and_sorting/filter_expression_reference.mdx b/learn/filtering_and_sorting/filter_expression_reference.mdx index 1670d4118..b78675b5e 100644 --- a/learn/filtering_and_sorting/filter_expression_reference.mdx +++ b/learn/filtering_and_sorting/filter_expression_reference.mdx @@ -161,7 +161,7 @@ NOT genres IN [horror, comedy] `CONTAINS` filters results containing partial matches to the specified string pattern, similar to a [SQL `LIKE`](https://dev.mysql.com/doc/refman/8.4/en/string-comparison-functions.html#operator_like). 
-The following expression returns all dairy products whose name start with `"kef"`, such as kefir: +The following expression returns all dairy products whose names contain `"kef"`: ``` dairy_products.name CONTAINS kef @@ -185,6 +185,40 @@ curl \ "containsFilter": true }' ``` + +This will also enable the [`STARTS WITH`](#starts_with) operator. + + +### `STARTS WITH` + +`STARTS WITH` filters results whose values start with the specified string pattern. + +The following expression returns all dairy products whose names start with `"kef"`: + +``` +dairy_products.name STARTS WITH kef +``` + +The negated form of the above expression can be written as: + +``` +dairy_products.name NOT STARTS WITH kef +NOT dairy_products.name STARTS WITH kef +``` + + +This is an experimental feature. Use the experimental features endpoint to activate it: + +```sh +curl \ + -X PATCH 'http://localhost:7700/experimental-features/' \ + -H 'Content-Type: application/json' \ + --data-binary '{ + "containsFilter": true + }' +``` + +This will also enable the [`CONTAINS`](#contains) operator. ### `NOT` From 71d4ac6bb1c9d5c36047c60defeea346debe5046 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Thu, 10 Oct 2024 11:24:18 +0200 Subject: [PATCH 07/11] update telemetry table (#3022) --- learn/resources/telemetry.mdx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/learn/resources/telemetry.mdx b/learn/resources/telemetry.mdx index 941177719..c0ef6115c 100644 --- a/learn/resources/telemetry.mdx +++ b/learn/resources/telemetry.mdx @@ -240,13 +240,14 @@ This list is liable to change with every new version of Meilisearch. 
It's not be | `vector_store` | `true` if the vector store feature is enabled, otherwise `false` | true | `attributes_to_search_on.total_number_of_uses` | `true` if the vector store feature is enabled, otherwise `false` | true | `vector.max_vector_size` | Highest number of dimensions given for the `vector` parameter in this batch | 1536 -| `vector.retrieve_vectors` | true if the retrieve_vectors parameter has been used in this batch. | false | `hybrid.enabled` | `true` if hybrid search been used in the aggregated event, otherwise `false` | true | `hybrid.semantic_ratio` | `true` if semanticRatio was used in this batch, otherwise false | false -| `hybrid.embedder` | `true` if a specific embedder was used in this batch, otherwise false | true | `embedders.total` | Numbers of defined embedders | 2 | `embedders.sources` | An array representing the different provided sources | [”huggingFace”, “userProvided”] | `embedders.document_template_used` | A boolean indicating if one of the provided embedders has a custom template defined | true +| `embedders.document_template_max_bytes` | A value indicating the largest value for `documentTemplateMaxBytes` across all embedders | 400 +| `embedders.binary_quantization_used` | `true` if the user updated the binary quantized field of the embedder settings | `false` | `infos.task_queue_webhook` | `true` if the instance is launched with a task queue webhook, otherwise `false` | `false` | `infos.experimental_search_queue_size` | Size of the search queue | 750 | `locales` | List of locales used with `/search` and `/settings` routes | [”fra”, “eng”] From 2280e4d486d7acf161f6ee975560ae272ebbaba7 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 16 Oct 2024 15:10:13 +0200 Subject: [PATCH 08/11] add support for bigrams (#3018) --- .code-samples.meilisearch.yaml | 2 +- reference/api/search.mdx | 3 +-- reference/api/settings.mdx | 
4 +++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.code-samples.meilisearch.yaml b/.code-samples.meilisearch.yaml index 8d4fe3615..975296ad7 100644 --- a/.code-samples.meilisearch.yaml +++ b/.code-samples.meilisearch.yaml @@ -1373,7 +1373,7 @@ search_parameter_reference_locales_1: |- -X POST 'http://localhost:7700/indexes/INDEX_NAME/search' \ -H 'Content-Type: application/json' \ --data-binary '{ - "q": "進撃の巨人", + "q": "QUERY TEXT IN JAPANESE", "locales": ["jpn"] }' get_localized_attribute_settings_1: |- diff --git a/reference/api/search.mdx b/reference/api/search.mdx index 464487421..0de79b11d 100644 --- a/reference/api/search.mdx +++ b/reference/api/search.mdx @@ -1254,7 +1254,7 @@ Return document embedding data with search results. If `true`, Meilisearch will ### Query locales **Parameter**: `locales`
-**Expected value**: array of [supported ISO-639-2B locales](/reference/api/settings#localized-attributes-object)
+**Expected value**: array of [supported ISO-639 locales](/reference/api/settings#localized-attributes-object)
**Default value**: `[]` By default, Meilisearch auto-detects the language of a query. Use this parameter to explicitly state the language of a query. @@ -1281,7 +1281,6 @@ For full control over the way Meilisearch detects languages during indexing and { "id": 0, "title": "DOCUMENT NAME", - "overview_cn": "OVERVIEW TEXT IN CHINESE", "overview_jp": "OVERVIEW TEXT IN JAPANESE" } … diff --git a/reference/api/settings.mdx b/reference/api/settings.mdx index 427dedf15..4b73d6221 100644 --- a/reference/api/settings.mdx +++ b/reference/api/settings.mdx @@ -815,7 +815,9 @@ Locale objects must have the following fields: #### `locales` -Meilisearch supports the following `locales`: `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`. +Meilisearch supports the following [ISO-639-3](https://iso639-3.sil.org/) three-letter `locales`: `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`. + +You may alternatively use [ISO-639-1 two-letter equivalents](https://iso639-3.sil.org/code_tables/639/data) to the supported `locales`. You may also assign an empty array to `locales`. 
In this case, Meilisearch will auto-detect the language of the associated `attributePatterns`. From 8fb4fd620f105d88e9fa5631c02ad92c8cd078fd Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 16 Oct 2024 18:01:25 +0200 Subject: [PATCH 09/11] fix broken links --- learn/filtering_and_sorting/filter_expression_reference.mdx | 2 +- reference/api/multi_search.mdx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/learn/filtering_and_sorting/filter_expression_reference.mdx b/learn/filtering_and_sorting/filter_expression_reference.mdx index b78675b5e..76c40e5cb 100644 --- a/learn/filtering_and_sorting/filter_expression_reference.mdx +++ b/learn/filtering_and_sorting/filter_expression_reference.mdx @@ -186,7 +186,7 @@ curl \ }' ``` -This will also enable the [`STARTS WITH`](#starts_with) operator. +This will also enable the [`STARTS WITH`](#starts-with) operator. ### `STARTS WITH` diff --git a/reference/api/multi_search.mdx b/reference/api/multi_search.mdx index a5731c48f..858cd7d07 100644 --- a/reference/api/multi_search.mdx +++ b/reference/api/multi_search.mdx @@ -34,8 +34,8 @@ Use `federation` to receive a single list with all search results from all speci | :--------------------------------------------------------------------------- | :--------------- | :------------ | :-------------------------------------------------- | | **[`offset`](/reference/api/search#offset)** | Integer | `0` | Number of documents to skip | | **[`limit`](/reference/api/search#limit)** | Integer | `20` | Maximum number of documents returned | -| **[`facetsByIndex`](/#facetsbyindex)** | Object of arrays | `null` | Display facet information for the specified indexes | -| **[`mergeFacets`](/#mergefacets)** | Object | `null` | Display facet information for the specified indexes | +| **[`facetsByIndex`](#facetsbyindex)** | Object of arrays | `null` | Display facet information for the specified indexes | +| **[`mergeFacets`](#mergefacets)** | Object | `null` | Display facet 
information for the specified indexes | If `federation` is missing or `null`, Meilisearch returns a list of multiple search result objects, with each item from the list corresponding to a search query in the request. From 16b2f60fb7e514e9ee90e7826627112fc284a861 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Thu, 17 Oct 2024 11:57:54 +0200 Subject: [PATCH 10/11] v1.11: Binary quantization usage recommendation (#3027) --- learn/indexing/indexing_best_practices.mdx | 10 ++++++++++ reference/api/settings.mdx | 4 +++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/learn/indexing/indexing_best_practices.mdx b/learn/indexing/indexing_best_practices.mdx index 1def9f30d..c0071fa13 100644 --- a/learn/indexing/indexing_best_practices.mdx +++ b/learn/indexing/indexing_best_practices.mdx @@ -60,3 +60,13 @@ If you have followed the previous tips in this guide and are still experiencing Indexing is a memory-intensive and multi-threaded operation. The more memory and processor cores available, the faster Meilisearch will index new documents. When trying to improve indexing speed, using a machine with more processor cores is more effective than increasing RAM. Due to how Meilisearch works, it is best to avoid HDDs (Hard Disk Drives) as they can easily become performance bottlenecks. + +## Enable binary quantization when using AI-powered search + +If you are experiencing performance issues when indexing documents for AI-powered search, consider enabling [binary quantization](/reference/api/settings#binaryquantized) for your embedders. Binary quantization compresses vectors by representing each dimension with 1-bit values. This reduces the relevancy of semantic search results, but greatly improves performance. + +Binary quantization works best with large datasets containing more than 1M documents and using models with more than 1400 dimensions. 
+ +**Activating binary quantization is irreversible.** Once enabled, Meilisearch converts all vectors and discards all vector data that does not fit within 1-bit. The only way to recover the vectors' original values is to re-vectorize the whole index in a new embedder. + diff --git a/reference/api/settings.mdx b/reference/api/settings.mdx index eca3ad56d..0dad732be 100644 --- a/reference/api/settings.mdx +++ b/reference/api/settings.mdx @@ -2483,7 +2483,9 @@ This field is incompatible with all other embedders. ##### `binaryQuantized` -When set to `true`, compresses vectors by representing each of its dimensions with 1-bit values. This reduces relevancy of semantic search results, but greatly reduces database size. +When set to `true`, compresses vectors by representing each dimension with 1-bit values. This reduces the relevancy of semantic search results, but greatly reduces database size. + +This option can be useful when working with large Meilisearch projects. Consider activating it if your project contains more than one million documents and uses models with more than 1400 dimensions. **Activating `binaryQuantized` is irreversible.** Once enabled, Meilisearch converts all vectors and discards all vector data that does fit within 1-bit. The only way to recover the vectors' original values is to re-vectorize the whole index in a new embedder. 
From 75567ed37aad070fd3d2ca4b3238047fd8b4a9d6 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Thu, 24 Oct 2024 11:06:11 +0200 Subject: [PATCH 11/11] add missing `embedder` fields where it is now mandatory --- .code-samples.meilisearch.yaml | 10 +++++++--- reference/api/similar.mdx | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.code-samples.meilisearch.yaml b/.code-samples.meilisearch.yaml index 975296ad7..aed75ccdc 100644 --- a/.code-samples.meilisearch.yaml +++ b/.code-samples.meilisearch.yaml @@ -1248,7 +1248,10 @@ search_parameter_guide_hybrid_1: |- search_parameter_guide_vector_1: |- curl -X POST 'localhost:7700/indexes/INDEX_NAME/search' \ -H 'content-type: application/json' \ - --data-binary '{ "vector": [0, 1, 2] }' + --data-binary '{ + "vector": [0, 1, 2], + "embedder": "EMBEDDER_NAME" + }' get_search_cutoff_1: |- curl \ -X GET 'http://localhost:7700/indexes/movies/settings/search-cutoff-ms' @@ -1355,11 +1358,12 @@ get_similar_post_1: |- -H 'Content-Type: application/json' \ -H 'Authorization: Bearer DEFAULT_SEARCH_API_KEY' \ --data-binary '{ - "id": TARGET_DOCUMENT_ID + "id": TARGET_DOCUMENT_ID, + "embedder": "EMBEDDER_NAME" }' get_similar_get_1: |- curl \ - -X GET 'http://localhost:7700/indexes/INDEX_NAME/similar?id=TARGET_DOCUMENT_ID' + -X GET 'http://localhost:7700/indexes/INDEX_NAME/similar?id=TARGET_DOCUMENT_ID&embedder=EMBEDDER_NAME' search_parameter_reference_ranking_score_threshold_1: |- curl \ -X POST 'http://localhost:7700/indexes/INDEX_NAME/search' \ diff --git a/reference/api/similar.mdx b/reference/api/similar.mdx index f2ac0f540..bb23ca0f9 100644 --- a/reference/api/similar.mdx +++ b/reference/api/similar.mdx @@ -26,7 +26,7 @@ Retrieve documents similar to a specific search result. 
| Parameter | Type | Default value | Description | | ---------------------------------------------------------------------------- | ---------------- | ------------- | ---------------------------------------------- | | **`id`** | String or number | `null` | Identifier of the target document (mandatory) | -| **[`embedder`](/reference/api/search#hybrid-search-experimental)** | String | `"default"` | Embedder to use when computing recommendations | +| **[`embedder`](/reference/api/search#hybrid-search-experimental)** | String | `"default"` | Embedder to use when computing recommendations. Mandatory | | **[`attributesToRetrieve`](/reference/api/search#attributes-to-retrieve)** | Array of strings | `["*"]` | Attributes to display in the returned documents| | **[`offset`](/reference/api/search#offset)** | Integer | `0` | Number of documents to skip | | **[`limit`](/reference/api/search#limit)** | Integer | `20` | Maximum number of documents returned | @@ -83,7 +83,7 @@ Retrieve documents similar to a specific search result. | Parameter | Type | Default value | Description | | ---------------------------------------------------------------------------- | ---------------- | ------------- | ---------------------------------------------- | | **`id`** | String or number | `null` | Identifier of the target document (mandatory) | -| **[`embedder`](/reference/api/search#hybrid-search-experimental)** | String | `"default"` | Embedder to use when computing recommendations | +| **[`embedder`](/reference/api/search#hybrid-search-experimental)** | String | `"default"` | Embedder to use when computing recommendations. Mandatory | | **[`attributesToRetrieve`](/reference/api/search#attributes-to-retrieve)** | Array of strings | `["*"]` | Attributes to display in the returned documents| | **[`offset`](/reference/api/search#offset)** | Integer | `0` | Number of documents to skip | | **[`limit`](/reference/api/search#limit)** | Integer | `20` | Maximum number of documents returned |