From b2ff61fa4c2aed981cdef28e88d9ddd0218f310f Mon Sep 17 00:00:00 2001 From: writinwaters <93570324+writinwaters@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:53:15 +0800 Subject: [PATCH] Added code examples (#1999) ### What problem does this PR solve? _Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._ Issue link:#1915 ### Type of change - [x] Documentation Update --- .../CONTRIBUTING.md => CONTRIBUTING.md | 7 +- docs/getstarted/build_from_source.mdx | 2 +- docs/getstarted/deploy_infinity_server.mdx | 14 +- docs/references/http_api_reference.mdx | 45 +++- docs/references/pysdk_api_reference.md | 197 +++++++++++------- 5 files changed, 170 insertions(+), 95 deletions(-) rename docs/references/CONTRIBUTING.md => CONTRIBUTING.md (96%) diff --git a/docs/references/CONTRIBUTING.md b/CONTRIBUTING.md similarity index 96% rename from docs/references/CONTRIBUTING.md rename to CONTRIBUTING.md index 248479bc84..f8e22a8fb9 100644 --- a/docs/references/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,11 +1,6 @@ ---- -sidebar_position: 0 -slug: /contribution_guidelines ---- - # Contribution Guidelines -Thanks for wanting to contribute to Infinity. This document offers guidelines and major considerations for submitting your contributions. +Thanks for wanting to contribute to Infinity. This document offers guidelines and major considerations for submitting your contributions. - To report a bug, file a [GitHub issue](https://github.com/infiniflow/infinity/issues/new/choose) with us. - For further questions, you can explore existing discussions or initiate a new one in [Discussions](https://github.com/orgs/infiniflow/discussions). 
diff --git a/docs/getstarted/build_from_source.mdx b/docs/getstarted/build_from_source.mdx index 5254c3042f..1d8d312c78 100644 --- a/docs/getstarted/build_from_source.mdx +++ b/docs/getstarted/build_from_source.mdx @@ -51,7 +51,7 @@ The `cmake` build type (`CMAKE_BUILD_TYPE`) can be one of the following: Optimizes with `-O3`; without symbol information. The built executables are significantly smaller than those of `RelWithDebInfo`. -:::note +:::tip NOTE The following procedures set `CMAKE_BUILD_TYPE` to `Debug`. Change it as you see necessary. ::: diff --git a/docs/getstarted/deploy_infinity_server.mdx b/docs/getstarted/deploy_infinity_server.mdx index 2112200e0c..37a2798d57 100644 --- a/docs/getstarted/deploy_infinity_server.mdx +++ b/docs/getstarted/deploy_infinity_server.mdx @@ -41,8 +41,8 @@ import infinity_embedded infinity_obj = infinity_embedded.connect("absolute/path/to/save/to") ``` -:::note -For detailed information about the Python API, see the [Python API Reference](../references/pysdk_api_reference.md). +:::tip NOTE +For detailed information about the capabilities and usage of Infinity's Python API, see the [Python API Reference](../references/pysdk_api_reference.md). ::: @@ -117,8 +117,8 @@ res = table_object.output(["*"]) print(res) ``` -:::note -For detailed information about the Python API, see the [Python API Reference](../references/pysdk_api_reference.md). +:::tip NOTE +For detailed information about the capabilities and usage of Infinity's Python API, see the [Python API Reference](../references/pysdk_api_reference.md). 
::: ## Deploy Infinity using binary @@ -180,7 +180,7 @@ pip install infinity-sdk==0.4.0.dev2 ```python import infinity -infinity_obj = infinity.connect(infinity.NetworkAddress("", 23817)) +infinity_object = infinity.connect(infinity.NetworkAddress("", 23817)) db_object = infinity_object.get_database("default_db") table_object = db_object.create_table("my_table", {"num": {"type": "integer"}, "body": {"type": "varchar"}, "vec": {"type": "vector, 4, float"}}) table_object.insert([{"num": 1, "body": "unnecessary and harmful", "vec": [1.0, 1.2, 0.8, 0.9]}]) @@ -191,7 +191,7 @@ res = table_object.output(["*"]) print(res) ``` -:::note -For detailed information about the Python API, see the [Python API Reference](../references/pysdk_api_reference.md). +:::tip NOTE +For detailed information about the capabilities and usage of Infinity's Python API, see the [Python API Reference](../references/pysdk_api_reference.md). ::: diff --git a/docs/references/http_api_reference.mdx b/docs/references/http_api_reference.mdx index a93c8ee260..d610dd28f3 100644 --- a/docs/references/http_api_reference.mdx +++ b/docs/references/http_api_reference.mdx @@ -822,7 +822,7 @@ Creates an index on a specified table. 
If an index with the same name exists, th #### Request example -- Creates an HNSW index on a vector column: +- Creates an HNSW index on a dense vector or a multivector column: ```shell curl --request POST \ @@ -833,7 +833,7 @@ curl --request POST \ { "fields": [ - "vector_column" + "dense_column" ], "index": { @@ -868,7 +868,7 @@ curl --request POST \ } ' ``` -- Creates a BMP index on a tensor column +- Creates a BMP index on a sparse column ```shell curl --request POST \ @@ -879,10 +879,11 @@ curl --request POST \ { "fields": [ - "vector_column" + "sparse_column" ], "index": { + "type": "BMP", "block_size": "16", "compress_type": "raw" }, @@ -890,6 +891,27 @@ curl --request POST \ } ' ``` +- Creates a secondary index on a varchar column + +```shell +curl --request POST \ + --url http://localhost:23820/databases/{database_name}/tables/{table_name}/indexes/{index_name} \ + --header 'accept: application/json' \ + --header 'content-type: application/json' \ + --data ' + { + "fields": + [ + "varchar_column" + ], + "index": + { + "type": "Secondary" + }, + "create_option": "ignore_if_exists" + } ' +``` + #### Request parameters - `database_name`: (*Path parameter*) @@ -1758,9 +1780,12 @@ curl --request GET \ "match_method": "sparse", "fields": "sparse_column", "query_vector": {"10":1.1, "20":2.2, "30": 3.3}, - "metric_type": "l2", + "metric_type": "ip", "topn": 3, - "params": {"ef": "150"} + "params": { + "alpha": "1.0", + "beta": "1.0" + } }, { "fusion_method": "rrf", @@ -1832,9 +1857,11 @@ curl --request GET \ - `"uint8"`. - `metric_type`: `string`, *Required* The distance metric to use in similarity search. Used *only* when `"match_method"` is set to `"dense"` or `"sparse"`. - - `"ip"`: Inner product. - - `"l2"`: Euclidean distance. - - `"cosine"`: Cosine similarity. + - When `"match_method"` is set to `"dense"`: + - `"ip"`: Inner product. + - `"l2"`: Euclidean distance. + - `"cosine"`: Cosine similarity. 
+ - When `"match_method"` is set to `"sparse"`, only `"ip"` can be used. - `"fusion_method"`: (*Body parameter*), `enum`, *Required when you have specified multiple matching methods* - `"rrf"`: [Reciprocal rank fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) RRF is a method for combining multiple result sets with varying relevance indicators into a single result set. It requires no tuning, and the relevance indicators need not be related to achieve high-quality results. RRF is particularly useful when you are uncertain of the relative importance of each retrieval way. diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md index 268d4efbed..726175d252 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -74,7 +74,7 @@ This allows for bug fixes without requiring changes to the configuration file. ### Returns -- Success: An `infinity.local_infinity.infinity.LocalInfinityConnection` object in Python module mode or an `infinity.remote_thrift.infinity.RemoteThriftInfinityConnection` object in client-server mode. +- Success: An `infinity.local_infinity.infinity.LocalInfinityConnection` object in embedded mode or an `infinity.remote_thrift.infinity.RemoteThriftInfinityConnection` object in client-server mode. - Failure: `InfinityException` - `error_code`: `int` - A non-zero value indicating a specific error condition. - `error_msg`: `str` - A message providing additional details about the error. @@ -169,7 +169,7 @@ If `ConflictType` is not set, it defaults to `Error`. ### Returns -- Success: An `infinity.local_infinity.db.LocalDatabase` object in Python module mode or an `infinity.remote_thrift.db.RemoteDatabase` object in client-server mode. +- Success: An `infinity.local_infinity.db.LocalDatabase` object in embedded mode or an `infinity.remote_thrift.db.RemoteDatabase` object in client-server mode. 
- Failure: `InfinityException` - `error_code`: `int` - A non-zero value indicating a specific error condition. - `error_msg`: `str` - A message providing additional details about the error. @@ -265,7 +265,7 @@ infinity_object.drop_database("my_database", ConflictType.Ignore) ## list_databases ```python -Infinity.list_databases() +infinity_object.list_databases() ``` Retrieves a list of all available databases within the Infinity system. @@ -293,7 +293,7 @@ print(res.db_names) # ['my_database', 'database_1'] ## get_database ```python -Infinity.get_database(database_name) +infinity_object.get_database(database_name) ``` Retrieves a database object by its name. @@ -306,7 +306,7 @@ A non-empty string indicating the name of the database to retrieve. ### Returns -- Success: An `infinity.local_infinity.db.LocalDatabase` object in Python module mode or an `infinity.remote_thrift.db.RemoteDatabase` object in client-server mode. +- Success: An `infinity.local_infinity.db.LocalDatabase` object in embedded mode or an `infinity.remote_thrift.db.RemoteDatabase` object in client-server mode. - Failure: `InfinityException` - `error_code`: `int` - A non-zero value indicating a specific error condition. - `error_msg`: `str` - A message providing additional details about the error. @@ -449,7 +449,7 @@ If `ConflictType` is not set, it defaults to `Error`. ### Returns -- Success: An `infinity.local_infinity.table.LocalTable` object in Python module mode or an `infinity.remote_infinity.table.RemoteTable` object in client-server mode. +- Success: An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_infinity.table.RemoteTable` object in client-server mode. - Failure: `InfinityException`: - `error_code`: `int` - A non-zero value indicating a specific error condition. - `error_msg`: `str` - A message providing additional details about the error. 
@@ -501,7 +501,7 @@ You can build a HNSW index on the vector column to speed up the match_dense sear # - `vector`: The column is a vector column # - `128`: The vector dimension # - `float`: The primitive data type of the vectors. Can be `float`/`float32`, `float16`, `bfloat16`, `uint8` or `int8` -db_object.create_table("my_table", {"c1": {"type": "vector,128,float"}}, None) +db_object.create_table("my_table", {"c1": {"type": "vector,128,float"}}) ``` @@ -516,7 +516,7 @@ You can build an HNSW index on the multi-vector column to accelerate match_dense # - `multivector`: The column is a multi-vector column # - `128`: The basic vector dimension # - `float`: The primitive data type of the basic vectors. Can be `float`/`float32`, `float16`, `bfloat16`, `uint8` or `int8` -db_object.create_table("my_table", {"c1": {"type": "multivector,128,float"}}, None) +db_object.create_table("my_table", {"c1": {"type": "multivector,128,float"}}) ``` @@ -625,6 +625,91 @@ db_object.drop_table("my_table", ConflictType.Ignore) --- +## add_columns + +```python +table_object.add_columns(column_defs) +``` + +### Parameters + +#### column_defs: `dict[str, dict[str, Any]]`, *Required* + +A dictionary defining the columns to add. Each key in the dictionary is a column name (`str`), with a corresponding 'value' dictionary defining the column's data type and default value. See the description of `create_table()`'s `columns_definition` for all available settings. + +:::caution NOTE +You must specify a default value each time you add a column. 
+::: + +### Examples + +#### Add an integer column, a float column, and a varchar/string column at once + +```python +table_object.add_columns({"column_name1": {"type": "integer", "default": 0}, "column_name2": {"type": "float", "default": 0.0}, "column_name3": {"type": "varchar", "default": ""}}) +``` + +#### Add a dense vector column + +```python +table_object.add_columns({"column_name1": {"type": "vector,4,float", "default": [1.0, 1.2, 2.4, 4.6]}}) +``` + +#### Add a multivector column + +```python +table_object.add_columns({"column_name1": {"type": "multivector,4,float", "default": [[1.0, 0.0, 0.0, 0.0], [1.2, 0.0, 0.0, 0.0]]}}) +``` + +#### Add a tensor column + +```python +table_object.add_columns({"column_name1": {"type": "tensor,4,float", "default": [[1.0, 0.0, 0.0, 0.0], [1.2, 0.0, 0.0, 0.0]]}}) +``` + +#### Add a sparse column + +```python +from infinity.common import SparseVector +table_object.add_columns({"column_name1": {"type": "sparse,128,float,int", "default": SparseVector([10, 20, 30], [1.1, 2.2, 3.3])}}) +``` + +Or, you can set the default value in a different format: + +```python +table_object.add_columns({"column_name1": {"type": "sparse,128,float,int", "default": {"10":1.1, "20":2.2, "30": 3.3}}}) +``` + +--- + +## drop_columns + +```python +table_object.drop_columns(column_names) +``` + +### Parameters + +#### column_names: `list[str]`, *Required* + +A list of strings representing the names of the columns to drop. + +### Examples + +#### Remove one column from the table + +```python +table_object.drop_columns(["column_name"]) +``` + +#### Remove two columns at once + +```python +table_object.drop_columns(["column_name1", "column_name2"]) +``` + +--- + ## get_table ```python @@ -641,7 +726,7 @@ A non-empty string indicating the name of the table to retrieve. ### Returns -- Success: An `infinity.local_infinity.table.LocalTable` object in Python module mode or an `infinity.remote_infinity.table.RemoteTable` object in client-server mode. 
+- Success: An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_infinity.table.RemoteTable` object in client-server mode. - Failure: `InfinityException`: - `error_code`: `int` - A non-zero value indicating a specific error condition. - `error_msg`: `str` - A message providing additional details about the error. @@ -716,7 +801,7 @@ An `IndexInfo` structure contains three fields,`column_name`, `index_type`, and The name of the column to build index on. Must not be empty. - **index_type**: `IndexType`, *Required* Index type. You may want to import `infinity.index` to set `IndexType`: `from infinity.index import IndexType` - - `Hnsw`: An HNSW index. + - `Hnsw`: An HNSW index. Works with dense vectors and multivectors only. - `FullText`: A full-text index. - `Secondary`: A secondary index. Works with structured data only. - `BMP`: A Block-Max Pruning index. Works with sparse vectors only. @@ -787,24 +872,24 @@ A structure containing these attributes: ### Examples -#### Create an HNSW index +#### Create an HNSW index on a dense vector column ```python {1} from infinity.index import IndexInfo, IndexType # Create a table named "test_index_hnsw" with a 1024-dimensional float vector column "c1" -table_object = db_object.create_table("test_index_hnsw", {"c1": {"type": "vector,1024,float"}}, None) +table_object = db_object.create_table("test_index_hnsw", {"c1": {"type": "vector,1024,float"}}) # Create an HNSW index named "my_index" on column "c1" with default parameter settings: # - "M": "16", # - "ef_construction": "50", # - "encode": "plain" # Only the "metric" parameter (required) is explicitly set to L2 distance. 
-table_object.create_index("my_index",IndexInfo("c1", IndexType.Hnsw, {"metric": "l2"}), None) +table_object.create_index("my_index",IndexInfo("c1", IndexType.Hnsw, {"metric": "l2"})) ``` ```python {1} from infinity.index import IndexInfo, IndexType # Create a table named "test_index_hnsw" with a 1024-dimensional float vector column "c1" -table_object = db_object.create_table("test_index_hnsw", {"c1": {"type": "vector,1024,float"}}, None) +table_object = db_object.create_table("test_index_hnsw", {"c1": {"type": "vector,1024,float"}}) # Create an HNSW index named "my_index" on column "c1" # Settings for "M", "ef_construction", and "metric" are the same as above, except: # "encoding" is set to "lvq" @@ -824,12 +909,26 @@ table_object.create_index( ) ``` +#### Create an HNSW index on a multi-vector column + +```python +from infinity.index import IndexInfo, IndexType +# Create a table named "test_index_hnsw" with a 4-dimensional float multivector column "c1" +table_object = db_object.create_table("test_index_hnsw", {"c1": {"type": "multivector,4,float"}}) +# Create an HNSW index named "my_index" on column "c1" with default parameter settings: +# - "M": "16", +# - "ef_construction": "50", +# - "encode": "plain" +# Only the "metric" parameter (required) is explicitly set to L2 distance. 
+table_object.create_index("my_index",IndexInfo("c1", IndexType.Hnsw, {"metric": "l2"})) +``` + #### Create a full-text index ```python from infinity.index import IndexInfo, IndexType # Create a table named "test_index_fulltext" with a varchar column "body" -table_object = db_object.create_table("test_index_fulltext", {"body": {"type": "varchar"}}, None) +table_object = db_object.create_table("test_index_fulltext", {"body": {"type": "varchar"}}) # Create a full-text index named "my_index" on column "body" with default parameter settings: # - "ANALYZER": "standard" table_object.create_index( @@ -845,7 +944,7 @@ table_object.create_index( ```python from infinity.index import IndexInfo, IndexType # Create a table named "test_index_fulltext" with a varchar column "body" -table_object = db_object.create_table("test_index_fulltext", {"body": {"type": "varchar"}}, None) +table_object = db_object.create_table("test_index_fulltext", {"body": {"type": "varchar"}}) # Create a full-text index named "my_index" on column "body" # Setting "ANALYZER" to "standard" (same as the above) table_object.create_index( @@ -866,7 +965,7 @@ table_object.create_index( ```python {11} from infinity.index import IndexInfo, IndexType # Create a table named "test_index_secondary" with a varchar column "body" -table_object = db_object.create_table("test_index_secondary", {"c1": {"type": "varchar"}}, None) +table_object = db_object.create_table("test_index_secondary", {"c1": {"type": "varchar"}}) # Create a secondary index named "my_index" on column "c1" table_object.create_index( "my_index", @@ -883,7 +982,7 @@ table_object.create_index( ```python {13} from infinity.index import IndexInfo, IndexType # Create a table named "test_index_bmp" with a sparse vector column "c1" -table_object = db_object.create_table("test_index_bmp", {"c1": {"type": "sparse,30000,float,int16"}}, None) +table_object = db_object.create_table("test_index_bmp", {"c1": {"type": "sparse,30000,float,int16"}}) # Create a BMP 
index named "my_index" on column "c1" with default parameter settings: # - "block_size": "16" # - "compress_type": "compress" @@ -900,7 +999,7 @@ table_object.create_index( ```python {13,14} from infinity.index import IndexInfo, IndexType # Create a table named "test_index_bmp" with a sparse vector column "c1" -table_object = db_object.create_table("test_index_bmp", {"c1": {"type": "sparse,30000,float,int16"}}, None) +table_object = db_object.create_table("test_index_bmp", {"c1": {"type": "sparse,30000,float,int16"}}) # Create a BMP index named "my_index" on column "c1" # Settings for "block_size" and "compress_type" are the same as above table_object.create_index( @@ -1055,7 +1154,7 @@ table_instance = db_instance.create_table("primitive_table", { table_instance.insert({"c1": 1, "c7": "Tom", "c12": True}) ``` -#### Insert vectors +#### Insert dense vectors ```python # Create a table with a integer column and a 3-d vector column: @@ -1377,7 +1476,7 @@ The list must contain at least one element. Empty lists are not allowed. ### Returns -An `infinity.local_infinity.table.LocalTable` object in Python module mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode. +An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode. :::tip NOTE This method specifies the projection columns for the current table but does not directly produce displayable data. To display the query results, use `output()` in conjunction with methods like `to_result()`, `to_df()`, `to_pl()`, or `to_arrow()` to materialize the data. @@ -1467,7 +1566,7 @@ Currently, only 'and' and 'or' logical expressions are supported. ### Returns -An `infinity.local_infinity.table.LocalTable` object in Python module mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode. 
+An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode. :::tip NOTE This method specifies a filtering condition for the rows in the current table but does not directly produce displayable data. To display the query results, use `filter()` in conjunction with methods like `to_result()`, `to_df()`, `to_pl()`, or `to_arrow()` to materialize the data. @@ -1537,7 +1636,7 @@ A dictionary representing additional KNN or ANN search parameters. Currently onl ### Returns -- Success: An `infinity.local_infinity.table.LocalTable` object in Python module mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode. +- Success: An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode. - Failure: `InfinityException` - `error_code`: `int` A non-zero value indicating a specific error condition. - `error_msg`: `str` A message providing additional details about the error. @@ -1642,7 +1741,7 @@ A dictionary representing additional parameters for the sparse vector search. Fo ### Returns -- Success: An `infinity.local_infinity.table.LocalTable` object in Python module mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode. +- Success: An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode. - Failure: `InfinityException` - `error_code`: `int` A non-zero value indicating a specific error condition. - `error_msg`: `str` A message providing additional details about the error. @@ -1764,7 +1863,7 @@ An optional dictionary specifying the following search options: ### Returns -- Success: An `infinity.local_infinity.table.LocalTable` object in Python module mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode. 
+- Success: An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode. - Failure: `InfinityException` - `error_code`: `int` A non-zero value indicating a specific error condition. - `error_msg`: `str` A message providing additional details about the error. @@ -1857,7 +1956,7 @@ A dictionary representing additional options for the selected reranking method: ### Returns -- Success: An `infinity.local_infinity.table.LocalTable` object in Python module mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode. +- Success: An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode. - Failure: `InfinityException` - `error_code`: `int` A non-zero value indicating a specific error condition. - `error_msg`: `str` A message providing additional details about the error. @@ -1918,52 +2017,6 @@ table_object.output(["num", "body", "vec", "sparse_column", "year", "tensor", "_ --- -## add_columns - -```python -table_object.add_columns(column_defs) -``` - -### Parameters - -#### column_defs: `dict[str, dict[str, Any]]`, *Required* - -A dictionary defining the columns to add. Each key in the dictionary is a column name (`str`), with a corresponding 'value' dictionary defining the column's data type and default value. See the description of `create_table()`'s `columns_definition` for all available settings. - -:::caution NOTE -You must specify a default value each time you add a column. 
-::: - - -#### Examples -```python -# Add an integer column and a varchar column at once -table_obj.add_columns({"new_column_name1": {"type": "integer", "default": 0}, "new_column_name2": {"type": "varchar", "default": ""}}) -``` - ---- - -# drop_columns - -```python -table_object.drop_columns(column_names) -``` - -### Parameters - -#### column_names: `list[str]`, *Required* - -A list of strings representing the names of the columns to drop. - -#### Examples - -```python -# Remove two columns at once -table_object.drop_columns(["column_name1", "column_name2"]) -``` - ---- - ## to_result