From c916407f371cddc9bf67bb724dc8d21b58006e81 Mon Sep 17 00:00:00 2001 From: wt Date: Mon, 9 Sep 2024 19:55:06 +0800 Subject: [PATCH] test: add query expr test cases (#36073) 1. query with expr under different scalar index types 2. test framework supports preparing one piece of data and multiple parameter queries Signed-off-by: wangting0128 --- tests/python_client/base/client_base.py | 71 ++- tests/python_client/common/code_mapping.py | 4 + tests/python_client/common/common_func.py | 67 +++ tests/python_client/common/common_params.py | 14 + .../testcases/test_mix_scenes.py | 524 ++++++++++++++++++ tests/scripts/ci_e2e_4am.sh | 4 +- 6 files changed, 676 insertions(+), 8 deletions(-) create mode 100644 tests/python_client/testcases/test_mix_scenes.py diff --git a/tests/python_client/base/client_base.py b/tests/python_client/base/client_base.py index 227ef57c5a180..96bc7a9257500 100644 --- a/tests/python_client/base/client_base.py +++ b/tests/python_client/base/client_base.py @@ -18,7 +18,7 @@ from common import common_type as ct from common.common_params import IndexPrams -from pymilvus import ResourceGroupInfo +from pymilvus import ResourceGroupInfo, DataType class Base: @@ -35,6 +35,7 @@ class Base: resource_group_list = [] high_level_api_wrap = None skip_connection = False + def setup_class(self): log.info("[setup_class] Start setup class...") @@ -44,6 +45,9 @@ def teardown_class(self): def setup_method(self, method): log.info(("*" * 35) + " setup " + ("*" * 35)) log.info("[setup_method] Start setup test case %s." % method.__name__) + self._setup_objects() + + def _setup_objects(self): self.connection_wrap = ApiConnectionsWrapper() self.utility_wrap = ApiUtilityWrapper() self.collection_wrap = ApiCollectionWrapper() @@ -57,7 +61,9 @@ def setup_method(self, method): def teardown_method(self, method): log.info(("*" * 35) + " teardown " + ("*" * 35)) log.info("[teardown_method] Start teardown test case %s..." % method.__name__) + self._teardown_objects() + def _teardown_objects(self): try: """ Drop collection before disconnect """ if not self.connection_wrap.has_connection(alias=DefaultConfig.DEFAULT_USING)[0]: @@ -80,7 +86,8 @@ def teardown_method(self, method): rgs_list = self.utility_wrap.list_resource_groups()[0] for rg_name in self.resource_group_list: if rg_name is not None and rg_name in rgs_list: - rg = self.utility_wrap.describe_resource_group(name=rg_name, check_task=ct.CheckTasks.check_nothing)[0] + rg = \ + self.utility_wrap.describe_resource_group(name=rg_name, check_task=ct.CheckTasks.check_nothing)[0] if isinstance(rg, ResourceGroupInfo): if rg.num_available_node > 0: self.utility_wrap.transfer_node(source=rg_name, @@ -266,9 +273,9 @@ def init_collection_general(self, prefix="test", insert_data=False, nb=ct.defaul primary_field=primary_field) if vector_data_type == ct.sparse_vector: default_schema = cf.gen_default_sparse_schema(auto_id=auto_id, primary_field=primary_field, - enable_dynamic_field=enable_dynamic_field, - with_json=with_json, - multiple_dim_array=multiple_dim_array) + enable_dynamic_field=enable_dynamic_field, + with_json=with_json, + multiple_dim_array=multiple_dim_array) if is_all_data_type: default_schema = cf.gen_collection_schema_all_datatype(auto_id=auto_id, dim=dim, primary_field=primary_field, @@ -390,7 +397,8 @@ def init_user_with_privilege(self, privilege_object, object_name, privilege, db_ self.utility_wrap.create_role() # grant privilege to the role - self.utility_wrap.role_grant(object=privilege_object, object_name=object_name, privilege=privilege, db_name=db_name) + self.utility_wrap.role_grant(object=privilege_object, object_name=object_name, privilege=privilege, + db_name=db_name) # bind the role to the user self.utility_wrap.role_add_user(tmp_user) @@ -417,3 +425,54 @@ def show_indexes(self, collection_obj: ApiCollectionWrapper = None): indexes = {n.field_name: n.params for n in self.collection_wrap.indexes} log.info("[TestcaseBase] Collection: `{0}` index: {1}".format(collection_obj.name, indexes)) return indexes + + +class TestCaseClassBase(TestcaseBase): + """ + Setup objects on class + """ + + def setup_class(self): + log.info("[setup_class] " + " Start setup class ".center(100, "~")) + self._setup_objects(self) + + def teardown_class(self): + log.info("[teardown_class]" + " Start teardown class ".center(100, "~")) + self._teardown_objects(self) + + def setup_method(self, method): + log.info(" setup ".center(80, "*")) + log.info("[setup_method] Start setup test case %s." % method.__name__) + + def teardown_method(self, method): + log.info(" teardown ".center(80, "*")) + log.info("[teardown_method] Start teardown test case %s..." % method.__name__) + + @property + def all_scalar_fields(self): + dtypes = [DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64, DataType.VARCHAR, DataType.BOOL, + DataType.FLOAT, DataType.DOUBLE] + dtype_names = [f"{n.name}" for n in dtypes] + [f"ARRAY_{n.name}" for n in dtypes] + [DataType.JSON.name] + return dtype_names + + @property + def all_index_scalar_fields(self): + return list(set(self.all_scalar_fields) - {DataType.JSON.name}) + + @property + def inverted_support_dtype_names(self): + return self.all_index_scalar_fields + + @property + def inverted_not_support_dtype_names(self): + return [DataType.JSON.name] + + @property + def bitmap_support_dtype_names(self): + dtypes = [DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64, DataType.BOOL, DataType.VARCHAR] + dtype_names = [f"{n.name}" for n in dtypes] + [f"ARRAY_{n.name}" for n in dtypes] + return dtype_names + + @property + def bitmap_not_support_dtype_names(self): + return list(set(self.all_scalar_fields) - set(self.bitmap_support_dtype_names)) diff --git a/tests/python_client/common/code_mapping.py b/tests/python_client/common/code_mapping.py index b5254f1a1f552..8396878e5b884 100644 --- a/tests/python_client/common/code_mapping.py +++ b/tests/python_client/common/code_mapping.py @@ -38,3 +38,7 @@ class IndexErrorMessage(ExceptionsMessage): VectorMetricTypeExist = "metric type not set for vector index" CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field" CheckBitmapOnPK = "create bitmap index on primary key not supported" + + +class QueryErrorMessage(ExceptionsMessage): + ParseExpressionFailed = "failed to create query plan: cannot parse expression: " diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py index b377babbfae63..e91d47f03df3a 100644 --- a/tests/python_client/common/common_func.py +++ b/tests/python_client/common/common_func.py @@ -21,6 +21,8 @@ from customize.milvus_operator import MilvusOperator import pickle fake = Faker() + +from common.common_params import Expr """" Methods of processing data """ @@ -1293,6 +1295,11 @@ def gen_data_by_collection_field(field, nb=None, start=None): if nb is None: return [np.float32(random.random()) for _ in range(max_capacity)] return [[np.float32(random.random()) for _ in range(max_capacity)] for _ in range(nb)] + if element_type == DataType.DOUBLE: + if nb is None: + return [np.float64(random.random()) for _ in range(max_capacity)] + return [[np.float64(random.random()) for _ in range(max_capacity)] for _ in range(nb)] + if element_type == DataType.VARCHAR: max_length = field.params['max_length'] max_length = min(20, max_length - 1) @@ -1335,6 +1342,24 @@ def gen_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = return data +def gen_field_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}) -> dict: + """ + generate default value according to the collection fields, + which can replace the value of the specified field + + return: + : + """ + data = {} + for field in schema.fields: + default_value = default_values.get(field.name, None) + if default_value is not None: + data[field.name] = default_value + elif field.auto_id is False: + data[field.name] = gen_data_by_collection_field(field, nb, start_id * nb) + return data + + def gen_json_files_for_bulk_insert(data, schema, data_dir): for d in data: if len(d) > 0: @@ -1746,6 +1771,48 @@ def gen_integer_overflow_expressions(): return expressions +def gen_modulo_expression(expr_fields): + exprs = [] + for field in expr_fields: + exprs.extend([ + (Expr.EQ(Expr.MOD(field, 10).subset, 1).value, field), + (Expr.LT(Expr.MOD(field, 17).subset, 9).value, field), + (Expr.LE(Expr.MOD(field, 100).subset, 50).value, field), + (Expr.GT(Expr.MOD(field, 50).subset, 40).value, field), + (Expr.GE(Expr.MOD(field, 29).subset, 15).value, field), + (Expr.NE(Expr.MOD(field, 29).subset, 10).value, field), + ]) + return exprs + + +def gen_varchar_expression(expr_fields): + exprs = [] + for field in expr_fields: + exprs.extend([ + (Expr.like(field, "a%").value, field, r'^a.*'), + (Expr.LIKE(field, "%b").value, field, r'.*b$'), + (Expr.AND(Expr.like(field, "%b").subset, Expr.LIKE(field, "z%").subset).value, field, r'^z.*b$'), + (Expr.And(Expr.like(field, "i%").subset, Expr.LIKE(field, "%j").subset).value, field, r'^i.*j$'), + (Expr.OR(Expr.like(field, "%h%").subset, Expr.LIKE(field, "%jo").subset).value, field, fr'(?:h.*|.*jo$)'), + (Expr.Or(Expr.like(field, "ip%").subset, Expr.LIKE(field, "%yu%").subset).value, field, fr'(?:^ip.*|.*yu)'), + ]) + return exprs + + +def gen_number_operation(expr_fields): + exprs = [] + for field in expr_fields: + exprs.extend([ + (Expr.LT(Expr.ADD(field, 23), 100).value, field), + (Expr.LT(Expr.ADD(-23, field), 121).value, field), + (Expr.LE(Expr.SUB(field, 123), 99).value, field), + (Expr.GT(Expr.MUL(field, 2), 88).value, field), + (Expr.GT(Expr.MUL(3, field), 137).value, field), + (Expr.GE(Expr.DIV(field, 30), 20).value, field), + ]) + return exprs + + def l2(x, y): return np.linalg.norm(np.array(x) - np.array(y)) diff --git a/tests/python_client/common/common_params.py b/tests/python_client/common/common_params.py index a83ccd73ca069..3013d072289f4 100644 --- a/tests/python_client/common/common_params.py +++ b/tests/python_client/common/common_params.py @@ -1,6 +1,8 @@ from dataclasses import dataclass from typing import List, Dict +from pymilvus import DataType + """ Define param names""" @@ -55,6 +57,10 @@ def subset(self): def __repr__(self): return self.expr + @property + def value(self): + return self.expr + class Expr: # BooleanConstant: 'true' | 'True' | 'TRUE' | 'false' | 'False' | 'FALSE' @@ -344,6 +350,10 @@ class DefaultScalarIndexParams: def Default(field: str): return {field: IndexPrams()} + @staticmethod + def list_default(fields: List[str]) -> Dict[str, IndexPrams]: + return {n: IndexPrams() for n in fields} + @staticmethod def Trie(field: str): return {field: IndexPrams(index_type=IndexName.Trie)} @@ -356,6 +366,10 @@ def STL_SORT(field: str): def INVERTED(field: str): return {field: IndexPrams(index_type=IndexName.INVERTED)} + @staticmethod + def list_inverted(fields: List[str]) -> Dict[str, IndexPrams]: + return {n: IndexPrams(index_type=IndexName.INVERTED) for n in fields} + @staticmethod def BITMAP(field: str): return {field: IndexPrams(index_type=IndexName.BITMAP)} diff --git a/tests/python_client/testcases/test_mix_scenes.py b/tests/python_client/testcases/test_mix_scenes.py new file mode 100644 index 0000000000000..87327c56bf0e3 --- /dev/null +++ b/tests/python_client/testcases/test_mix_scenes.py @@ -0,0 +1,524 @@ +import re +import pytest +from pymilvus import DataType + +from common.common_type import CaseLabel, CheckTasks +from common import common_type as ct +from common import common_func as cf +from common.code_mapping import QueryErrorMessage as qem +from common.common_params import ( + IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType, Expr +) +from base.client_base import TestcaseBase, TestCaseClassBase + + +@pytest.mark.xdist_group("TestNoIndexDQLExpr") +class TestNoIndexDQLExpr(TestCaseClassBase): + """ + Scalar fields are not indexed, and verify DQL requests + + Author: Ting.Wang + """ + + def setup_class(self): + super().setup_class(self) + + # connect to server before testing + self._connect(self) + + # init params + self.primary_field, nb = "int64_pk", 3000 + + # create a collection with fields + self.collection_wrap.init_collection( + name=cf.gen_unique_str("test_no_index_dql_expr"), + schema=cf.set_collection_schema( + fields=[self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name, + DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields], + field_params={ + self.primary_field: FieldParams(is_primary=True).to_dict, + DataType.FLOAT16_VECTOR.name: FieldParams(dim=3).to_dict, + DataType.BFLOAT16_VECTOR.name: FieldParams(dim=6).to_dict, + DataType.BINARY_VECTOR.name: FieldParams(dim=16).to_dict + }, + ) + ) + + # prepare data (> 1024 triggering index building) + self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb) + + @pytest.fixture(scope="class", autouse=True) + def prepare_data(self): + self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result) + + # flush collection, segment sealed + self.collection_wrap.flush() + + # build `Hybrid index` on empty collection + index_params = { + **DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name), + **DefaultVectorIndexParams.IVF_FLAT(DataType.BFLOAT16_VECTOR.name), + **DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name), + **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name) + } + self.build_multi_index(index_params=index_params) + assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) + + # load collection + self.collection_wrap.load() + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr, output_fields", [ + (Expr.In(Expr.MOD('INT8', 13).subset, [0, 1, 2]).value, ['INT8']), + (Expr.Nin(Expr.MOD('INT16', 100).subset, [10, 20, 30, 40]).value, ['INT16']), + ]) + def test_no_index_query_with_invalid_expr(self, expr, output_fields): + """ + target: + 1. check invalid expr + method: + 1. prepare some data + 2. query with the invalid expr + expected: + 1. raises expected error + """ + # query + self.collection_wrap.query(expr=expr, check_task=CheckTasks.err_res, + check_items={ct.err_code: 1100, ct.err_msg: qem.ParseExpressionFailed}) + + @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054") + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize( + "expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_no_index_query_with_modulo(self, expr, expr_field, limit): + """ + target: + 1. check modulo expression + method: + 1. prepare some data + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_no_index_query_with_string(self, expr, expr_field, limit, rex): + """ + target: + 1. check string expression + method: + 1. prepare some data + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize( + "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_no_index_query_with_operation(self, expr, expr_field, limit): + """ + target: + 1. check number operation + method: + 1. prepare some data + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + +@pytest.mark.xdist_group("TestHybridIndexDQLExpr") +class TestHybridIndexDQLExpr(TestCaseClassBase): + """ + Scalar fields build Hybrid index, and verify DQL requests + + Author: Ting.Wang + """ + + def setup_class(self): + super().setup_class(self) + + # connect to server before testing + self._connect(self) + + # init params + self.primary_field, nb = "int64_pk", 3000 + + # create a collection with fields + self.collection_wrap.init_collection( + name=cf.gen_unique_str("test_hybrid_index_dql_expr"), + schema=cf.set_collection_schema( + fields=[self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name, + DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields], + field_params={ + self.primary_field: FieldParams(is_primary=True).to_dict, + DataType.FLOAT16_VECTOR.name: FieldParams(dim=3).to_dict, + DataType.BFLOAT16_VECTOR.name: FieldParams(dim=6).to_dict, + DataType.BINARY_VECTOR.name: FieldParams(dim=16).to_dict + }, + ) + ) + + # prepare data (> 1024 triggering index building) + self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb) + + @pytest.fixture(scope="class", autouse=True) + def prepare_data(self): + self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result) + + # flush collection, segment sealed + self.collection_wrap.flush() + + # build `Hybrid index` on empty collection + index_params = { + **DefaultVectorIndexParams.DISKANN(DataType.FLOAT16_VECTOR.name), + **DefaultVectorIndexParams.IVF_SQ8(DataType.BFLOAT16_VECTOR.name), + **DefaultVectorIndexParams.SPARSE_INVERTED_INDEX(DataType.SPARSE_FLOAT_VECTOR.name), + **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name), + # build Hybrid index + **DefaultScalarIndexParams.list_default([self.primary_field] + self.all_index_scalar_fields) + } + self.build_multi_index(index_params=index_params) + assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) + + # load collection + self.collection_wrap.load() + + @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054") + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize( + "expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_hybrid_index_query_with_modulo(self, expr, expr_field, limit): + """ + target: + 1. check modulo expression + method: + 1. prepare some data and build `Hybrid index` on scalar fields + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_hybrid_index_query_with_string(self, expr, expr_field, limit, rex): + """ + target: + 1. check string expression + method: + 1. prepare some data and build `Hybrid index` on scalar fields + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize( + "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_hybrid_index_query_with_operation(self, expr, expr_field, limit): + """ + target: + 1. check number operation + method: + 1. prepare some data and build `Hybrid index` on scalar fields + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + +@pytest.mark.xdist_group("TestInvertedIndexDQLExpr") +class TestInvertedIndexDQLExpr(TestCaseClassBase): + """ + Scalar fields build INVERTED index, and verify DQL requests + + Author: Ting.Wang + """ + + def setup_class(self): + super().setup_class(self) + + # connect to server before testing + self._connect(self) + + # init params + self.primary_field, nb = "int64_pk", 3000 + + # create a collection with fields + self.collection_wrap.init_collection( + name=cf.gen_unique_str("test_inverted_index_dql_expr"), + schema=cf.set_collection_schema( + fields=[self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name, + DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields], + field_params={ + self.primary_field: FieldParams(is_primary=True).to_dict, + DataType.FLOAT16_VECTOR.name: FieldParams(dim=3).to_dict, + DataType.BFLOAT16_VECTOR.name: FieldParams(dim=6).to_dict, + DataType.BINARY_VECTOR.name: FieldParams(dim=16).to_dict + }, + ) + ) + + # prepare data (> 1024 triggering index building) + self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb) + + @pytest.fixture(scope="class", autouse=True) + def prepare_data(self): + self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result) + + # flush collection, segment sealed + self.collection_wrap.flush() + + # build `Hybrid index` on empty collection + index_params = { + **DefaultVectorIndexParams.IVF_FLAT(DataType.FLOAT16_VECTOR.name), + **DefaultVectorIndexParams.HNSW(DataType.BFLOAT16_VECTOR.name), + **DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name), + **DefaultVectorIndexParams.BIN_FLAT(DataType.BINARY_VECTOR.name), + # build Hybrid index + **DefaultScalarIndexParams.list_inverted([self.primary_field] + self.inverted_support_dtype_names) + } + self.build_multi_index(index_params=index_params) + assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) + + # load collection + self.collection_wrap.load() + + @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054") + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize( + "expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_inverted_index_query_with_modulo(self, expr, expr_field, limit): + """ + target: + 1. check modulo expression + method: + 1. prepare some data and build `INVERTED index` on scalar fields + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_inverted_index_query_with_string(self, expr, expr_field, limit, rex): + """ + target: + 1. check string expression + method: + 1. prepare some data and build `INVERTED index` on scalar fields + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize( + "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_inverted_index_query_with_operation(self, expr, expr_field, limit): + """ + target: + 1. check number operation + method: + 1. prepare some data and build `INVERTED index` on scalar fields + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + +@pytest.mark.xdist_group("TestBitmapIndexDQLExpr") +class TestBitmapIndexDQLExpr(TestCaseClassBase): + """ + Scalar fields build BITMAP index, and verify DQL requests + + Author: Ting.Wang + """ + + def setup_class(self): + super().setup_class(self) + + # connect to server before testing + self._connect(self) + + # init params + self.primary_field, nb = "int64_pk", 3000 + + # create a collection with fields + self.collection_wrap.init_collection( + name=cf.gen_unique_str("test_bitmap_index_dql_expr"), + schema=cf.set_collection_schema( + fields=[self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name, + DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields], + field_params={ + self.primary_field: FieldParams(is_primary=True).to_dict, + DataType.FLOAT16_VECTOR.name: FieldParams(dim=3).to_dict, + DataType.BFLOAT16_VECTOR.name: FieldParams(dim=6).to_dict, + DataType.BINARY_VECTOR.name: FieldParams(dim=16).to_dict + }, + ) + ) + + # prepare data (> 1024 triggering index building) + self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb) + + @pytest.fixture(scope="class", autouse=True) + def prepare_data(self): + self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result) + + # flush collection, segment sealed + self.collection_wrap.flush() + + # build `Hybrid index` on empty collection + index_params = { + **DefaultVectorIndexParams.HNSW(DataType.FLOAT16_VECTOR.name), + **DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name), + **DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name), + **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name), + # build Hybrid index + **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names) + } + self.build_multi_index(index_params=index_params) + assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) + + # load collection + self.collection_wrap.load() + + @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054") + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_bitmap_index_query_with_modulo(self, expr, expr_field, limit): + """ + target: + 1. check modulo expression + method: + 1. prepare some data and build `BITMAP index` on scalar fields + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_bitmap_index_query_with_string(self, expr, expr_field, limit, rex): + """ + target: + 1. check string expression + method: + 1. prepare some data and build `BITMAP index` on scalar fields + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize( + "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_bitmap_index_query_with_operation(self, expr, expr_field, limit): + """ + target: + 1. check number operation + method: + 1. prepare some data and build `BITMAP index` on scalar fields + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" diff --git a/tests/scripts/ci_e2e_4am.sh b/tests/scripts/ci_e2e_4am.sh index 900afa8d95d56..0e8d2870ea09a 100755 --- a/tests/scripts/ci_e2e_4am.sh +++ b/tests/scripts/ci_e2e_4am.sh @@ -132,10 +132,10 @@ cd ${ROOT}/tests/python_client if [[ -n "${TEST_TIMEOUT:-}" ]]; then timeout "${TEST_TIMEOUT}" pytest --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} \ - --html=${CI_LOG_PATH}/report.html --self-contained-html ${@:-} + --html=${CI_LOG_PATH}/report.html --self-contained-html --dist loadgroup ${@:-} else pytest --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} \ - --html=${CI_LOG_PATH}/report.html --self-contained-html ${@:-} + --html=${CI_LOG_PATH}/report.html --self-contained-html --dist loadgroup ${@:-} fi # # Run concurrent test with 5 processes