Skip to content

Commit

Permalink
Add test cases for tensor in test_knn.py (#1528)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?
Add test cases for invalid match tensor params, including data type,
match type and extra options.

### Type of change
- [x] Test cases

---------

Co-authored-by: Jin Hai <[email protected]>
  • Loading branch information
Ami11111 and JinHai-CN authored Jul 25, 2024
1 parent f7b9026 commit 77c14a4
Show file tree
Hide file tree
Showing 2 changed files with 325 additions and 0 deletions.
87 changes: 87 additions & 0 deletions python/test/cases/test_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,90 @@ def test_sparse_knn_with_index(self, check_data):
"data_dir": common_values.TEST_TMP_DIR}], indirect=True)
def test_with_multiple_fusion(self, check_data):
self.test_infinity_obj._test_with_multiple_fusion(check_data)

@pytest.mark.parametrize(
    "check_data",
    [{"file_name": "pysdk_test_knn.csv", "data_dir": common_values.TEST_TMP_DIR}],
    indirect=True,
)
@pytest.mark.parametrize("index_column_name", ["gender_vector"])
@pytest.mark.parametrize("knn_column_name", ["gender_vector"])
@pytest.mark.parametrize("index_distance_type", ["l2", "ip", "cosine"])
@pytest.mark.parametrize("knn_distance_type", ["l2", "ip", "cosine"])
@pytest.mark.parametrize("index_type", [index.IndexType.Hnsw, index.IndexType.IVFFlat])
def test_with_various_index_knn_distance_combination(
    self,
    check_data,
    index_column_name,
    knn_column_name,
    index_distance_type,
    knn_distance_type,
    index_type,
):
    """Exercise every index-distance / KNN-distance / index-type combination.

    The parametrize decorators above produce the cross product of the
    listed distance types ("l2", "ip", "cosine") for both the index and the
    KNN query, for each of the Hnsw and IVFFlat index types, all on the
    "gender_vector" column. Each combination is forwarded unchanged to the
    shared test object held in ``self.test_infinity_obj``.
    """
    runner = self.test_infinity_obj
    runner._test_with_various_index_knn_distance_combination(
        check_data,
        index_column_name,
        knn_column_name,
        index_distance_type,
        knn_distance_type,
        index_type,
    )

def test_zero_dimension_vector(self):
    """Forward the zero-dimension vector case to ``self.test_infinity_obj``."""
    runner = self.test_infinity_obj
    runner._test_zero_dimension_vector()

@pytest.mark.parametrize(
    "dim",
    [
        1000,
        10000,
        100000,
        200000,
    ],
)
def test_big_dimension_vector(self, dim):
    """Forward each large ``dim`` value to the shared big-dimension check.

    ``dim`` takes each of the values listed in the parametrize decorator
    and is passed through unchanged to ``self.test_infinity_obj``.
    """
    runner = self.test_infinity_obj
    runner._test_big_dimension_vector(dim)

# "^5" means that matches in column "body" are multiplied (boosted) by 5; the default multiplier is 1.
# refer to https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
@pytest.mark.parametrize(
    "fields_and_matching_text",
    [
        # Each case is [fields, matching_text].
        ["body", "black"],
        ["doctitle,num,body", "black"],
        ["doctitle,num,body^5", "black"],
        ["", "body:black"],
        ["", "body:black^5"],
        ["", "'body':'(black)'"],
        ["", "body:'(black)^5'"],
        ["", "'body':'(black OR white)'"],
        ["", "'body':'(black AND white)'"],
        ["", "'body':'(black)^5 OR (white)'"],
        ["", "'body':'(black)^5 AND (white)'"],
        ["", "'body':'black - white'"],
        ["", "body:black OR doctitle:black"],
        ["", "body:black AND doctitle:black"],
        ["", "(body:black OR doctitle:black) AND (body:white OR doctitle:white)"],
        ["", "(body:black)^5 OR doctitle:black"],
        ["", "(body:black)^5 AND doctitle:black"],
        ["", "(body:black OR doctitle:black)^5 AND (body:white OR doctitle:white)"],
        # Not supported: ["", "doc\*:back"]
    ],
)
@pytest.mark.parametrize(
    "check_data",
    [{"file_name": "enwiki_embedding_99_commas.csv", "data_dir": common_values.TEST_TMP_DIR}],
    indirect=True,
)
def test_with_various_fulltext_match(self, check_data, fields_and_matching_text):
    """Forward each full-text match case to the shared test object.

    ``fields_and_matching_text`` is one two-element list from the
    parametrize table above; it is passed through unchanged, together with
    ``check_data``, to ``self.test_infinity_obj``.
    """
    runner = self.test_infinity_obj
    runner._test_with_various_fulltext_match(check_data, fields_and_matching_text)

@pytest.mark.parametrize(
    "data_type",
    [
        "varchar",
        pytest.param(1),
        pytest.param(1.1),
        pytest.param([]),
        pytest.param({}),
        pytest.param(()),
        pytest.param("@#$!#@$SDasdf3!@#$"),
    ],
)
@pytest.mark.parametrize(
    "check_data",
    [{"file_name": "tensor_maxsim.csv", "data_dir": common_values.TEST_TMP_DIR}],
    indirect=True,
)
def test_tensor_scan_with_invalid_data_type(self, check_data, data_type):
    """Forward each invalid ``data_type`` value to the shared tensor-scan check.

    The values come from the parametrize table above (a string, numbers,
    empty containers and a string of special characters) and are passed
    through unchanged to ``self.test_infinity_obj``.
    """
    runner = self.test_infinity_obj
    runner._test_tensor_scan_with_invalid_data_type(check_data, data_type)

@pytest.mark.parametrize(
    "method_type",
    [
        "invalid method type",
        pytest.param(1),
        pytest.param(1.1),
        pytest.param([]),
        pytest.param({}),
        pytest.param(()),
        pytest.param("@#$!#@$SDasdf3!@#$"),
    ],
)
@pytest.mark.parametrize(
    "check_data",
    [{"file_name": "tensor_maxsim.csv", "data_dir": common_values.TEST_TMP_DIR}],
    indirect=True,
)
def test_tensor_scan_with_invalid_method_type(self, check_data, method_type):
    """Forward each invalid ``method_type`` value to the shared tensor-scan check.

    The values come from the parametrize table above (a string, numbers,
    empty containers and a string of special characters) and are passed
    through unchanged to ``self.test_infinity_obj``.
    """
    runner = self.test_infinity_obj
    runner._test_tensor_scan_with_invalid_method_type(check_data, method_type)

@pytest.mark.parametrize(
    "extra_option",
    [
        "topn=-1",
        "topn=0",
        "topn=100000000",
        pytest.param(1),
        pytest.param(1.1),
        pytest.param([]),
        pytest.param({}),
        pytest.param(()),
    ],
)
@pytest.mark.parametrize(
    "check_data",
    [{"file_name": "tensor_maxsim.csv", "data_dir": common_values.TEST_TMP_DIR}],
    indirect=True,
)
def test_tensor_scan_with_invalid_extra_option(self, check_data, extra_option):
    """Forward each invalid ``extra_option`` value to the shared tensor-scan check.

    The values come from the parametrize table above ("topn=..." strings,
    numbers and empty containers) and are passed through unchanged to
    ``self.test_infinity_obj``.
    """
    runner = self.test_infinity_obj
    runner._test_tensor_scan_with_invalid_extra_option(check_data, extra_option)
Loading

0 comments on commit 77c14a4

Please sign in to comment.