Skip to content

Commit

Permalink
Allow user to give index name to search dense vector or ignore index …
Browse files Browse the repository at this point in the history
…to search dense vector (#1643)

### What problem does this PR solve?

- Use 'ignore index' to perform a brute-force search
- Use the index named by 'index_name' to search a dense vector.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Test cases
- [x] Python SDK impacted, Need to update PyPI

Signed-off-by: Jin Hai <[email protected]>
  • Loading branch information
JinHai-CN authored Aug 13, 2024
1 parent 24ec1eb commit d551716
Show file tree
Hide file tree
Showing 24 changed files with 2,814 additions and 2,444 deletions.
4 changes: 2 additions & 2 deletions python/benchmark/fulltext_import_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def insert_data(db_obj, data):
table_obj = db_obj.create_table("insert_benchmark",
{"id": {"type": "varchar"}, "title": {"type": "varchar"},
"text": {"type": "varchar"}}, ConflictType.Error)
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText, []))
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText))
assert res.error_code == ErrorCode.OK
inserted_records_num = 0
while inserted_records_num < len(data):
Expand All @@ -50,7 +50,7 @@ def import_file(db_obj, path):
{"id": {"type": "varchar"}, "title": {"type": "varchar"},
"text": {"type": "varchar"}}, ConflictType.Error)
assert table_obj
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText, []))
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText))
assert res.error_code == ErrorCode.OK
table_obj.import_data(path, {'file_type': 'jsonl'})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def insert_data(db_obj, data):
table_obj = db_obj.create_table("insert_benchmark",
{"id": {"type": "varchar"}, "title": {"type": "varchar"},
"text": {"type": "varchar"}}, ConflictType.Error)
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText, []))
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText))
assert res.error_code == ErrorCode.OK
inserted_records_num = 0
while inserted_records_num < len(data):
Expand All @@ -50,7 +50,7 @@ def import_file(db_obj, path):
{"id": {"type": "varchar"}, "title": {"type": "varchar"},
"text": {"type": "varchar"}}, ConflictType.Error)
assert table_obj
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText, []))
res = table_obj.create_index("text_index", index.IndexInfo("text", index.IndexType.FullText))
assert res.error_code == ErrorCode.OK
table_obj.import_data(path, {'file_type': 'jsonl'})

Expand Down
2 changes: 1 addition & 1 deletion python/benchmark/legacy_benchmark/remote_benchmark_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def one_thread(rounds, query_path, ground_truth_path, ef: int, remote: bool, tab

query_builder = InfinityThriftQueryBuilder(table)
query_builder.output(["_row_id"])
query_builder.knn('col1', query_vec, 'float', 'l2', 100, {'ef': str(ef)})
query_builder.knn('col1', query_vec, 'float', 'l2', 100, {'index_name': 'hnsw_index', 'ef': str(ef)})
res, _ = query_builder.to_result()
end = time.time()

Expand Down
14 changes: 7 additions & 7 deletions python/benchmark/legacy_benchmark/remote_benchmark_knn_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,13 @@ def create_index(table_obj, m: int, ef_construction: int, remote: bool):
index.IndexInfo(
"col1",
index.IndexType.Hnsw,
[
index.InitParameter("M", str(m)),
index.InitParameter("ef_construction", str(ef_construction)),
index.InitParameter("ef", str(ef_construction)),
index.InitParameter("metric", "l2"),
index.InitParameter("encode", "lvq"),
],
{
"m": str(m),
"ef_construction": str(ef_construction),
"ef": str(ef_construction),
"metric": "l2",
"encode": "lvq"
},
)
)

Expand Down
14 changes: 7 additions & 7 deletions python/benchmark/legacy_benchmark/remote_benchmark_knn_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,13 @@ def create_index(table_name):
res = table.create_index("hnsw_index",
index.IndexInfo("col1",
index.IndexType.Hnsw,
[
index.InitParameter("M", "16"),
index.InitParameter("ef_construction", "200"),
index.InitParameter("ef", "200"),
index.InitParameter("metric", "l2"),
index.InitParameter("encode", "lvq")
]))
{
"m": "16",
"ef_construction": "200",
"ef": "200",
"metric": "l2",
"encode": "lvq"
}))

assert res.error_code == ErrorCode.OK

Expand Down
20 changes: 11 additions & 9 deletions python/benchmark/mldr_benchmark/insert_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,20 +97,22 @@ def main(self):
print("Finish creating fulltext index.")
print("Start creating Hnsw index...")
res = self.infinity_table.create_index("hnsw_index", index.IndexInfo("dense_col", index.IndexType.Hnsw,
[index.InitParameter("M", "16"),
index.InitParameter("ef_construction",
"200"),
index.InitParameter("ef", "200"),
index.InitParameter("metric", "ip"),
index.InitParameter("encode", "lvq")]),
{
"m": "16",
"ef_construction": "200",
"ef": "200",
"metric": "ip",
"encode": "lvq"
}),
ConflictType.Error)
assert res.error_code == ErrorCode.OK
print("Finish creating Hnsw index.")
print("Start creating BMP index...")
res = self.infinity_table.create_index("bmp_index", index.IndexInfo("sparse_col", index.IndexType.BMP,
[index.InitParameter("block_size", "8"),
index.InitParameter("compress_type",
"compress")]),
{
"block_size": "8",
"compress_type": "compress"
}),
ConflictType.Error)
assert res.error_code == ErrorCode.OK
self.infinity_table.optimize("bmp_index", {"topk": "1000", "bp_reorder": ""})
Expand Down
20 changes: 11 additions & 9 deletions python/benchmark/mldr_benchmark/insert_data_50000.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,20 +104,22 @@ def main(self):
print("Finish creating fulltext index.")
print("Start creating Hnsw index...")
res = self.infinity_table.create_index("hnsw_index", index.IndexInfo("dense_col", index.IndexType.Hnsw,
[index.InitParameter("M", "16"),
index.InitParameter("ef_construction",
"1000"),
index.InitParameter("ef", "1000"),
index.InitParameter("metric", "ip"),
index.InitParameter("encode", "lvq")]),
{
"m": "16",
"ef_construction": "1000",
"ef": "1000",
"metric": "ip",
"encode": "lvq"
}),
ConflictType.Error)
assert res.error_code == ErrorCode.OK
print("Finish creating Hnsw index.")
print("Start creating BMP index...")
res = self.infinity_table.create_index("bmp_index", index.IndexInfo("sparse_col", index.IndexType.BMP,
[index.InitParameter("block_size", "8"),
index.InitParameter("compress_type",
"compress")]),
{
"block_size": "8",
"compress_type": "compress",
}),
ConflictType.Error)
assert res.error_code == ErrorCode.OK
self.infinity_table.optimize("bmp_index", {"topk": "1000", "bp_reorder": ""})
Expand Down
24 changes: 13 additions & 11 deletions python/benchmark/mldr_benchmark/insert_data_with_colbert.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,30 +100,32 @@ def main(self):
del docid_list
del corpus_text_list
print("Start creating fulltext index.")
ft_params = []
ft_params = {}
if lang == "zh":
ft_params.append(index.InitParameter("analyzer", "chinese"))
ft_params = {"analyzer": "chinese"}
res = self.infinity_table.create_index("ft_index",
index.IndexInfo("fulltext_col", index.IndexType.FullText, ft_params),
ConflictType.Error)
assert res.error_code == ErrorCode.OK
print("Finish creating fulltext index.")
print("Start creating Hnsw index...")
res = self.infinity_table.create_index("hnsw_index", index.IndexInfo("dense_col", index.IndexType.Hnsw,
[index.InitParameter("M", "16"),
index.InitParameter("ef_construction",
"200"),
index.InitParameter("ef", "200"),
index.InitParameter("metric", "ip"),
index.InitParameter("encode", "lvq")]),
{
"m": "16",
"ef_construction": "200",
"ef": "200",
"metric": "ip",
"encode": "lvq"
}),
ConflictType.Error)
assert res.error_code == ErrorCode.OK
print("Finish creating Hnsw index.")
print("Start creating BMP index...")
res = self.infinity_table.create_index("bmp_index", index.IndexInfo("sparse_col", index.IndexType.BMP,
[index.InitParameter("block_size", "8"),
index.InitParameter("compress_type",
"compress")]),
{
"block_size": "8",
"compress_type": "compress"
}),
ConflictType.Error)
assert res.error_code == ErrorCode.OK
self.infinity_table.optimize("bmp_index", {"topk": "1000", "bp_reorder": ""})
Expand Down
20 changes: 10 additions & 10 deletions python/benchmark/mldr_benchmark/insert_data_with_colbert_50000.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,20 +113,20 @@ def main(self):
print("Finish creating fulltext index.")
print("Start creating Hnsw index...")
res = self.infinity_table.create_index("hnsw_index", index.IndexInfo("dense_col", index.IndexType.Hnsw,
[index.InitParameter("M", "16"),
index.InitParameter("ef_construction",
"200"),
index.InitParameter("ef", "200"),
index.InitParameter("metric", "ip"),
index.InitParameter("encode", "lvq")]),
{
"m": "16",
"ef_construction": "200",
"ef": "200",
"metric": "ip",
"encode": "lvq"
}),
ConflictType.Error)
assert res.error_code == ErrorCode.OK
print("Finish creating Hnsw index.")
print("Start creating BMP index...")
res = self.infinity_table.create_index("bmp_index", index.IndexInfo("sparse_col", index.IndexType.BMP,
[index.InitParameter("block_size", "8"),
index.InitParameter("compress_type",
"compress")]),
res = self.infinity_table.create_index("bmp_index", index.IndexInfo("sparse_col", index.IndexType.BMP,{
"block_size": "8",
"compress_type": "compress"}),
ConflictType.Error)
assert res.error_code == ErrorCode.OK
self.infinity_table.optimize("bmp_index", {"topk": "1000", "bp_reorder": ""})
Expand Down
4 changes: 3 additions & 1 deletion python/infinity/local_infinity/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,9 @@ def knn(
knn_opt_params = []
if knn_params != None:
for k, v in knn_params.items():
knn_opt_params.append(InitParameter(k, v))
key = k.lower()
value = v.lower()
knn_opt_params.append(InitParameter(key, value))

knn_expr = WrapKnnExpr()
knn_expr.column_expr = column_expr
Expand Down
4 changes: 3 additions & 1 deletion python/infinity/remote_thrift/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,9 @@ def knn(
knn_opt_params = []
if knn_params is not None:
for k, v in knn_params.items():
knn_opt_params.append(InitParameter(k, v))
key = k.lower()
value = v.lower()
knn_opt_params.append(InitParameter(key, value))

knn_expr = KnnExpr(
column_expr=column_expr,
Expand Down
Loading

0 comments on commit d551716

Please sign in to comment.