Skip to content

Commit

Permalink
Add: sparse benchmark. (#1476)
Browse files (browse the repository at this point in the history)
### What problem does this PR solve?

Add sparse query benchmark.

### Type of change

- [x] Other (please describe): sparse benchmark.
  • Loading branch information
small-turtle-1 authored Jul 15, 2024
1 parent ea6a7f7 commit 6eb8cb9
Show file tree
Hide file tree
Showing 6 changed files with 468 additions and 44 deletions.
2 changes: 1 addition & 1 deletion python/benchmark/legacy_benchmark/remote_benchmark_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def process_pool(threads, rounds, query_path, ef: int, remote: bool, table_name)
results.append(f"Round {i + 1}:")
results.append(f"Total Dur: {dur:.2f} s")
results.append(f"Query Count: {total_queries_count}")
results.append(f"QPS: {(len(total_queries) / dur):.2f}")
results.append(f"QPS: {(total_queries_count / dur):.2f}")

for result in results:
print(result)
Expand Down
87 changes: 44 additions & 43 deletions python/benchmark/legacy_benchmark/remote_benchmark_knn_import.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import time
Expand All @@ -11,7 +25,9 @@


def import_data(path, dataset: str, m: int, ef_construction: int, remote: bool):
print(f"dataset: {dataset}, m: {m}, ef_construction: {ef_construction}, remote: {remote}")
print(
f"dataset: {dataset}, m: {m}, ef_construction: {ef_construction}, remote: {remote}"
)
if dataset == "sift_1m":
import_sift_1m(path + "/sift_base.fvecs", m, ef_construction, remote)
elif dataset == "gist_1m":
Expand All @@ -38,15 +54,15 @@ def import_sift_1m(path, m: int, ef_construction: int, remote: bool):
assert os.path.exists(path)

start = time.time()
res = table_obj.import_data(path, {'file_type': 'fvecs'})
res = table_obj.import_data(path, {"file_type": "fvecs"})
end = time.time()
dur = end - start
print(f"Import sift_1m cost time: {dur} s")

assert res.error_code == ErrorCode.OK

start = time.time()
create_index("sift_benchmark", m, ef_construction, remote)
create_index(table_obj, m, ef_construction, remote)
end = time.time()
dur = end - start
print(f"Create index on sift_1m cost time: {dur} s")
Expand All @@ -70,39 +86,37 @@ def import_gist_1m(path, m: int, ef_construction: int, remote: bool):
assert os.path.exists(path)

start = time.time()
res = table_obj.import_data(path, {'file_type': 'fvecs'})
res = table_obj.import_data(path, {"file_type": "fvecs"})
end = time.time()
dur = end - start
print(f"Import gist_1m cost time: {dur} s")

assert res.error_code == ErrorCode.OK

start = time.time()
create_index("gist_benchmark", m, ef_construction, remote)
create_index(table_obj, m, ef_construction, remote)
end = time.time()
dur = end - start
print(f"Create index on gist_1m cost time: {dur} s")


def create_index(table_name, m: int, ef_construction: int, remote: bool):
infinity_obj = None
if remote:
infinity_obj = infinity.connect(LOCAL_HOST)
else:
infinity_obj = infinity.connect(LOCAL_INFINITY_PATH)
assert infinity_obj

table = infinity_obj.get_database("default_db").get_table(table_name)
res = table.create_index("hnsw_index",
[index.IndexInfo("col1",
index.IndexType.Hnsw,
[
index.InitParameter("M", str(m)),
index.InitParameter("ef_construction", str(ef_construction)),
index.InitParameter("ef", str(ef_construction)),
index.InitParameter("metric", "l2"),
index.InitParameter("encode", "lvq")
])])
def create_index(table_obj, m: int, ef_construction: int, remote: bool):
res = table_obj.create_index(
"hnsw_index",
[
index.IndexInfo(
"col1",
index.IndexType.Hnsw,
[
index.InitParameter("M", str(m)),
index.InitParameter("ef_construction", str(ef_construction)),
index.InitParameter("ef", str(ef_construction)),
index.InitParameter("metric", "l2"),
index.InitParameter("encode", "lvq"),
],
)
],
)

assert res.error_code == ErrorCode.OK

Expand All @@ -117,7 +131,8 @@ def str2bool(value):
else:
raise argparse.ArgumentTypeError("Boolean value expected")

if __name__ == '__main__':

if __name__ == "__main__":
current_path = os.getcwd()

parser = argparse.ArgumentParser(description="Benchmark Infinity")
Expand All @@ -126,28 +141,14 @@ def str2bool(value):
"-d",
"--data",
type=str,
default='sift_1m', # gist_1m
default="sift_1m", # gist_1m
dest="data_set",
)
parser.add_argument("--m", type=int, default=16, dest="m")
parser.add_argument(
"--m",
type=int,
default=16,
dest="m"
)
parser.add_argument(
"--ef_construction",
type=int,
default=200,
dest="ef_construction"
)
parser.add_argument(
"-R",
"--remote",
type=str2bool,
default=True,
dest="remote"
"--ef_construction", type=int, default=200, dest="ef_construction"
)
parser.add_argument("-R", "--remote", type=str2bool, default=True, dest="remote")

args = parser.parse_args()

Expand Down
Loading

0 comments on commit 6eb8cb9

Please sign in to comment.