-
Notifications
You must be signed in to change notification settings - Fork 4
/
to_milvus.py
61 lines (50 loc) · 1.6 KB
/
to_milvus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import random
# from pprint import pprint
# from milvus import Milvus, DataType
#
# _HOST = '127.0.0.1'
# _PORT = '19530'
# client = Milvus(_HOST, _PORT)
#
#
# if collection_name in client.list_collections():
# client.drop_collection(collection_name)
#
# collection_param = {
# "fields": [
# # {"name": "id", "type": DataType.INT32},
# {"name": "embedding", "type": DataType.FLOAT_VECTOR, "params": {"dim": 32}},
# ],
# "segment_row_limit": 16384,
# "auto_id": False
# }
#
# client.create_collection(collection_name, collection_param)
# client.create_partition(collection_name, "Movie")
#
# print("--------get collection info--------")
# collection = client.get_collection_info(collection_name)
# pprint(collection)
# partitions = client.list_partitions(collection_name)
# print("\n----------list partitions----------")
# pprint(partitions)
# ids = client.insert()
import codecs
import sys
sys.path.append("milvus_tool")
from milvus_tool.milvus_insert import VecToMilvus
def get_vectors(path="product_vectors.txt"):
    """Load ids and embedding vectors from a text file.

    Each non-blank line is parsed as ``<id>:<vector>``: the text before the
    first colon is converted with ``int``; the text after it has its first
    and last character dropped (the enclosing brackets, e.g. ``[`` and
    ``]``), is split on commas, and every piece is converted with ``float``.
    Blank or whitespace-only lines are skipped.

    Args:
        path: File to read. Defaults to ``"product_vectors.txt"``, the file
            this function has always read.

    Returns:
        A tuple ``(ids, embeddings)`` of two parallel lists: ``ids`` holds
        one ``int`` per parsed line and ``embeddings`` holds the matching
        ``list`` of ``float`` values.

    Raises:
        ValueError: If a non-blank line has no ``:`` separator, or if an id
            or vector component is not a valid number.
    """
    ids = []
    embeddings = []
    # errors='ignore' silently drops bytes that are not valid UTF-8.
    with codecs.open(path, "r", encoding='utf-8', errors='ignore') as f:
        # Stream the file line by line instead of materializing it and
        # walking it twice (once for ids, once for vectors).
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                # A blank line (typically a trailing one at EOF) carries no
                # record; feeding it to int() would raise.
                continue
            id_text, sep, vector_text = line.partition(":")
            if not sep:
                raise ValueError(
                    "{}:{}: expected '<id>:[v1, v2, ...]' but found no ':'".format(
                        path, line_no
                    )
                )
            ids.append(int(id_text))
            # [1:-1] removes the first and last character around the values.
            embeddings.append([float(x) for x in vector_text[1:-1].split(",")])
    return ids, embeddings
# Script body: runs at import/execution time. Reads the ids and vectors via
# get_vectors() and inserts them into a Milvus collection.
ids, embeddings = get_vectors()
# Name of the target collection passed to the insert call below.
collection_name = 'demo_e_commerce'
# VecToMilvus is imported from milvus_tool.milvus_insert; it is constructed
# with no arguments here (presumably it picks up its own connection
# settings -- verify against milvus_tool).
client = VecToMilvus()
# Insert all vectors with their ids under the "Product" partition tag.
# The call's two return values are unpacked into `status` and `ids`, so the
# `ids` list read from the file is rebound to whatever insert() returns.
# NOTE(review): `status` is never inspected afterwards, so a failed insert
# would go unnoticed -- confirm whether it should be checked.
status, ids = client.insert(collection_name=collection_name, vectors=embeddings, ids=ids, partition_tag="Product")