forked from MatteoFasulo/Weaviate-Search-Engine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupload_img.py
70 lines (58 loc) · 2.34 KB
/
upload_img.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
###################################################################################################
# @Weaviate - https://github.com/weaviate/weaviate-examples/tree/main/nearest-neighbor-dog-search #
# #
###################################################################################################
import os, re
import weaviate
def set_up_batch():
    """
    Apply the batching configuration used for deleting and importing data.

    The settings are applied to the module-level ``client`` through
    ``client.batch.configure``; nothing is returned.
    """
    # Keep all tunables in one mapping so they are easy to scan and adjust.
    batch_settings = {
        "batch_size": 100,
        "dynamic": True,
        "timeout_retries": 3,
        "callback": None,
    }
    client.batch.configure(**batch_settings)
def clear_up_MyImages():
    """
    Empty the MyImages collection so the import can be rerun with new pictures.

    A single filtered delete is issued through the module-level ``client``.
    The filter selects every object whose ``text`` property is not equal to
    "x", which is intended to match the whole collection.
    """
    # Same structure as the where operator of the GraphQL API.
    match_filter = {
        "operator": "NotEqual",
        "path": ["text"],
        "valueString": "x",
    }

    with client.batch as batch:
        batch.delete_objects(
            class_name="MyImages",
            where=match_filter,
            output="verbose",
        )
def import_data():
    """
    Import every base64-encoded image in ./b64_img into the MyImages collection.

    Each ``<name>.b64`` file is read, stripped of newlines and spaces, and
    queued on the module-level ``client`` batch as an object with two
    properties: ``image`` (the base64 payload) and ``text`` (the file name
    without its trailing ``.b64`` suffix). Directory entries that do not end
    in ``.b64`` are skipped.
    """
    source_dir = "./b64_img"
    suffix = ".b64"

    with client.batch as batch:
        for encoded_file_name in os.listdir(source_dir):
            # Only .b64 files hold encoded images; other entries (stray
            # files, sub-directories) would fail to open or upload junk.
            if not encoded_file_name.endswith(suffix):
                continue

            with open(os.path.join(source_dir, encoded_file_name)) as file:
                # The payload may be wrapped over several lines; collapse it
                # into one contiguous string without newlines or spaces.
                base64_encoding = file.read().replace("\n", "").replace(" ", "")

            # Drop only the trailing suffix to recover the original file name.
            image_file = encoded_file_name[: -len(suffix)]

            # Property names are expected to match the MyImages schema,
            # which is defined outside this script.
            data_properties = {
                "image": base64_encoding,
                "text": image_file,
            }
            batch.add_data_object(data_properties, "MyImages")
# Module-level client shared by the functions above.
client = weaviate.Client(url="http://localhost:8080")

# Run the pipeline in order: configure batching, clear old objects, import.
for step in (set_up_batch, clear_up_MyImages, import_data):
    step()

print("The objects have been uploaded to Weaviate.")