-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmilvus_import.py
63 lines (50 loc) · 1.87 KB
/
milvus_import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import requests
from pymilvus import Collection, connections
# Configuration
IMAGE_FOLDER = "./images"  # directory scanned for JPEG files
BENTOML_URL = "http://127.0.0.1:3000/vectorize"  # BentoML endpoint queried for vectors
COLLECTION_NAME = "image_vectors"  # Milvus collection that receives the vectors

# Open the "default" Milvus connection and bind a handle to the collection.
# NOTE(review): no schema is passed, so this presumably requires the
# collection to exist already - confirm against the pymilvus version in use.
connections.connect(alias="default", host="127.0.0.1", port="19530")
collection = Collection(COLLECTION_NAME)

# Report when the collection already holds entities. This tests for data,
# not for the existence of the collection.
if not collection.is_empty:
    print(f"Collection {COLLECTION_NAME} already exists and contains data.")
# Process and insert images
def process_and_insert_images():
    """Vectorize every JPEG in IMAGE_FOLDER and insert the results into Milvus.

    Each ``.jpg``/``.jpeg`` file (case-insensitive) is POSTed to the service
    at BENTOML_URL. A file is reported and skipped when it cannot be read,
    the request fails or times out, the status is not 200, or the response
    body is not a JSON object containing a ``"vector"`` key. All successful
    (file name, vector) pairs are inserted into the module-level
    ``collection`` with a single ``insert`` call.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    # (connect, read) timeout in seconds so a stalled service cannot hang the
    # whole import; the read timeout is generous because vectorizing is slow.
    request_timeout = (5, 60)

    image_names = []
    vectors = []

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for image_file in sorted(os.listdir(IMAGE_FOLDER)):
        if not image_file.lower().endswith((".jpg", ".jpeg")):
            continue

        image_path = os.path.join(IMAGE_FOLDER, image_file)
        print(f"Processing {image_path}...")

        # Call the BentoML service to get the vector. One unreadable file or
        # failed request must not abort the rest of the batch.
        try:
            with open(image_path, "rb") as image:
                response = requests.post(
                    BENTOML_URL,
                    headers={"Content-Type": "image/jpeg"},
                    data=image,
                    timeout=request_timeout,
                )
        except (OSError, requests.RequestException) as exc:
            print(f"Failed to get vector for {image_file}. Error: {exc}")
            continue

        # Parse the body once; a 200 response with a non-JSON body is treated
        # as a failure for this file instead of raising.
        payload = None
        if response.status_code == 200:
            try:
                payload = response.json()
            except ValueError:
                payload = None

        if isinstance(payload, dict) and "vector" in payload:
            image_names.append(image_file)
            vectors.append(payload["vector"])
        else:
            print(f"Failed to get vector for {image_file}. Response: {response.text}")

    # Insert into Milvus. The two lists grow in lockstep, so checking one
    # is enough.
    if not image_names:
        print("No data to insert.")
        return

    collection.insert([image_names, vectors])
    print(f"Inserted {len(image_names)} images into Milvus.")
# Run the process
process_and_insert_images()

# Flush before indexing, following the insert -> flush -> create_index -> load
# order used in the pymilvus examples.
collection.flush()

# Build an IVF_FLAT index with L2 distance on the "vector" field.
collection.create_index(
    field_name="vector",
    index_params={"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 128}},
)

# Load the collection once, after flushing and indexing, so it can be queried.
collection.load()
print(f"Collection {COLLECTION_NAME} is ready for querying!")