Reputation: 193
I am following the notebook at this link to build an image similarity search: https://github.com/towhee-io/examples/blob/main/image/reverse_image_search/1_build_image_search_engine.ipynb
Below is the code I am working with:
import csv
from glob import glob
from pathlib import Path
from statistics import mean
from towhee import pipe, ops, DataCollection
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
# Towhee parameters
# Model name handed to ops.image_embedding.timm when the embedding pipeline is built.
MODEL = 'resnet50'
# Milvus parameters
# HOST / PORT are placeholders from the post; they are passed to
# connections.connect and to the Milvus search operator.
HOST = [MY_HOST]
PORT = [MY_PORT]
# NOTE(review): TOPK is not referenced by the visible code; the search
# snippets pass limit=5 literally.
TOPK = 5
# NOTE(review): DIM is not referenced by the visible code; presumably the
# dimension of the 'image' vector field -- confirm against the collection schema.
DIM = 2048
# NOTE(review): COLLECTION_NAME is not referenced by the visible code; every
# snippet opens the collection named "clothes" instead.
COLLECTION_NAME = 'images'
INDEX_TYPE = 'IVF_FLAT'
METRIC_TYPE = 'L2'
# Index definition applied to the 'image' field below.
index_params = {
'metric_type': METRIC_TYPE,
'index_type': INDEX_TYPE,
'params': {"nlist": 2048}
}
# NOTE(review): `collection` is only assigned further down in the post
# (Collection(name="clothes")), so this call has to run after that assignment.
collection.create_index(field_name='image', index_params=index_params, index_name = "image_index")
I vectorized only the images and then saved each vector together with its metadata:
# Load image path
def load_image(x):
    """Yield the image paths described by *x*.

    If *x* ends with ``'csv'`` it is opened as a CSV file: the first row is
    skipped as a header and the second column of every remaining non-empty
    row is yielded. Otherwise *x* is treated as a glob pattern and every
    matching path is yielded.
    """
    if x.endswith('csv'):
        # newline='' lets the csv module do its own line-ending handling,
        # as the csv documentation recommends.
        with open(x, newline='') as f:
            reader = csv.reader(f)
            # A default avoids StopIteration escaping the generator (which
            # would surface as RuntimeError) when the file is empty.
            next(reader, None)
            for item in reader:
                if not item:
                    # csv.reader returns [] for blank lines; nothing to yield.
                    continue
                yield item[1]
    else:
        yield from glob(x)
# Embedding pipeline
# Takes one input ('src'), expands it into image paths with load_image,
# decodes each path into an image and embeds it with the timm model MODEL.
p_embed = (
pipe.input('src')
# one 'src' value fans out into many 'img_path' values
.flat_map('src', 'img_path', load_image)
.map('img_path', 'img', ops.image_decode())
.map('img', 'vec', ops.image_embedding.timm(model_name=MODEL))
)
# Placeholder from the post; it is the value fed to the pipeline as 'src'.
image_save_dir = [MY_IMAGE_PATH]
# Same pipeline, exposing path, decoded image and embedding as outputs.
p_display = p_embed.output('img_path', 'img', 'vec')
result = DataCollection(p_display(image_save_dir))
# check result
result.show()
# Inspect the first row: its source path and its embedding vector.
print(result[0]['img_path'])
print(result[0]['vec'])
# Connect to Milvus and open the existing "clothes" collection.
connections.connect(alias='default', host=HOST, port=PORT)
collection_name = "clothes"
collection = Collection(name=collection_name)

# Build one row per embedded image. The metadata values are the fixed
# placeholders used in the post; only the id and the vector vary per row.
rows = [
    {
        "clothes_id": i,
        "category": "top",
        "color": "black",
        "image": r['vec'],
        "gender": ["F"],
        "style": ["casual"],
        "thickness": [],
        "season": ["spring"],
    }
    for i, r in enumerate(result)
]

# Insert in batches instead of issuing one request per row, while keeping
# each request bounded in size.
INSERT_BATCH_SIZE = 1000
for start in range(0, len(rows), INSERT_BATCH_SIZE):
    collection.insert(rows[start:start + INSERT_BATCH_SIZE])

if rows:
    # Seal the freshly inserted rows once, after the whole bulk load.
    collection.flush()
Then I tried to retrieve the IDs of the most similar images by running an image similarity search:
# Search pipeline: reuse the embedding pipeline, look each 'vec' up in the
# "clothes" collection (at most 5 hits), then reduce every hit to its first element.
# Note: ('search_res') is just the string 'search_res', not a tuple.
p_search_pre = (
p_embed.map('vec', ('search_res'), ops.ann_search.milvus_client(
host=HOST, port=PORT, limit=5,
collection_name="clothes"))
.map('search_res', 'pred', lambda x: [y[0] for y in x]) # keep y[0] of each hit, i.e. its id
)
# Expose the query image path next to the predicted ids.
p_search = p_search_pre.output('img_path', 'pred')
# Search for example query image(s)
collection.load()
dc = p_search('[MY_IMAGE_PATH]/test37.png')
# Display search results with image paths
DataCollection(dc).show()
However, no results were shown when I displayed them. To check whether the search itself was working, I downloaded the images stored in Milvus, saved them in the path, and calculated the cosine similarity for the same image.
from numpy import dot
from numpy.linalg import norm
import numpy as np
def cos_similarity(A, B):
    """Return the cosine similarity of vectors *A* and *B*.

    Computed as ``dot(A, B) / (norm(A) * norm(B))``. A zero vector makes the
    denominator zero; that case is not handled specially here.
    """
    return dot(A, B) / (norm(A) * norm(B))
# Sanity check: embed one known image again and compare it with the vector
# stored in Milvus for the matching id.
test_image = '[MY_IMAGE_PATH]/test38.png'
p_display = p_embed.output('img_path', 'img', 'vec')
# Embed the test image itself. The original embedded `image_save_dir` and
# compared its first row, leaving `test_image` unused, so the comparison
# only matched clothes_id 38 by coincidence.
result = DataCollection(p_display(test_image))
collection_name = 'clothes'
collection = Collection(name=collection_name)
# get image vector where clothes_id=38
save_results = collection.query(
    expr="clothes_id == 38",
    output_fields=["clothes_id", "category", "color", "gender", "style", "thickness", "season", "image"]
)
if save_results:
    saved_image_vector = np.array(save_results[0]["image"])
    result_vector = np.array(result[0]['vec'])
    # get similarity
    similarity = cos_similarity(result_vector, saved_image_vector)
    print(f"cosine similarity : {similarity}")
else:
    print("there is no images")
The similarity I got was about 1. I am confused about what the problem could be, and I am not sure why I cannot perform a proper image similarity search.
Upvotes: 0
Views: 54
Reputation: 54
I followed your code structure and tried the code below for the section where you perform the similarity search to get the ID values, and it successfully returned results.
# Extend the embedding pipeline with a Milvus search over the "clothes"
# collection (at most 5 hits), then keep only the first element of each hit.
p_search_pre = (
    p_embed
    .map(
        'vec',
        'search_res',
        ops.ann_search.milvus_client(
            host=HOST,
            port=PORT,
            limit=5,
            collection_name="clothes",
        ),
    )
    .map('search_res', 'pred', lambda hits: [hit[0] for hit in hits])
)
Upvotes: 0