Rashad Tockey
Rashad Tockey

Reputation: 193

No Results Shown for Image Similarity Search with Milvus

I am following the notebook at this link to build an image similarity search: https://github.com/towhee-io/examples/blob/main/image/reverse_image_search/1_build_image_search_engine.ipynb

Below is the code I am working with:

import csv
from glob import glob
from pathlib import Path
from statistics import mean

from towhee import pipe, ops, DataCollection
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility


# Towhee parameters
# Name handed to ops.image_embedding.timm(model_name=...) in the embedding pipeline.
MODEL = 'resnet50'

# Milvus parameters
# [MY_HOST] / [MY_PORT] look like redacted placeholders — substitute the real
# Milvus host and port before running.
HOST = [MY_HOST]
PORT = [MY_PORT]
# NOTE(review): TOPK, DIM and COLLECTION_NAME are not referenced by the code
# shown on this page; the search uses a literal limit=5 and the collection
# opened later is "clothes", not 'images'.
TOPK = 5
DIM = 2048  # presumably the embedding dimension of the model — TODO confirm
COLLECTION_NAME = 'images'
INDEX_TYPE = 'IVF_FLAT'
METRIC_TYPE = 'L2'

# Index settings passed to collection.create_index() below.
index_params = {
    'metric_type': METRIC_TYPE,
    'index_type': INDEX_TYPE,
    'params': {"nlist": 2048}
}

# Build an index named "image_index" on the 'image' field.
# NOTE(review): `collection` is only assigned further down the page
# (collection = Collection(name=...)); this call has to run after that.
collection.create_index(field_name='image', index_params=index_params, index_name = "image_index")

I vectorized only the images, and then saved each vector to Milvus together with its metadata:

# Load image path
def load_image(x):
    """Yield image paths described by ``x``.

    Args:
        x: Either the path of a CSV file (any string ending in ``csv``) or a
            glob pattern.

    Yields:
        For a CSV file, the second column of every non-blank row after the
        header row; otherwise every path matched by ``glob(x)``.
    """
    if x.endswith('csv'):
        # newline='' is what the csv module expects for files it reads, so
        # that newlines embedded in quoted fields are handled correctly.
        with open(x, newline='') as f:
            reader = csv.reader(f)
            # Skip the header row. The default keeps an empty file from
            # raising StopIteration, which inside a generator is converted
            # into RuntimeError.
            next(reader, None)
            for item in reader:
                # csv.reader yields [] for blank lines (e.g. a trailing empty
                # line); skip those instead of failing on item[1].
                if item:
                    yield item[1]
    else:
        yield from glob(x)
            
# Embedding pipeline
# Takes one input 'src', expands it into individual image paths with
# load_image, decodes each path into an image, and embeds the image with the
# timm model named by MODEL. No output is declared here; callers append
# .output(...) or further .map(...) steps.
p_embed = (
    pipe.input('src')
        # one 'img_path' per item yielded by load_image(src)
        .flat_map('src', 'img_path', load_image)
        .map('img_path', 'img', ops.image_decode())
        .map('img', 'vec', ops.image_embedding.timm(model_name=MODEL))
)


# [MY_IMAGE_PATH] looks like a redacted placeholder — presumably a glob
# pattern or CSV path string, since load_image calls x.endswith() on it.
image_save_dir = [MY_IMAGE_PATH]
# Expose the path, decoded image and embedding of every input image.
p_display = p_embed.output('img_path', 'img', 'vec') 
# Run the pipeline over the image source and wrap the output for inspection.
result = DataCollection(p_display(image_save_dir))

# check result
result.show()
# Print the path and embedding of the first processed image as a sanity check.
print(result[0]['img_path'])
print(result[0]['vec'])    

# Connect to Milvus and open the collection named "clothes". No schema is
# passed, so the collection presumably already exists — TODO confirm.
connections.connect(alias='default', host=HOST, port=PORT)
collection_name = "clothes"
collection = Collection(name = collection_name)

# Insert one row per embedded image. clothes_id is the row's position in
# `result`; every metadata field is the same hard-coded value for all rows.
# NOTE(review): insert() is called once per row and no flush() call is visible
# in this snippet — confirm the rows are persisted before searching.
for i, r in enumerate(result):
    vector = r['vec']
    collection.insert([
        {
            "clothes_id" : i,
            "category" : "top",
            "color" : "black",
            "image" : vector,  # embedding stored in the indexed 'image' field
            "gender" : ["F"],
            "style" : ["casual"],
            "thickness" : [],
            "season" : ["spring"]
        }
    ])

Then I tried to retrieve the IDs of the most similar images by running an image similarity search:

# Search pipeline: extends the embedding pipeline with a Milvus ANN search on
# each 'vec' against the "clothes" collection (at most 5 hits, literal limit=5),
# then reduces every hit to its first element.
p_search_pre = (
        p_embed.map('vec', ('search_res'), ops.ann_search.milvus_client(
                    host=HOST, port=PORT, limit=5,
                    collection_name="clothes"))
               .map('search_res', 'pred', lambda x: [y[0] for y in x]) # keep y[0] of each hit (presumably the primary key — TODO confirm)
)

# Return the query image path together with the reduced search results.
p_search = p_search_pre.output('img_path', 'pred') 

# Search for example query image(s)
# The collection is loaded before the query is executed.
collection.load()
dc = p_search('[MY_IMAGE_PATH]/test37.png')

# Display search results with image paths
DataCollection(dc).show()

However, no results were shown when I displayed the output. To make sure the search setup was working properly, I downloaded the images stored in Milvus, saved them to the image path, and calculated the cosine similarity for the same image.

from numpy import dot
from numpy.linalg import norm
import numpy as np

def cos_similarity(A, B):
    """Return the cosine similarity of vectors ``A`` and ``B``.

    Computed as the dot product of the two vectors divided by the product of
    their norms.
    """
    inner_product = dot(A, B)
    magnitude_product = norm(A) * norm(B)
    return inner_product / magnitude_product

# Sanity check: compare a freshly computed embedding with the vector stored in
# Milvus for clothes_id 38.
# NOTE(review): test_image is assigned but never used below; the pipeline is
# run on image_save_dir and result[0] is what gets compared — confirm that
# result[0] really is the test38 image.
test_image = '[MY_IMAGE_PATH]/test38.png'
p_display = p_embed.output('img_path', 'img', 'vec') 
result = DataCollection(p_display(image_save_dir))

collection_name = 'clothes' 
collection = Collection(name=collection_name)

# get image vector where clothes_id=38
save_results = collection.query(
    expr="clothes_id == 38",
    output_fields=["clothes_id", "category", "color", "gender", "style", "thickness", "season", "image"]
)

# An empty query result means no row with clothes_id 38 was returned.
if save_results:
    saved_image_vector = save_results[0]["image"]
    # Convert both vectors to numpy arrays before computing the similarity.
    result_vector = np.array(result[0]['vec'])
    saved_image_vector = np.array(saved_image_vector)

    # get similarity
    similarity = cos_similarity(result_vector, saved_image_vector)
    print(f"cosine similarity : {similarity}")
else:
    print("there is no images")

The cosine similarity I got was about 1, so the stored vector matches the recomputed one. I am confused about what the problem could be, and I do not understand why the image similarity search returns no results.

Upvotes: 0

Views: 54

Answers (1)

rachel song
rachel song

Reputation: 54

I followed your code structure and ran the code below for the section where you perform the similarity search to get the ID values, and it successfully returned results.

# Search pipeline from the answer: embeds the query image, runs a Milvus ANN
# search against the "clothes" collection (limit=5), and keeps the first
# element of every hit. `(y[0])` is the same expression as `y[0]`; the
# parentheses do not create a tuple.
p_search_pre = (
        p_embed.map('vec', ('search_res'), ops.ann_search.milvus_client(
                    host=HOST, port=PORT, limit=5, collection_name="clothes"))
                .map('search_res', 'pred', lambda x: [(y[0]) for y in x]) 
)

Upvotes: 0

Related Questions