Reputation: 1
I have an ML model written in torch which produces an embedding of size 512.
I wrote a custom script that uses the model and sends the embedding to the collection (which is currently empty).
class FaceEmbedding(Model):
    """Face-recognition model backed by a MongoDB Atlas vector-search collection.

    For every detected face two embeddings are produced:
      * a 512-d embedding from a custom torch model, used as the query vector
        for the Atlas ``$vectorSearch`` stage, and
      * a ``face_recognition`` encoding, used to confirm candidate matches
        returned by the vector search.
    """

    def __init__(self, MONGO_CONNECTION_STRING: str = MONGO_CONNECTION_STRING) -> None:
        """Load the torch model, build the preprocessing pipeline, and open
        the MongoDB collection.

        Args:
            MONGO_CONNECTION_STRING: Mongo URI; defaults to the module-level
                constant of the same name.
        """
        super().__init__()
        self.FaceEmbeddingModel = torch.load('Model/model2.pth')
        # Run inference on whatever device the loaded model already lives on.
        self.device = next(self.FaceEmbeddingModel.parameters()).device
        self.T = v2.Compose([
            v2.ToImage(),
            v2.ToDtype(torch.float32),
            v2.Resize(256),
            v2.CenterCrop(224),
            # std == 1/255: rescales 0..255 pixel values after mean
            # subtraction — presumably to mimic the training normalisation.
            v2.Normalize(mean=[0.48235, 0.45882, 0.40784],
                         std=[0.00392156862745098,
                              0.00392156862745098,
                              0.00392156862745098])
        ])
        self.dbName = "FaceSimilarity"
        self.collectionName = "Embeddings"
        self.client = MongoClient(MONGO_CONNECTION_STRING)
        self.collection = self.client[self.dbName][self.collectionName]

    def __makeEucEmbeddings(self, img: np.ndarray) -> np.ndarray:
        """Return the torch model's embedding for a single face crop.

        Args:
            img: face crop as an HxWxC numpy array.

        Returns:
            1-D numpy array (the squeezed embedding, 512 floats).
        """
        img_t = self.T(img)
        img_t = torch.unsqueeze(img_t, dim=0)  # add batch dimension
        embedding = self.FaceEmbeddingModel.pos(img_t.to(self.device))
        del img_t
        torch.cuda.empty_cache()
        return embedding.squeeze().cpu().detach().numpy()

    def makeEmbeddings(self, img: np.ndarray, k: int):
        """Detect up to ``k`` faces in ``img`` and embed each of them.

        Returns:
            Tuple ``(EucEmb, FREmb, face_locations)``: the torch embeddings
            (as lists of floats), the face_recognition encodings, and the
            face bounding boxes.
        """
        face_locations = fr.face_locations(img)
        # BUG FIX: sorted() returns a new list; the original discarded the
        # result, so the faces were never actually ordered by area.
        face_locations = sorted(
            face_locations,
            key=lambda rect: abs(rect[2] - rect[0]) * abs(rect[1] - rect[3]))
        # NOTE(review): ascending sort + [:k] keeps the k *smallest* faces,
        # then [::-1] puts the largest of those first. If the k largest faces
        # are intended, sort with reverse=True instead — confirm with author.
        face_locations = face_locations[:k][::-1]
        EucEmb = []
        FREmb = []
        for face in face_locations:
            top, right, bottom, left = face
            face_img = img[top:bottom, left:right]
            FREmb.append(fr.face_encodings(img, [face])[0])
            Euc = self.__makeEucEmbeddings(face_img)
            EucEmb.append(Euc.tolist())
        return EucEmb, FREmb, face_locations

    def __make_pipeline(self, EucEmb):
        """Build the aggregation pipeline for one query embedding.

        NOTE(review): "path" must match BOTH the field name written by
        saveEmbedding and the "path" in the Atlas vector index definition;
        a spelling mismatch (Euclidian vs Euclidean) makes $vectorSearch fail.
        """
        pipeline = [{
            "$vectorSearch": {
                "index": "vector_index",
                "path": "EuclidianEmbedding",
                "queryVector": EucEmb,
                "numCandidates": 200,
                "limit": 10
            }
        }]
        return pipeline

    def saveEmbedding(self, embeddings) -> None:
        """Insert embedding pairs into the collection.

        Args:
            embeddings: iterable of ``(euclidean_embedding, fr_encoding)``
                pairs.
        """
        data = []
        for EucEmb, FREmb in embeddings:
            data.append({
                "EuclidianEmbedding": EucEmb,
                # BUG FIX: face_recognition encodings are numpy arrays, which
                # pymongo/BSON cannot encode — convert to a plain list.
                "FREmbedding": list(FREmb),
            })
        # insert_many raises InvalidOperation on an empty document list.
        if data:
            self.collection.insert_many(data)

    def __vectorSearch(self, img, k):
        """Run a vector search for every detected face.

        Returns:
            ``(RecFace, NotRecFace, face_locations)`` — matched documents,
            ``[EucEmb, FREmb]`` pairs for unmatched faces, and the boxes.
        """
        EucEmb, FREmb, face_locations = self.makeEmbeddings(img, k)
        # BUG FIX: materialise each aggregation cursor. The original kept the
        # raw CommandCursor objects, which are not subscriptable and are
        # consumed after one iteration.
        ResEmb = [list(self.collection.aggregate(self.__make_pipeline(emb)))
                  for emb in EucEmb]
        RecFace = []
        NotRecFace = []
        for i, fr_emb in enumerate(FREmb):
            candidates = ResEmb[i] if i < len(ResEmb) else []
            known = [np.array(doc['FREmbedding']) for doc in candidates]
            # BUG FIX: compare_faces returns one bool per known encoding; the
            # original indexed [0], collapsing the list to a single bool and
            # breaking the `True in match` test.
            match = fr.compare_faces(known, fr_emb) if known else []
            if True in match:
                RecFace.append(candidates[match.index(True)])
            else:
                NotRecFace.append([EucEmb[i], fr_emb])
        return RecFace, NotRecFace, face_locations

    def vectorSearch(self, img, k, SaveNotRecFace=False):
        """Recognise up to ``k`` faces in ``img`` against the collection.

        Args:
            img: RGB image as a numpy array.
            k: maximum number of faces to process.
            SaveNotRecFace: when True, persist the embeddings of
                unrecognised faces so future searches can match them.
        """
        RecFace, NotRecFace, face_locations = self.__vectorSearch(img, k)
        if SaveNotRecFace and NotRecFace:
            # BUG FIX: saveEmbedding expects an iterable of (Euc, FR) pairs;
            # the original passed each pair individually, which unpacked the
            # 512-float embedding itself and raised ValueError.
            self.saveEmbedding(NotRecFace)
        return RecFace, NotRecFace, face_locations
I am getting this error upon running the script on various images with one face common among them.
OperationFailure: PlanExecutor error during aggregation :: caused by :: EuclidianEmbedding_1 is not indexed as knnVector, full error: {'ok': 0.0, 'errmsg': 'PlanExecutor error during aggregation :: caused by :: EuclidianEmbedding_1 is not indexed as knnVector', 'code': 8, 'codeName': 'UnknownError', '$clusterTime': {'clusterTime': Timestamp(1716546450, 2), 'signature': {'hash': b'\xf4B1N\xc2\xffG\x9d$J}\xea\xad\xfe\xdfz\x83Cx\x80', 'keyId': 7345064297216606213}}, 'operationTime': Timestamp(1716546450, 2)}
Defined Indexes in the Mongo Compass for the collection
In Atlas Vector Search, I defined this JSON
{
"fields": [
{
"numDimensions": 512,
"path": "EuclideanEmbedding",
"similarity": "euclidean",
"type": "vector"
}
]
}
I searched the MongoDB docs and came across the knnVector index type for Atlas Vector Search, but changing the type in the JSON produces an error again.
Upvotes: 0
Views: 452
Reputation: 11
(Just a heads up, I work for MongoDB)
I believe this error is a bug that's occurring when you index a field as a vector that is not a vector in your documents. Can you double check that the name of the field in the "path" in your index is the name of the field in your documents where your vectors are?
(Also, this error message should be improved shortly.)
Upvotes: 0