PlanExecutor error caused by embedding index not indexed as knnVector

Question

I have an ML model written in torch which produces an embedding of size 512.

I wrote a custom script utilizing the model and sending the embedding to the collection (which is empty).

class FaceEmbedding(Model):
    def __init__(self, MONGO_CONNECTION_STRING:str=MONGO_CONNECTION_STRING) -> None:
        super().__init__()
        self.FaceEmbeddingModel = torch.load('Model/model2.pth')
        self.device = next(self.FaceEmbeddingModel.parameters()).device
        self.T = v2.Compose([
            v2.ToImage(),
            v2.ToDtype(torch.float32),
            v2.Resize(256),
            v2.CenterCrop(224),
            v2.Normalize(mean = [0.48235, 0.45882, 0.40784], 
                         std=[0.00392156862745098, 0.00392156862745098, 0.00392156862745098])
        ])
        self.dbName = "FaceSimilarity"
        self.collectionName = "Embeddings"
        self.client = MongoClient(MONGO_CONNECTION_STRING)
        self.collection = self.client[self.dbName][self.collectionName]
        
    def __makeEucEmbeddings(self, img:np.ndarray)->np.ndarray:
        img_t = self.T(img)
        img_t = torch.unsqueeze(img_t, dim=0)
        embedding = self.FaceEmbeddingModel.pos(img_t.to(self.device))
        del img_t
        torch.cuda.empty_cache()
        return embedding.squeeze().cpu().detach().numpy()

    def makeEmbeddings(self, img:np.ndarray, k:int):
        face_locations = fr.face_locations(img)
        sorted(face_locations, key = lambda rect: abs(rect[2]-rect[0])*abs(rect[1]-rect[3]))
        face_locations = face_locations[:k][::-1]
        EucEmb = []
        FREmb = []
        for face in face_locations:
            top, right, bottom, left = face
            face_img = img[top:bottom, left:right]
            FREmb.append(fr.face_encodings(img, [face])[0])
            Euc = self.__makeEucEmbeddings(face_img)
            EucEmb.append(Euc.tolist())
        return EucEmb, FREmb, face_locations

    def __make_pipeline(self, EucEmb): 
        pipeline = [{
            "$vectorSearch": {
                "index": "vector_index",
                "path": "EuclidianEmbedding",
                "queryVector":EucEmb,
                "numCandidates":200,
                "limit":10
            }
        }]
        return pipeline
        
    def saveEmbedding(self, embeddings)->None:
        data = []
        for EucEmb, FREmb in embeddings:
            data.append({
                "EuclidianEmbedding":EucEmb,
                "FREmbedding":FREmb
            })
        self.collection.insert_many(data)
    
    def __vectorSearch(self, img, k):
        EucEmb, FREmb, face_locations = self.makeEmbeddings(img, k)
        ResEmb = []
        if isinstance(EucEmb, list):
            for emb in EucEmb:
                ResEmb.append(self.collection.aggregate(self.__make_pipeline(emb)))
        RecFace = []
        NotRecFace = []
        for emb in range(len(FREmb)):
            match = fr.compare_faces([i['FREmbedding'] for i in ResEmb], FREmb[emb])[0]
            if True in match:
                idx = match.index(True)
                RecFace.append(ResEmb[idx])
            else:
                idx = FREmb.index(emb)
                NotRecFace.append([
                    EucEmb[emb],
                    FREmb[emb]
                ])
        return RecFace, NotRecFace, face_locations
    
    def vectorSearch(self, img, k, SaveNotRecFace=False):
        RecFace, NotRecFace, face_locations = self.__vectorSearch(img, k)        
        if SaveNotRecFace:
            for embedding in NotRecFace:
                self.saveEmbedding(embedding)
        return RecFace, NotRecFace, face_locations

I am getting this error upon running the script on various images with one face common among them.

OperationFailure: PlanExecutor error during aggregation :: caused by :: EuclidianEmbedding_1 is not indexed as knnVector, full error: {'ok': 0.0, 'errmsg': 'PlanExecutor error during aggregation :: caused by :: EuclidianEmbedding_1 is not indexed as knnVector', 'code': 8, 'codeName': 'UnknownError', '$clusterTime': {'clusterTime': Timestamp(1716546450, 2), 'signature': {'hash': b'\xf4B1N\xc2\xffG\x9d$J}\xea\xad\xfe\xdfz\x83Cx\x80', 'keyId': 7345064297216606213}}, 'operationTime': Timestamp(1716546450, 2)}

Defined Indexes in the Mongo Compass for the collection

In Atlas Vector Search, I defined this JSON

{
  "fields": [
    {
      "numDimensions": 512,
      "path": "EuclideanEmbedding",
      "similarity": "euclidean",
      "type": "vector"
    }
  ]
}

I searched this up on the docs of MongoDB and came across the knnVector type index in Atlas Vector Search but, changing the type in the JSON gives error again.

PlanExecutor error caused by embedding index not indexed as knnVector

Answers (1)

Related Questions