Gabriel Lisboa
Gabriel Lisboa

Reputation: 1

ScoredVector has no attribute 'metadata' at ['['received_data', 'matches', 0]']['metadata']

I'm building a RAG chatbot using HuggingFace, Mistral, LangChain and Pinecone.

I have a Python script that watches for changes in my MongoDB collection and sends the data to Pinecone as vectors.

import os
from pymongo import MongoClient
from pinecone import Pinecone, ServerlessSpec
from pymongo.errors import OperationFailure
from sentence_transformers import SentenceTransformer, util
from certifi import where  # Import certifi library

# MongoDB connection.
# The connection string embeds the database credentials, so it is read from
# the environment instead of being hard-coded in the source. A missing
# variable raises KeyError immediately rather than failing later on connect.
client = MongoClient(
    os.environ["MONGODB_URI"],
    tls=True,  # Enable TLS encryption
    tlsAllowInvalidCertificates=False,  # Don't allow invalid certificates
    tlsCAFile=where(),  # Use certifi library for CA bundle
)
db = client['test']
collection = db['reflections']

# Pinecone initialization.
# NOTE: any API key that was previously committed in this file should be
# treated as leaked and rotated.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index("langchain-demo")

# Embedding model. A throwaway string is encoded once so the dimension of the
# vectors the model produces is printed at start-up.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
test_vector = model.encode("This is a test string")
print("Dimension of test vector:", test_vector.shape)

# Watch the collection and mirror inserts, content updates and deletes into
# the Pinecone index.
try:
    # Using the change stream as a context manager closes the cursor even
    # when handling a change raises.
    with collection.watch() as stream:
        for change in stream:
            print("Change detected:", change)
            operation = change['operationType']

            if operation == 'insert':
                document = change['fullDocument']
                content = document['content']
                vector = model.encode(content).tolist()
                print("Extracted Vector:", vector)

                # IDs and metadata values are stringified before being sent.
                document_id = str(document['_id'])
                metadata = {
                    'user_id': str(document['user']),
                    'created_at': str(document['createdAt']),
                    # LangChain's Pinecone vector store builds retrieved
                    # documents from the `text` metadata key (its default
                    # text_key), so the raw content is stored with the vector.
                    # NOTE(review): very long content may exceed Pinecone's
                    # per-vector metadata size limit - confirm.
                    'text': content,
                }

                # Skip the upsert when the encoder produced no values
                if vector:
                    index.upsert([(document_id, vector, metadata)])

            elif operation == 'update':
                document_id = str(change['documentKey']['_id'])
                updated_fields = change['updateDescription']['updatedFields']
                if 'content' in updated_fields:
                    content = updated_fields['content']
                    vector = model.encode(content).tolist()
                    # This branch has no user/createdAt values to rebuild the
                    # metadata from, and reusing the `metadata` variable of an
                    # earlier insert would attach another document's data (or
                    # raise NameError if no insert happened first). update()
                    # replaces the values and only the listed metadata
                    # fields, leaving the rest of the stored metadata intact.
                    index.update(
                        id=document_id,
                        values=vector,
                        set_metadata={'text': content},
                    )

            elif operation == 'delete':
                document_id = str(change['documentKey']['_id'])
                index.delete(ids=[document_id])

except OperationFailure as e:
    print("Error watching collection:", e)
except Exception as e:
    # Top-level boundary of the script: report the failure and stop watching
    print("An error occurred:", e)

This is my chatbot Python script:

from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Pinecone as PineconeStore
from langchain_huggingface import HuggingFaceEndpoint
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

class ChatBot:
    """RAG chatbot backed by a Pinecone index and a Hugging Face hosted LLM.

    Constructing an instance loads environment variables via ``load_dotenv``,
    opens the ``langchain-demo`` Pinecone index and assembles ``rag_chain``,
    a LangChain pipeline of retriever -> prompt -> LLM -> string output.
    """

    # Length of the placeholder query vector used for metadata-only lookups.
    # It must equal the dimension of the Pinecone index.
    EMBEDDING_DIM = 384

    def __init__(self):
        """Connect to Pinecone and build the retrieval-augmented chain.

        Reads ``PINECONE_API_KEY`` and ``HUGGINGFACE_API_KEY`` from the
        environment (after loading a ``.env`` file, if present).
        """
        load_dotenv()

        self.pc = Pinecone(
            api_key=os.getenv("PINECONE_API_KEY"),
        )

        self.index_name = "langchain-demo"
        self.index = self.pc.Index(self.index_name)

        # Initialize embeddings and Pinecone store for document retrieval
        self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        self.docsearch = PineconeStore.from_existing_index(self.index_name, self.embeddings)

        # Setup LLM
        self.llm = HuggingFaceEndpoint(
            repo_id='mistralai/Mixtral-8x7B-Instruct-v0.1',
            temperature=0.8, top_p=0.8, top_k=50,
            huggingfacehub_api_token=os.getenv('HUGGINGFACE_API_KEY')
        )

        # Define the prompt template
        self.template = PromptTemplate(template="""
            You are a seer. These Humans will ask you questions about their life. Use the following piece of context to answer the question. 
            If you don't know the answer, just say you don't know.
            You answer with short and concise answers, no longer than 2 sentences.

            Context: {context}
            Question: {question}
            Answer: 
        """, input_variables=["context", "question"])

        # The incoming question is passed through unchanged as `question`
        # and is also handed to the retriever to produce `context`.
        self.rag_chain = (
            {"context": self.docsearch.as_retriever(), "question": RunnablePassthrough()}
            | self.template
            | self.llm
            | StrOutputParser()
        )

    def get_user_data_from_pinecone(self, user_id, top_k=10):
        """Query the index for vectors whose ``user_id`` metadata matches.

        Args:
            user_id: Value compared against the ``user_id`` metadata field.
            top_k: Maximum number of matches requested (default 10).

        Returns:
            The response of ``self.index.query``, requested with both
            metadata and vector values included.
        """
        # The query vector is all zeros, so the metadata filter does the
        # selecting here rather than similarity.
        # NOTE(review): confirm the index metric accepts a zero query vector.
        placeholder_vector = [0] * self.EMBEDDING_DIM
        return self.index.query(
            vector=placeholder_vector,
            filter={"user_id": user_id},
            top_k=top_k,
            include_metadata=True,
            include_values=True,
        )
    
# Usage example outside the class.
# Guarded so that importing this module (for example to reuse ChatBot) does
# not block on input() or start talking to Pinecone / Hugging Face.
if __name__ == "__main__":
    bot = ChatBot()
    user_id = input("Enter your user ID: ")
    # NOTE(review): user_data is fetched but not used below; the chain's
    # retriever runs without any user filter.
    user_data = bot.get_user_data_from_pinecone(user_id)

    question = input("Ask me anything: ")
    result = bot.rag_chain.invoke(question)
    print("AI's response:", result)

The problem is I'm receiving this error:

raise PineconeApiAttributeError( pinecone.core.client.exceptions.PineconeApiAttributeError: ScoredVector has no attribute 'metadata' at ['['received_data', 'matches', 0]']['metadata']

I'm not sure what the problem is.

Upvotes: 0

Views: 410

Answers (1)

steve-ed
steve-ed

Reputation: 105

The error `PineconeApiAttributeError: ScoredVector has no attribute 'metadata'` indicates that the metadata is missing from the vectors returned by the Pinecone query, even though you set `include_metadata=True`. The structure or keys of the metadata might not match what is expected in the query.

This may be because the metadata is not correctly included in the upsert operation when the vectors are inserted.

To verify that the metadata is correctly included in the upsert operation, update `get_user_data_from_pinecone` by adding the following at the end, in place of the existing return statement:

if 'matches' in result :
   for _match in results['matches']:
         if 'metadata' in _match:
            print("Metadata for vector:", _match['metadata'])
         else:
            print("No metadata found for vector:", _match)
else:
   print('No match founded')
return results
    

Also check the query handling described in the Pinecone documentation. The script above will help you understand why the metadata is missing, so that you can ensure it is correctly indexed and retrieved in your queries.

Upvotes: 0

Related Questions