Renata Ka

Reputation: 91

AttributeError: 'VectorStoreIndex' object has no attribute 'documents'

I am building my first chatbot trained on my database. I'm stuck and can't move forward with the last part of my code :( My code looks as follows:

import os
from llama_index import SimpleDirectoryReader, GPTListIndex, GPTVectorStoreIndex, LLMPredictor, PromptHelper
from langchain import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch, FAISS
from langchain.document_loaders import TextLoader, PyPDFLoader, DirectoryLoader
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import VectorStoreRetrieverMemory
from langchain.chat_models import ChatOpenAI
import gradio as gr 
os.environ["OPENAI_API_KEY"] = "key"  # my API key
llm_name = "gpt-3.5-turbo"
from llama_index import ServiceContext, StorageContext, load_index_from_storage

def create_index(path):
    max_input = 4096
    num_output = 4096
    tokens = 200
    chunk_size = 500  # for LLM, we need to define chunk size

    # define prompt
    promptHelper = PromptHelper(max_input, num_output, chunk_overlap_ratio=0.1, chunk_size_limit=1024)

    # define LLM — there could be many models we could use, but in this example, let's go with an OpenAI model
    llmPredictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", max_tokens=tokens))

    # load data — it will take all the .txt files, if there is more than one
    csv_docs = SimpleDirectoryReader("/Python_script/llama/llama").load_data()
    pdf_docs = SimpleDirectoryReader("/Python_script/llama/llama").load_data()
    docs = csv_docs + pdf_docs

    # create vector index
    service_context = ServiceContext.from_defaults(llm_predictor=llmPredictor, prompt_helper=promptHelper)

    vectorIndex = GPTVectorStoreIndex.from_documents(documents=docs, service_context=service_context)
    vectorIndex.storage_context.persist(persist_dir="store_test")
import gradio as gr

from llama_index import GPTVectorStoreIndex, StorageContext, LLMPredictor, load_index_from_storage, SimpleDirectoryReader

storage_context = StorageContext.from_defaults(persist_dir="/Users/renia/Praca_dyplomowa/Python_script/store_test")
index = load_index_from_storage(storage_context)

# Define the LLMPredictor with OpenAI model
tokens = 200
predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", max_tokens=tokens))

# Get the documents from the index
documents = SimpleDirectoryReader("/Python_script/store_test").load_data()

# Create a new GPTVectorStoreIndex using the loaded documents
vector_index = GPTVectorStoreIndex.from_documents(documents=documents, predictor=predictor)

# Load the previously created vector index from storage using the specified index ID (or default ID)
vectorIndex = load_index_from_storage(storage_context)

# Create OpenAIEmbeddings
embedding_size = 1536  # Dimensions of the OpenAIEmbeddings
embedding_fn = OpenAIEmbeddings().embed_query

# Create DocArrayInMemorySearch retriever

def to_vector_store(vector_store_index):
    return VectorStore(vector_store_index.documents, vector_store_index.embeddings)

retriever = {
    "name": "DocArrayInMemorySearch",
    "params": {
        "vector_index": vectorIndex,
        "embedding_fn": embedding_fn,
    },
    "vectorstore": to_vector_store(vectorIndex),
}

# Create VectorStoreRetrieverMemory with the retriever
memory = VectorStoreRetrieverMemory(retriever=retriever)

# Define the chat function
def chatbot_response(message, history):
    # Convert history to a list of (user_input, bot_response) tuples
    history_list = []
    for user_input, bot_response in history:
        history_list.append((f"User: {user_input}", f"Bot: {bot_response}"))

    # Retrieve relevant memories based on previous conversations
    relevant_memories = memory.predict_new_summary(messages=history_list, previous_summary="")

    # Concatenate all relevant memories into a single string
    relevant_memories_str = "\n".join(relevant_memories)

    # Get the bot's response using the vector index, considering both the message and relevant memories
    bot_response = vectorIndex.query(relevant_memories_str + f"\nUser: {message}\n")

    # Only take the last part of the response which is the bot's response
    bot_response = bot_response.split("Bot:")[-1].strip()

    return bot_response

# Create the Gradio chat interface
gr.ChatInterface(chatbot_response,
                 title="Iron Ladies Chatbot",
                 description="Ask the Iron Ladies chatbot any question!",
                 theme="dark",
                 examples=[
                     ["Tell me about Iron Ladies.", "The Iron Ladies is a team of powerful female superheroes :)"],
                     ["What are their powers?", "Each member of the Iron Ladies has unique superpowers."],
                     ["Who is the leader?", "The leader of the Iron Ladies is Maja - natural born leader :D."],
                 ])

Once I execute the code I get the following error:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[32], line 37
     27 def to_vector_store(vector_store_index):
     28     return VectorStore(vector_store_index.documents, vector_store_index.embeddings)
     31 retriever = {
     32     "name": "DocArrayInMemorySearch",
     33     "params": {
     34         "vector_index": vectorIndex,
     35         "embedding_fn": embedding_fn,
     36     },
---> 37     "vectorstore": to_vector_store(vectorIndex),
     38 }
     40 # Create VectorStoreRetrieverMemory with the retriever
     41 memory = VectorStoreRetrieverMemory(retriever=retriever)

Cell In[32], line 28, in to_vector_store(vector_store_index)
     27 def to_vector_store(vector_store_index):
---> 28     return VectorStore(vector_store_index.documents, vector_store_index.embeddings)

AttributeError: 'VectorStoreIndex' object has no attribute 'documents'

Does anyone have a clue how I can move forward, load the vectorIndex, and give my chatbot memory?

I've tried to update my code but I'm unable to find a solution. Each time I make any amendments, I get new errors relating to vectorIndex. I tried to get some help from GPT or Bard, but they are not that smart ;)

Upvotes: 3

Views: 10663

Answers (3)

harsh

Reputation: 1

If anyone is stuck on saving and loading the index, you might try this:

# Persist index to disk
index.storage_context.persist("naval_index")

from llama_index import StorageContext, load_index_from_storage

# Rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir="naval_index")

# Load index from the storage context
new_index = load_index_from_storage(storage_context)

new_query_engine = new_index.as_query_engine()
response = new_query_engine.query("who is this text about?")
print(response)

Upvotes: 0

Melinda

Reputation: 127

I ran into the same error while trying to build my chatbot refined on custom data. I had implemented the code from this tutorial: https://docs.kanaries.net/tutorials/ChatGPT/how-to-train-chatgpt

To be honest, I think it was just stolen from somewhere else, because the article was posted in July, yet much of the code had already been deprecated for a long time (months, if not years).

I "worked around" the load_index_from_storage() issue, based on Andrew Arrow's answer. I am not reading the indices after saving them, but just keeping them in memory (I know, dirty).

You can do that by modifying the create_index() function to return the index, and calling it before defining the chat interface.
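
A minimal sketch of that workaround, applied to the create_index() from the question (the service context and prompt helper are omitted for brevity; the paths are the question's):

from llama_index import SimpleDirectoryReader, GPTVectorStoreIndex

def create_index(path):
    # build the index from the documents, as in the question
    docs = SimpleDirectoryReader(path).load_data()
    index = GPTVectorStoreIndex.from_documents(documents=docs)
    # persisting is now optional, since the index stays in memory
    index.storage_context.persist(persist_dir="store_test")
    return index

# call it once, before defining the chat interface
vectorIndex = create_index("/Python_script/llama/llama")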

I then ran into another error, namely AttributeError: 'VectorStoreIndex' object has no attribute 'query'. I see that you are using the same call, so you may encounter it too.
The solution to that was provided by VirajOke in this thread: https://github.com/jerryjliu/llama_index/issues/2497
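
Applied to the question's chatbot_response() function, that fix roughly means replacing the direct vectorIndex.query(...) call with a query engine:

# instead of: bot_response = vectorIndex.query(prompt)
prompt = relevant_memories_str + f"\nUser: {message}\n"
query_engine = vectorIndex.as_query_engine()
bot_response = query_engine.query(prompt).response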

I will post my full code below, I just tested it and it works. Hope this helps. :)

import os
from langchain import OpenAI
import openai
import gradio as gr
import sys
key = 'your_api_key' # it's important to set the key before importing llama_index, as the library does not update environmental variables once loaded   
openai.api_key = key
os.environ["OPENAI_API_KEY"] = key
from llama_index import SimpleDirectoryReader, GPTListIndex, GPTVectorStoreIndex, LLMPredictor, PromptHelper, load_index_from_storage

def construct_index(directory_path):
    max_input_size = 4096
    num_outputs = 512
    max_chunk_overlap = 0.1 #20
    chunk_size_limit = 600

    prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
    llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.7, model_name="gpt-3.5-turbo", max_tokens=num_outputs))
    documents = SimpleDirectoryReader(directory_path).load_data()
    index = GPTVectorStoreIndex(documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    index.storage_context.persist(persist_dir="index.json")
    return index

index = construct_index("docs")

def chatbot(input_text):
    query_engine = index.as_query_engine()
    response = query_engine.query(input_text)
    return response.response
 
iface = gr.Interface(fn=chatbot,
                     inputs=gr.inputs.Textbox(lines=7, label="Enter your text"),
                     outputs="text",
                     title="My AI Chatbot")
 
iface.launch(share=True)

Upvotes: 2

Andrew Arrow

Reputation: 4585

The attribute names should be docs and predictor instead:

from llama_index import VectorStore

def to_vector_store(vector_store_index):
    return VectorStore(vector_store_index.docs, vector_store_index.predictor)

You have already created the GPTVectorStoreIndex object using the variable vector_index, but in the subsequent code, you are using vectorIndex (capital "I") instead.

Use vector_index:

def to_vector_store(vector_store_index):
    return VectorStore(vector_store_index.docs, vector_store_index.predictor)

vector_index = load_index_from_storage(storage_context)

embedding_size = 1536
embedding_fn = OpenAIEmbeddings().embed_query

retriever = {
    "name": "DocArrayInMemorySearch",
    "params": {
        "vector_index": vector_index,
        "embedding_fn": embedding_fn,
    },
    "vectorstore": to_vector_store(vector_index),
}

Upvotes: 0
