Ankit Bansal

Reputation: 2358

RateLimitError in llama_index code with OpenAI API key

My code is

import os
import sys

import transformers
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from llama_index import Document, GPTVectorStoreIndex

os.environ['OPENAI_API_KEY'] = 'my-openapi-key'

# Load the Hugging Face model (loaded but not used below)
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Create a Document object for each text file in the directory
documents = []
for filename in os.listdir("data"):
    with open(os.path.join("data", filename), "r") as f:
        print(filename)
        documents.append(Document(text=f.read(), doc_id=filename))  # text is the first field, not the filename

# Create a GPTVectorStoreIndex object from a list of Document objects
index = GPTVectorStoreIndex.from_documents(documents)

# from_documents() already embeds and indexes the documents,
# so a separate index.index() call is not needed (no such method exists)


# Query the index (index.query() is deprecated in recent versions;
# queries go through a query engine instead)
query = "What is the capital of France?"
query_engine = index.as_query_engine()
response = query_engine.query(query)

# Print the response
print(response)

My input contains just one file with one line. On running this code, I get the error below:

RateLimitError                            Traceback (most recent call last)
File ~/.local/lib/python3.10/site-packages/tenacity/__init__.py:382, in Retrying.__call__(self, fn, *args, **kwargs)
    381 try:
--> 382     result = fn(*args, **kwargs)
    383 except BaseException:  # noqa: B902

File ~/.local/lib/python3.10/site-packages/llama_index/embeddings/openai.py:149, in get_embeddings(list_of_text, engine, **kwargs)
    147 list_of_text = [text.replace("\n", " ") for text in list_of_text]
--> 149 data = openai.Embedding.create(input=list_of_text, model=engine, **kwargs).data
    150 return [d["embedding"] for d in data]

File ~/.local/lib/python3.10/site-packages/openai/api_resources/embedding.py:33, in Embedding.create(cls, *args, **kwargs)
     32 try:
---> 33     response = super().create(*args, **kwargs)
     35     # If a user specifies base64, we'll just return the encoded string.
     36     # This is only for the default case.

File ~/.local/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py:153, in EngineAPIResource.create(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)
    138 (
    139     deployment_id,
    140     engine,
   (...)
    150     api_key, api_base, api_type, api_version, organization, **params
    151 )
--> 153 response, _, api_key = requestor.request(
    154     "post",
    155     url,
    156     params=params,
    157     headers=headers,
    158     stream=stream,
    159     request_id=request_id,
    160     request_timeout=request_timeout,
    161 )
    163 if stream:
    164     # must be an iterator

File ~/.local/lib/python3.10/site-packages/openai/api_requestor.py:230, in APIRequestor.request(self, method, url, params, headers, files, stream, request_id, request_timeout)
    220 result = self.request_raw(
    221     method.lower(),
    222     url,
   (...)
    228     request_timeout=request_timeout,
    229 )
--> 230 resp, got_stream = self._interpret_response(result, stream)
    231 return resp, got_stream, self.api_key

File ~/.local/lib/python3.10/site-packages/openai/api_requestor.py:624, in APIRequestor._interpret_response(self, result, stream)
    622 else:
    623     return (
--> 624         self._interpret_response_line(
    625             result.content.decode("utf-8"),
    626             result.status_code,
    627             result.headers,
    628             stream=False,
    629         ),
    630         False,
    631     )

File ~/.local/lib/python3.10/site-packages/openai/api_requestor.py:687, in APIRequestor._interpret_response_line(self, rbody, rcode, rheaders, stream)
    686 if stream_error or not 200 <= rcode < 300:
--> 687     raise self.handle_error_response(
    688         rbody, rcode, resp.data, rheaders, stream_error=stream_error
    689     )
    690 return resp

RateLimitError: You exceeded your current quota, please check your plan and billing details.

The above exception was the direct cause of the following exception:

RetryError                                Traceback (most recent call last)
Cell In[13], line 24
     21         documents.append(Document(filename, f.read()))
     23 # Create a GPTVectorStoreIndex object from a list of Document objects
---> 24 index = GPTVectorStoreIndex.from_documents(documents)
     26 # Index the documents
     27 #index.index()
     29 try:

File ~/.local/lib/python3.10/site-packages/llama_index/indices/base.py:93, in BaseGPTIndex.from_documents(cls, documents, storage_context, service_context, **kwargs)
     89     docstore.set_document_hash(doc.get_doc_id(), doc.get_doc_hash())
     91 nodes = service_context.node_parser.get_nodes_from_documents(documents)
---> 93 return cls(
     94     nodes=nodes,
     95     storage_context=storage_context,
     96     service_context=service_context,
     97     **kwargs,
     98 )

File ~/.local/lib/python3.10/site-packages/llama_index/indices/vector_store/base.py:43, in GPTVectorStoreIndex.__init__(self, nodes, index_struct, service_context, storage_context, use_async, **kwargs)
     41 """Initialize params."""
     42 self._use_async = use_async
---> 43 super().__init__(
     44     nodes=nodes,
     45     index_struct=index_struct,
     46     service_context=service_context,
     47     storage_context=storage_context,
     48     **kwargs,
     49 )

File ~/.local/lib/python3.10/site-packages/llama_index/indices/base.py:65, in BaseGPTIndex.__init__(self, nodes, index_struct, storage_context, service_context, **kwargs)
     63 if index_struct is None:
     64     assert nodes is not None
---> 65     index_struct = self.build_index_from_nodes(nodes)
     66 self._index_struct = index_struct
     67 self._storage_context.index_store.add_index_struct(self._index_struct)

File ~/.local/lib/python3.10/site-packages/llama_index/token_counter/token_counter.py:78, in llm_token_counter.<locals>.wrap.<locals>.wrapped_llm_predict(_self, *args, **kwargs)
     76 def wrapped_llm_predict(_self: Any, *args: Any, **kwargs: Any) -> Any:
     77     with wrapper_logic(_self):
---> 78         f_return_val = f(_self, *args, **kwargs)
     80     return f_return_val

File ~/.local/lib/python3.10/site-packages/llama_index/indices/vector_store/base.py:187, in GPTVectorStoreIndex.build_index_from_nodes(self, nodes)
    179 @llm_token_counter("build_index_from_nodes")
    180 def build_index_from_nodes(self, nodes: Sequence[Node]) -> IndexDict:
    181     """Build the index from nodes.
    182 
    183     NOTE: Overrides BaseGPTIndex.build_index_from_nodes.
    184         GPTVectorStoreIndex only stores nodes in document store
    185         if vector store does not store text
    186     """
--> 187     return self._build_index_from_nodes(nodes)

File ~/.local/lib/python3.10/site-packages/llama_index/indices/vector_store/base.py:176, in GPTVectorStoreIndex._build_index_from_nodes(self, nodes)
    174     run_async_tasks(tasks)
    175 else:
--> 176     self._add_nodes_to_index(index_struct, nodes)
    177 return index_struct

File ~/.local/lib/python3.10/site-packages/llama_index/indices/vector_store/base.py:152, in GPTVectorStoreIndex._add_nodes_to_index(self, index_struct, nodes)
    146 def _add_nodes_to_index(
    147     self,
    148     index_struct: IndexDict,
    149     nodes: Sequence[Node],
    150 ) -> None:
    151     """Add document to index."""
--> 152     embedding_results = self._get_node_embedding_results(nodes)
    153     new_ids = self._vector_store.add(embedding_results)
    155     if not self._vector_store.stores_text:
    156         # NOTE: if the vector store doesn't store text,
    157         # we need to add the nodes to the index struct and document store

File ~/.local/lib/python3.10/site-packages/llama_index/indices/vector_store/base.py:84, in GPTVectorStoreIndex._get_node_embedding_results(self, nodes)
     78         id_to_embed_map[n.get_doc_id()] = n.embedding
     80 # call embedding model to get embeddings
     81 (
     82     result_ids,
     83     result_embeddings,
---> 84 ) = self._service_context.embed_model.get_queued_text_embeddings()
     85 for new_id, text_embedding in zip(result_ids, result_embeddings):
     86     id_to_embed_map[new_id] = text_embedding

File ~/.local/lib/python3.10/site-packages/llama_index/embeddings/base.py:167, in BaseEmbedding.get_queued_text_embeddings(self)
    165 cur_batch_ids = [text_id for text_id, _ in cur_batch]
    166 cur_batch_texts = [text for _, text in cur_batch]
--> 167 embeddings = self._get_text_embeddings(cur_batch_texts)
    168 result_ids.extend(cur_batch_ids)
    169 result_embeddings.extend(embeddings)

File ~/.local/lib/python3.10/site-packages/llama_index/embeddings/openai.py:253, in OpenAIEmbedding._get_text_embeddings(self, texts)
    246 def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
    247     """Get text embeddings.
    248 
    249     By default, this is a wrapper around _get_text_embedding.
    250     Can be overriden for batch queries.
    251 
    252     """
--> 253     return get_embeddings(
    254         texts, engine=self.text_engine, deployment_id=self.deployment_name
    255     )

File ~/.local/lib/python3.10/site-packages/tenacity/__init__.py:289, in BaseRetrying.wraps.<locals>.wrapped_f(*args, **kw)
    287 @functools.wraps(f)
    288 def wrapped_f(*args: t.Any, **kw: t.Any) -> t.Any:
--> 289     return self(f, *args, **kw)

File ~/.local/lib/python3.10/site-packages/tenacity/__init__.py:379, in Retrying.__call__(self, fn, *args, **kwargs)
    377 retry_state = RetryCallState(retry_object=self, fn=fn, args=args, kwargs=kwargs)
    378 while True:
--> 379     do = self.iter(retry_state=retry_state)
    380     if isinstance(do, DoAttempt):
    381         try:

File ~/.local/lib/python3.10/site-packages/tenacity/__init__.py:326, in BaseRetrying.iter(self, retry_state)
    324     if self.reraise:
    325         raise retry_exc.reraise()
--> 326     raise retry_exc from fut.exception()
    328 if self.wait:
    329     sleep = self.wait(retry_state)

RetryError: RetryError[<Future at 0x7f6cd45685b0 state=finished raised RateLimitError>]
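
To check whether the quota error comes from the key itself rather than from llama_index, the same embeddings endpoint can be called directly with the pre-1.0 openai package (this is the call the traceback shows failing). A minimal diagnostic sketch:

import os
import openai

openai.api_key = os.environ["OPENAI_API_KEY"]

try:
    # The same request llama_index issues internally (see get_embeddings above)
    resp = openai.Embedding.create(
        input=["hello world"],
        model="text-embedding-ada-002",
    )
    print(len(resp["data"][0]["embedding"]))  # 1536 dimensions for ada-002
except openai.error.RateLimitError as err:
    # If this also fails, the key/plan is out of quota and no
    # client-side change in llama_index will help
    print("Quota exceeded:", err)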

Upvotes: 3

Views: 4127

Answers (2)

usersina

Reputation: 1835

The library seems to be updated all the time; this is what worked for me at the time of writing (using version 0.7.10):

from llama_index import ServiceContext, VectorStoreIndex

# A smaller chunk_size means fewer tokens per embedding request
service_context = ServiceContext.from_defaults(chunk_size=512)
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
    show_progress=True,
)
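
For completeness, querying in 0.7.x then goes through a query engine; a minimal usage sketch, reusing the index built above:

query_engine = index.as_query_engine()
response = query_engine.query("What is the capital of France?")
print(response)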

Upvotes: 3

PaulSaul

Reputation: 1

I had the same issue. llama_index didn't handle rate limits properly for free-trial accounts, but after upgrading to the latest version the issue was fixed.
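
For reference, the upgrade itself is just:

pip install --upgrade llama-index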

Upvotes: 0
