Reputation: 2358
My code is:
import os
import sys
import transformers
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from llama_index import Document, GPTVectorStoreIndex
os.environ['OPENAI_API_KEY'] = 'my-openapi-key'
# Load the hugging face model
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# Create a Document object for each text file in the directory
documents = []
for filename in os.listdir("data"):
    with open(os.path.join("data", filename), "r") as f:
        print(filename)
        documents.append(Document(filename, f.read()))
# Create a GPTVectorStoreIndex object from a list of Document objects
index = GPTVectorStoreIndex.from_documents(documents)
# Index the documents
index.index()
# Query the index
query = "What is the capital of France?"
predictions = index.query(query)
# Print the predictions
for prediction in predictions:
    print(prediction)
My input is a single file containing one line. When I run this code, I get the error below:
RateLimitError Traceback (most recent call last)
File ~/.local/lib/python3.10/site-packages/tenacity/__init__.py:382, in Retrying.__call__(self, fn, *args, **kwargs)
381 try:
--> 382 result = fn(*args, **kwargs)
383 except BaseException: # noqa: B902
File ~/.local/lib/python3.10/site-packages/llama_index/embeddings/openai.py:149, in get_embeddings(list_of_text, engine, **kwargs)
147 list_of_text = [text.replace("\n", " ") for text in list_of_text]
--> 149 data = openai.Embedding.create(input=list_of_text, model=engine, **kwargs).data
150 return [d["embedding"] for d in data]
File ~/.local/lib/python3.10/site-packages/openai/api_resources/embedding.py:33, in Embedding.create(cls, *args, **kwargs)
32 try:
---> 33 response = super().create(*args, **kwargs)
35 # If a user specifies base64, we'll just return the encoded string.
36 # This is only for the default case.
File ~/.local/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py:153, in EngineAPIResource.create(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)
138 (
139 deployment_id,
140 engine,
(...)
150 api_key, api_base, api_type, api_version, organization, **params
151 )
--> 153 response, _, api_key = requestor.request(
154 "post",
155 url,
156 params=params,
157 headers=headers,
158 stream=stream,
159 request_id=request_id,
160 request_timeout=request_timeout,
161 )
163 if stream:
164 # must be an iterator
File ~/.local/lib/python3.10/site-packages/openai/api_requestor.py:230, in APIRequestor.request(self, method, url, params, headers, files, stream, request_id, request_timeout)
220 result = self.request_raw(
221 method.lower(),
222 url,
(...)
228 request_timeout=request_timeout,
229 )
--> 230 resp, got_stream = self._interpret_response(result, stream)
231 return resp, got_stream, self.api_key
File ~/.local/lib/python3.10/site-packages/openai/api_requestor.py:624, in APIRequestor._interpret_response(self, result, stream)
622 else:
623 return (
--> 624 self._interpret_response_line(
625 result.content.decode("utf-8"),
626 result.status_code,
627 result.headers,
628 stream=False,
629 ),
630 False,
631 )
File ~/.local/lib/python3.10/site-packages/openai/api_requestor.py:687, in APIRequestor._interpret_response_line(self, rbody, rcode, rheaders, stream)
686 if stream_error or not 200 <= rcode < 300:
--> 687 raise self.handle_error_response(
688 rbody, rcode, resp.data, rheaders, stream_error=stream_error
689 )
690 return resp
RateLimitError: You exceeded your current quota, please check your plan and billing details.
The above exception was the direct cause of the following exception:
RetryError Traceback (most recent call last)
Cell In[13], line 24
21 documents.append(Document(filename, f.read()))
23 # Create a GPTVectorStoreIndex object from a list of Document objects
---> 24 index = GPTVectorStoreIndex.from_documents(documents)
26 # Index the documents
27 #index.index()
29 try:
File ~/.local/lib/python3.10/site-packages/llama_index/indices/base.py:93, in BaseGPTIndex.from_documents(cls, documents, storage_context, service_context, **kwargs)
89 docstore.set_document_hash(doc.get_doc_id(), doc.get_doc_hash())
91 nodes = service_context.node_parser.get_nodes_from_documents(documents)
---> 93 return cls(
94 nodes=nodes,
95 storage_context=storage_context,
96 service_context=service_context,
97 **kwargs,
98 )
File ~/.local/lib/python3.10/site-packages/llama_index/indices/vector_store/base.py:43, in GPTVectorStoreIndex.__init__(self, nodes, index_struct, service_context, storage_context, use_async, **kwargs)
41 """Initialize params."""
42 self._use_async = use_async
---> 43 super().__init__(
44 nodes=nodes,
45 index_struct=index_struct,
46 service_context=service_context,
47 storage_context=storage_context,
48 **kwargs,
49 )
File ~/.local/lib/python3.10/site-packages/llama_index/indices/base.py:65, in BaseGPTIndex.__init__(self, nodes, index_struct, storage_context, service_context, **kwargs)
63 if index_struct is None:
64 assert nodes is not None
---> 65 index_struct = self.build_index_from_nodes(nodes)
66 self._index_struct = index_struct
67 self._storage_context.index_store.add_index_struct(self._index_struct)
File ~/.local/lib/python3.10/site-packages/llama_index/token_counter/token_counter.py:78, in llm_token_counter.<locals>.wrap.<locals>.wrapped_llm_predict(_self, *args, **kwargs)
76 def wrapped_llm_predict(_self: Any, *args: Any, **kwargs: Any) -> Any:
77 with wrapper_logic(_self):
---> 78 f_return_val = f(_self, *args, **kwargs)
80 return f_return_val
File ~/.local/lib/python3.10/site-packages/llama_index/indices/vector_store/base.py:187, in GPTVectorStoreIndex.build_index_from_nodes(self, nodes)
179 @llm_token_counter("build_index_from_nodes")
180 def build_index_from_nodes(self, nodes: Sequence[Node]) -> IndexDict:
181 """Build the index from nodes.
182
183 NOTE: Overrides BaseGPTIndex.build_index_from_nodes.
184 GPTVectorStoreIndex only stores nodes in document store
185 if vector store does not store text
186 """
--> 187 return self._build_index_from_nodes(nodes)
File ~/.local/lib/python3.10/site-packages/llama_index/indices/vector_store/base.py:176, in GPTVectorStoreIndex._build_index_from_nodes(self, nodes)
174 run_async_tasks(tasks)
175 else:
--> 176 self._add_nodes_to_index(index_struct, nodes)
177 return index_struct
File ~/.local/lib/python3.10/site-packages/llama_index/indices/vector_store/base.py:152, in GPTVectorStoreIndex._add_nodes_to_index(self, index_struct, nodes)
146 def _add_nodes_to_index(
147 self,
148 index_struct: IndexDict,
149 nodes: Sequence[Node],
150 ) -> None:
151 """Add document to index."""
--> 152 embedding_results = self._get_node_embedding_results(nodes)
153 new_ids = self._vector_store.add(embedding_results)
155 if not self._vector_store.stores_text:
156 # NOTE: if the vector store doesn't store text,
157 # we need to add the nodes to the index struct and document store
File ~/.local/lib/python3.10/site-packages/llama_index/indices/vector_store/base.py:84, in GPTVectorStoreIndex._get_node_embedding_results(self, nodes)
78 id_to_embed_map[n.get_doc_id()] = n.embedding
80 # call embedding model to get embeddings
81 (
82 result_ids,
83 result_embeddings,
---> 84 ) = self._service_context.embed_model.get_queued_text_embeddings()
85 for new_id, text_embedding in zip(result_ids, result_embeddings):
86 id_to_embed_map[new_id] = text_embedding
File ~/.local/lib/python3.10/site-packages/llama_index/embeddings/base.py:167, in BaseEmbedding.get_queued_text_embeddings(self)
165 cur_batch_ids = [text_id for text_id, _ in cur_batch]
166 cur_batch_texts = [text for _, text in cur_batch]
--> 167 embeddings = self._get_text_embeddings(cur_batch_texts)
168 result_ids.extend(cur_batch_ids)
169 result_embeddings.extend(embeddings)
File ~/.local/lib/python3.10/site-packages/llama_index/embeddings/openai.py:253, in OpenAIEmbedding._get_text_embeddings(self, texts)
246 def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
247 """Get text embeddings.
248
249 By default, this is a wrapper around _get_text_embedding.
250 Can be overriden for batch queries.
251
252 """
--> 253 return get_embeddings(
254 texts, engine=self.text_engine, deployment_id=self.deployment_name
255 )
File ~/.local/lib/python3.10/site-packages/tenacity/__init__.py:289, in BaseRetrying.wraps.<locals>.wrapped_f(*args, **kw)
287 @functools.wraps(f)
288 def wrapped_f(*args: t.Any, **kw: t.Any) -> t.Any:
--> 289 return self(f, *args, **kw)
File ~/.local/lib/python3.10/site-packages/tenacity/__init__.py:379, in Retrying.__call__(self, fn, *args, **kwargs)
377 retry_state = RetryCallState(retry_object=self, fn=fn, args=args, kwargs=kwargs)
378 while True:
--> 379 do = self.iter(retry_state=retry_state)
380 if isinstance(do, DoAttempt):
381 try:
File ~/.local/lib/python3.10/site-packages/tenacity/__init__.py:326, in BaseRetrying.iter(self, retry_state)
324 if self.reraise:
325 raise retry_exc.reraise()
--> 326 raise retry_exc from fut.exception()
328 if self.wait:
329 sleep = self.wait(retry_state)
RetryError: RetryError[<Future at 0x7f6cd45685b0 state=finished raised RateLimitError>]
Upvotes: 3
Views: 4127
Reputation: 1835
The library seems to get updated all the time; this is what worked for me at the time of writing (using version 0.7.10).
from llama_index import ServiceContext, VectorStoreIndex

# chunk_size controls how many tokens go into each embedded chunk
service_context = ServiceContext.from_defaults(chunk_size=512)

# "documents" is the list of Document objects built in the question
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
    show_progress=True
)
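The smaller chunk_size also means fewer tokens per embedding request, which makes rate limits easier to stay under. In case it helps, here is a minimal end-to-end sketch of the same idea on 0.7.x, assuming your files live in the data/ directory from the question; SimpleDirectoryReader and as_query_engine are the 0.7.x replacements for the manual file loop and index.query() in the question:

from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex

# Load every file under data/ into Document objects
documents = SimpleDirectoryReader("data").load_data()

# Smaller chunks -> fewer tokens per embedding call
service_context = ServiceContext.from_defaults(chunk_size=512)

index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
    show_progress=True,
)

# In 0.7.x you query through a query engine, not index.query()
query_engine = index.as_query_engine()
response = query_engine.query("What is the capital of France?")
print(response)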
Upvotes: 3
Reputation: 1
I had the same issue: llama-index didn't handle rate limits properly for free-trial accounts. Upgrading llama-index fixed it; the problem is addressed in the latest versions.
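If it helps, upgrading is just:

pip install --upgrade llama-index

Note that the RateLimitError in the question ("You exceeded your current quota, please check your plan and billing details") is a quota error, so you may also need to check your OpenAI plan and billing.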
Upvotes: 0