Reputation: 21
I wanted to build a RAG pipeline using Llama 3 on Google Colab, and I used the following code:
#### INDEXING ####

# Restrict parsing to the article body, title, and header so that
# navigation and other boilerplate markup never reach the index.
article_only = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)

# Fetch and parse the blog post.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": article_only},
)
docs = loader.load()

# Chunk the documents; the 200-character overlap keeps context intact
# across neighbouring splits.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed every chunk with the local Ollama model and store the vectors in
# Chroma.  NOTE(review): OllamaEmbeddings talks to an Ollama server on
# localhost:11434 — it must be running in the same environment.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OllamaEmbeddings(model=local_llm),
)
retriever = vectorstore.as_retriever()
But when execution reaches `Chroma.from_documents`, I get the following error:
ConnectionError: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/embeddings (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7c949f725420>: Failed to establish a new connection: [Errno 111] Connection refused'))
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-9-2332286f4c58> in <cell line: 29>()
27
28 # Embed
---> 29 vectorstore = Chroma.from_documents(
30 documents=splits,
31 embedding=OllamaEmbeddings(model=local_llm)
/usr/local/lib/python3.10/dist-packages/langchain_community/vectorstores/chroma.py in from_documents(cls, documents, embedding, ids, collection_name, persist_directory, client_settings, client, collection_metadata, **kwargs)
788 texts = [doc.page_content for doc in documents]
789 metadatas = [doc.metadata for doc in documents]
--> 790 return cls.from_texts(
791 texts=texts,
792 embedding=embedding,
/usr/local/lib/python3.10/dist-packages/langchain_community/vectorstores/chroma.py in from_texts(cls, texts, embedding, metadatas, ids, collection_name, persist_directory, client_settings, client, collection_metadata, **kwargs)
746 documents=texts,
747 ):
--> 748 chroma_collection.add_texts(
749 texts=batch[3] if batch[3] else [],
750 metadatas=batch[2] if batch[2] else None,
/usr/local/lib/python3.10/dist-packages/langchain_community/vectorstores/chroma.py in add_texts(self, texts, metadatas, ids, **kwargs)
274 texts = list(texts)
275 if self._embedding_function is not None:
--> 276 embeddings = self._embedding_function.embed_documents(texts)
277 if metadatas:
278 # fill metadatas with empty dicts if somebody
/usr/local/lib/python3.10/dist-packages/langchain_community/embeddings/ollama.py in embed_documents(self, texts)
209 """
210 instruction_pairs = [f"{self.embed_instruction}{text}" for text in texts]
--> 211 embeddings = self._embed(instruction_pairs)
212 return embeddings
213
/usr/local/lib/python3.10/dist-packages/langchain_community/embeddings/ollama.py in _embed(self, input)
197 else:
198 iter_ = input
--> 199 return [self._process_emb_response(prompt) for prompt in iter_]
200
201 def embed_documents(self, texts: List[str]) -> List[List[float]]:
/usr/local/lib/python3.10/dist-packages/langchain_community/embeddings/ollama.py in <listcomp>(.0)
197 else:
198 iter_ = input
--> 199 return [self._process_emb_response(prompt) for prompt in iter_]
200
201 def embed_documents(self, texts: List[str]) -> List[List[float]]:
/usr/local/lib/python3.10/dist-packages/langchain_community/embeddings/ollama.py in _process_emb_response(self, input)
168 )
169 except requests.exceptions.RequestException as e:
--> 170 raise ValueError(f"Error raised by inference endpoint: {e}")
171
172 if res.status_code != 200:
ValueError: Error raised by inference endpoint: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/embeddings (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7c949f725420>: Failed to establish a new connection: [Errno 111] Connection refused'))
I installed the required libraries with:
`!pip install langchain`
and
`!pip install chromadb`
Upvotes: 2
Views: 610