Reputation: 128
import chainlit as cl
from langchain.chat_models import ChatOpenAI
# imports below follow the legacy (pre-0.10) llama_index API used in this snippet
from llama_index import (
    LLMPredictor,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
)
from llama_index.callbacks import CallbackManager

try:
    # rebuild storage context
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    # load index
    index = load_index_from_storage(storage_context)
except Exception:
    # no persisted index yet: build one from the text files in ./data
    from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader
    documents = SimpleDirectoryReader("./data").load_data()
    index = GPTVectorStoreIndex.from_documents(documents)
    index.storage_context.persist()


@cl.on_chat_start
async def factory():
    llm_predictor = LLMPredictor(
        llm=ChatOpenAI(
            temperature=0,
            model_name="gpt-3.5-turbo",
            streaming=True,
            # openai_api_key=API_KEY
        ),
    )
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        chunk_size=512,
        callback_manager=CallbackManager([cl.LlamaIndexCallbackHandler()]),
    )
    query_engine = index.as_query_engine(
        service_context=service_context,
        streaming=True,
    )
    cl.user_session.set("query_engine", query_engine)


@cl.on_message
async def main(message):
    query_engine = cl.user_session.get("query_engine")  # type: RetrieverQueryEngine
    response = await cl.make_async(query_engine.query)(message)

    response_message = cl.Message(content="")

    for token in response.response_gen:
        await response_message.stream_token(token=token)

    if response.response_txt:
        response_message.content = response.response_txt

    await response_message.send()
Hello folks,
A beginner here.
I have some text files in the 'data' folder. How can I add custom instructions that will be followed while generating any answer? An example could be: "Answer like a pirate would." The most basic thing I can think of is adding that line to an "About you.txt" file in the data folder.
I'm a bit overwhelmed. Can anyone also suggest a learning path for LangChain?
Upvotes: 0
Views: 1233
Reputation: 2652
The question pertains to LlamaIndex. Following the documentation, a custom prompt can be set as follows:
from llama_index.prompts import LangchainPromptTemplate
from langchain import hub

langchain_prompt = hub.pull("rlm/rag-prompt")

lc_prompt_tmpl = LangchainPromptTemplate(
    template=langchain_prompt,
    template_var_mappings={"query_str": "question", "context_str": "context"},
)

query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": lc_prompt_tmpl}
)
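For the specific "answer like a pirate" instruction from the question, LangChain is not strictly needed; a plain LlamaIndex prompt template with the instruction baked in does the same job. A minimal sketch, assuming the same legacy (pre-0.10) llama_index API as above and an existing query_engine:

from llama_index.prompts import PromptTemplate

# Custom QA template: the pirate instruction is prepended to the usual
# context/question layout the response synthesizer expects.
pirate_qa_tmpl = PromptTemplate(
    "Always answer like a pirate would.\n"
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information, answer the query.\n"
    "Query: {query_str}\n"
    "Answer: "
)

query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": pirate_qa_tmpl}
)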
Upvotes: 0
Reputation: 2836
If you are using a vector store database, you can achieve this easily with a metadata filter. You have already done about 75% of the work by creating the indexes; you can additionally store the documents in a vector DB.
Loading and splitting the documents
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter

def load_source_documents(folder_path):
    all_documents = []
    dir_loaders = [DirectoryLoader(folder_path, glob="**/*.txt")]
    for loader in dir_loaders:
        documents = loader.load()
        all_documents.extend(documents)
    return all_documents

def split_documents(documents):
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=40)
    chunked_documents = text_splitter.split_documents(documents)
    return chunked_documents
Using Deep Lake and retrieving results from the vector DB, you will see that separate columns get created for the metadata, and you can filter on them. This also helps ensure that results are fetched only from the relevant documents:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake

def setup_datalake_add_documents(chunked_documents, org_id, dataset_name):
    embeddings = OpenAIEmbeddings()  # defaults to text-embedding-ada-002
    dataset_path = f"hub://{org_id}/{dataset_name}"
    db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)
    # add the chunked documents to our Deep Lake dataset
    db.add_documents(chunked_documents)
    return db

def query_datalake(db, query, subject):
    # only search chunks whose source file matches the requested subject
    search_filter = {"metadata": {"source": f"output\\{subject}.txt"}}
    # distance_metric: "L2" (Euclidean), "L1", "max" (L-infinity),
    # "cos" (cosine similarity), or "dot" (dot product)
    docs = db.similarity_search(query, filter=search_filter, distance_metric="cos")
    return docs
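Putting the helpers together, a minimal usage sketch (the folder name, org ID, dataset name, and subject are hypothetical placeholders; the folder must match the "output\..." path hard-coded in the filter above):

documents = load_source_documents("output")
chunks = split_documents(documents)
db = setup_datalake_add_documents(chunks, "my_org", "my_dataset")
results = query_datalake(db, "What do the notes say?", "About you")
# results now contains only chunks sourced from output\About you.txt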
Upvotes: 0