Reputation: 971
I have a simple RAG app and cannot figure out how to store memory with streaming. Should save_context
be part of the chain? Or do I have to handle it using some callback?
At the end of the example is answer_chain, where the last step is commented out. I believe something should go there at the end, but I cannot figure out what. I want to run a callback when streaming is finished.
Also, I split the chain into two steps, because when there is one big streaming chain it streams the documents and so on to stdout, which does not make sense; I only want the messages. Is handling it with two separate chains the proper way?
Any ideas?
import uuid
from typing import Iterator
import dotenv
from langchain_core.messages import get_buffer_string
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate, format_document
from langchain_core.runnables import RunnableLambda, RunnablePassthrough, RunnableParallel
from langchain_core.runnables.utils import Output
from document_index.vector import get_retriever
from operator import itemgetter
from memory import get_memory
from model import get_model
dotenv.load_dotenv()
model = get_model()
condense_question_prompt = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(condense_question_prompt)
initial_prompt = """
You are a helpful AI assistant.
Answer the question based only on the context below.
### Context start ###
{context}
### Context end ###
Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(initial_prompt)
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
retriever = get_retriever()
def _get_memory_with_session_id(session_id):
    return get_memory(session_id)


def _combine_documents(docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)
def search(session_id, query) -> Iterator[Output]:
    memory = _get_memory_with_session_id(session_id)

    def _save_context(inputs, answer):
        memory.save_context(inputs, {"answer": answer})

    loaded_memory = RunnablePassthrough.assign(
        chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
    )
    standalone_question = {
        "standalone_question": {
            "question": lambda x: x["question"],
            "chat_history": lambda x: get_buffer_string(x["chat_history"]),
        }
        | CONDENSE_QUESTION_PROMPT
        | model
        | StrOutputParser()
    }
    retrieved_documents = {
        "docs": itemgetter("standalone_question") | retriever,
        "question": lambda x: x["standalone_question"],
    }
    preparation_chain = loaded_memory | standalone_question | retrieved_documents

    memory.load_memory_variables({})

    inputs = {"question": query}
    docs = preparation_chain.invoke(inputs)

    answer_chain = (
        {"docs": RunnablePassthrough()}
        | {
            "context": lambda x: _combine_documents(x["docs"]),
            "question": itemgetter("question"),
        }
        | ANSWER_PROMPT
        | model
        | StrOutputParser()
        # | RunnableLambda(_save_context, ????query_argument, ????MODEL_ANSWER)
    )
    return answer_chain.stream(docs)
if __name__ == "__main__":
    session_id = str(uuid.uuid4())
    query = "Where to buy beer?"
    for result in search(session_id, query):
        print(result, end="")
Upvotes: 1
Views: 626
Reputation: 151
There's an open issue on this case, so you might have to find a workaround. I came up with my own solution, which consists of creating a custom runnable lambda that lets the input pass through the stream and calls the lambda with the collected output once the stream is exhausted. In there you can save the memory context.
For some reason, memory state does not seem to be preserved between chain invocations. This is what I've come up with; it's early, rough stuff, but it should help you get where you want:
from operator import itemgetter
from typing import Any, Callable, Dict, Iterator, cast

from langchain.memory import ConversationBufferMemory
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import (
    RunnableConfig,
    RunnableLambda,
    RunnablePassthrough,
    RunnableSerializable,
)
from langchain_core.runnables.config import call_func_with_variable_args
from langchain_core.runnables.utils import AddableDict, Input, Output
from langchain_openai import ChatOpenAI


class RunnableCollector(RunnableLambda):
    """Passes stream chunks through and calls `self.func` with the accumulated output."""

    def _transform(
        self,
        input: Iterator[Input],
        run_manager: CallbackManagerForChainRun,
        config: RunnableConfig,
        **kwargs: Any,
    ) -> Iterator[Output]:
        final: Input
        got_first_val = False
        for ichunk in input:
            yield cast(Output, ichunk)
            if not got_first_val:
                final = ichunk
                got_first_val = True
            else:
                try:
                    final = final + ichunk  # type: ignore[operator]
                except TypeError:
                    final = ichunk
        # The stream is exhausted: call the wrapped function with the collected output.
        call_func_with_variable_args(
            self.func, cast(Input, final), config, run_manager, **kwargs
        )
class RunnableFilter(RunnableLambda):
    """Only yields the stream chunks for which `filter` returns True."""

    def __init__(self, filter: Callable[[Input], bool], **kwargs) -> None:
        super().__init__(func=lambda _: None, **kwargs)
        self.filter = filter

    def _transform(
        self,
        input: Iterator[Input],
        run_manager: CallbackManagerForChainRun,
        config: RunnableConfig,
        **kwargs: Any,
    ) -> Iterator[Output]:
        for ichunk in input:
            if self.filter(ichunk):
                yield ichunk
class RunnableMap(RunnableLambda):
    """Applies `mapping` to every stream chunk."""

    def __init__(self, mapping: Callable[[Input], Output], **kwargs) -> None:
        super().__init__(func=lambda _: None, **kwargs)
        self.mapping = mapping

    def _transform(
        self,
        input: Iterator[Input],
        run_manager: CallbackManagerForChainRun,
        config: RunnableConfig,
        **kwargs: Any,
    ) -> Iterator[Output]:
        for ichunk in input:
            yield self.mapping(ichunk)
def setup_chain(
    model="gpt-3.5-turbo",
) -> RunnableSerializable:
    memory = ConversationBufferMemory(memory_key="history", return_messages=True)

    def save_into_mem(input_output: Dict[str, Any]):
        # Called by RunnableCollector once the whole answer has been streamed.
        message = input_output.pop("output")
        memory.save_context(input_output, {"output": message.content})
        print("\n\n2.\t", memory.load_memory_variables({}))

    chain = (
        RunnablePassthrough.assign(
            history=RunnableLambda(memory.load_memory_variables)
            | itemgetter("history"),
            dummy=RunnableLambda(lambda x: print("input?", x)),
        )
        | ChatPromptTemplate.from_messages(
            [
                ("system", SYSTEM_MESSAGE),  # SYSTEM_MESSAGE: your own system prompt, defined elsewhere
                MessagesPlaceholder(variable_name="history"),
                ("user", "{input}"),
            ]
        )
        | ChatOpenAI(
            model=model,
            temperature=0,
            streaming=True,
        )
    )

    chain = (
        RunnablePassthrough.assign(
            output=chain,
        )
        # Keep only the chunks that carry the input or the streamed output.
        | RunnableFilter(
            filter=lambda chunk: type(chunk) is AddableDict
            and ("output" in chunk or "input" in chunk)
        )
        # Collect the full stream and save it into memory when it finishes.
        | RunnableCollector(save_into_mem)
        # Forward only the model output chunks to the caller.
        | RunnableFilter(
            filter=lambda chunk: type(chunk) is AddableDict
            and "output" in chunk
            and "input" not in chunk
        )
        | RunnableMap(mapping=lambda chunk: cast(Output, chunk["output"]))
    )
    return chain
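Roughly, usage would look like the sketch below (assuming SYSTEM_MESSAGE is defined and your OpenAI API key is set; the example inputs are made up). Streaming the chain yields only the model's output chunks, and save_into_mem runs once the stream is exhausted, so the second turn can see the first exchange via the history placeholder:
if __name__ == "__main__":
    chain = setup_chain()

    # First turn: stream the answer; save_into_mem fires when the stream ends.
    for chunk in chain.stream({"input": "Hi, my name is Bob."}):
        print(chunk.content, end="", flush=True)
    print()

    # Second turn: the saved exchange is injected via the "history" variable.
    for chunk in chain.stream({"input": "What is my name?"}):
        print(chunk.content, end="", flush=True)
    print()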
Upvotes: 0