Reputation: 11
I need to do abstractive text summarization with the LLM Llama 3.1 8B Instruct. My question is: how do I approach this methodologically?
Here is how I propose going about it: applying parameter training (i.e. fine-tuning) to the LLM. Does this procedure suffice, or might I need additional input?
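For reference, the simplest baseline I can think of is zero-shot prompting (a rough sketch, assuming the model is served locally through Ollama; the file name and prompt wording are just placeholders):

import ollama

# Zero-shot abstractive summarization with Llama 3.1 8B Instruct via Ollama
with open("document.txt", encoding="utf-8") as f:
    document = f.read()

response = ollama.chat(
    model="llama3.1:8b",
    messages=[
        {"role": "system", "content": "You are an expert summarizer. Write a concise abstractive summary."},
        {"role": "user", "content": f"Summarize the following document:\n\n{document}"},
    ],
)
print(response["message"]["content"])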
Upvotes: 0
Views: 2845
Reputation: 2294
I will provide a code snippet that will guide you on how to build your own summarization pipeline on top of the LLM. I would also advise you to learn some methodologies such as RAG-Rank and RAG-Fusion.
The first step is to download Ollama (follow this link). Always make sure the app is running whenever you want to run the Python code I have provided below.
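Before running it, you can sanity-check from Python that the app is up and that the model has been pulled (ollama.list() and ollama.pull() are part of the official ollama Python client):

import ollama

# This call fails with a connection error if the Ollama app is not running
ollama.list()

# Download Llama 3.1 8B Instruct if it is not present yet
ollama.pull("llama3.1:8b")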
import logging

import ollama
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Prompt for the RAG model (note: not wired into the chain below;
# swap it into the ChatPromptTemplate if you prefer this wording)
SYSTEM_PROMPT = """You are a very helpful assistant and an expert in
summarizing documents. Use all available resources to generate answers
to the questions asked. You can return queries that you think are
relevant to the question. You will be asked to summarize a document.
Here is the question: {question}
"""
def read_pdf_file(local_path):
    """
    Load a PDF, split it into chunks, embed the chunks, and return a retriever.

    Args:
        local_path: Path to the local PDF file.

    Returns:
        A FAISS-backed retriever over the document chunks.
    """
    try:
        # Local PDF file uploads
        if local_path:
            loader = UnstructuredPDFLoader(file_path=local_path)
            data = loader.load()
        else:
            logging.error("Upload a PDF file")
            raise ValueError("No PDF file path provided")
    except Exception:
        raise  # Re-raise the exception to stop further execution

    # Split and chunk
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
    chunks = text_splitter.split_documents(data)

    try:
        # Optional warm-up call to confirm the model is available in Ollama
        ollama.embeddings(
            model="llama3.1:8b",
            prompt="Llamas are members of the camelid family",
        )
        # No trailing comma here, otherwise embedding_model becomes a tuple
        embedding_model = OllamaEmbeddings(model="llama3.1:8b")
        vectorstore_db = FAISS.from_documents(
            documents=chunks, embedding=embedding_model
        )
        vectorstore_db.save_local("faiss_index")
        vector_retriever = vectorstore_db.as_retriever()
    except Exception:
        raise  # Re-raise the exception to stop further execution

    return vector_retriever
# RAG FUSION
def main() -> str:
    """
    Implement the RAG fusion pipeline.

    Returns:
        str: The answer to the question.
    """
    # Prompt used by MultiQueryRetriever to generate alternative queries
    LLM_PROMPT = PromptTemplate(
        input_variables=["question"],
        template="""You are an AI language model assistant. Your task is to
generate alternative versions of the given question to retrieve relevant
documents. Provide these alternative questions separated by newlines.
Original question: {question}""",
    )

    template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""
    prompt = ChatPromptTemplate.from_template(template)

    local_model = "llama3.1:8b"  # the Ollama tag for Llama 3.1 8B Instruct; pull it first with: ollama pull llama3.1:8b
    llm = ChatOllama(model=local_model)

    retriever = read_pdf_file("your_pdf_file_path")
    retriever = MultiQueryRetriever.from_llm(retriever, llm, prompt=LLM_PROMPT)

    # Retrieval-augmented chain: fetch context, fill the prompt, query the LLM
    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    response = chain.invoke("summarize this document about climate change")
    print(response)
    return response
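Finally, run the pipeline (remember that your_pdf_file_path above is a placeholder you must replace with a real path):

if __name__ == "__main__":
    main()

The MultiQueryRetriever step is what gives this a RAG-Fusion flavour: it asks the LLM for several rephrasings of the request and merges the documents retrieved for each one.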
Upvotes: 0