Reputation: 49
I am trying to build a Django + LangChain PDF chat web app where students can upload their syllabus PDF and ask questions about it, similar to this website.
This is a college project on AI and web development.
I have learned a lot about it, but because of limited resources and being a beginner, I have so far created only a simple Django web app where you can upload a PDF and ask questions about it. It uses embeddings, a vector store, and the ChatGPT API to provide the answers, but the accuracy of the answers is very bad and they are sometimes completely irrelevant.
Here is the code of the project I have created so far:
From Views.py
def upload_pdf(request):
    """Handle the PDF upload page.

    A non-POST request renders an empty upload form. A POST validates the
    submitted form; when valid, the uploaded document is saved, its text is
    extracted and stored on the record, and the user is redirected to the
    'process_pdf' URL. An invalid POST re-renders the bound form.
    """
    template = 'pdf/upload_pdf.html'

    if request.method != 'POST':
        return render(request, template, {'form': PDFUploadForm()})

    form = PDFUploadForm(request.POST, request.FILES)
    if not form.is_valid():
        # Hand the bound form back to the template.
        return render(request, template, {'form': form})

    document = form.save()
    # Extract the text once at upload time and keep it on the record.
    document.processed_text = get_pdf_text([document.pdf_file.path])
    document.save()
    return redirect('process_pdf')
def processing_text(request):
    """Build a vector store from the stored PDF text and register it.

    Redirects to 'upload_pdf' when there are no documents or none of them has
    extracted text. Otherwise the combined text is chunked and embedded, the
    resulting store is put into ``vectorstore_dict`` under a new string id,
    that id is saved in the session as 'vectorstore_id', and the user is
    redirected to 'ask_question'.
    """
    documents = PDFDocument.objects.all()
    if not documents:
        return redirect('upload_pdf')

    # Only documents whose text was actually extracted contribute.
    extracted = [
        document.processed_text
        for document in documents
        if document.processed_text is not None
    ]
    combined_text = "".join(extracted)

    # If all processed_text are None, handle accordingly
    if not combined_text:
        print("All processed_text are None.")
        return redirect('upload_pdf')

    store = get_vectorstore(get_text_chunks(combined_text), api_key=OPENAI_API_KEY)

    # The id is the registry size at insertion time, stored as a string.
    store_key = str(len(vectorstore_dict))
    vectorstore_dict[store_key] = store
    request.session['vectorstore_id'] = store_key
    return redirect('ask_question')
def ask_question(request):
    """Answer a question about the processed PDFs.

    A non-POST request renders an empty question form, and an invalid POST
    re-renders the bound form. For a valid POST the vector store referenced
    by the session's 'vectorstore_id' is looked up in ``vectorstore_dict``,
    the question is sent through a conversation chain, the returned chat
    history is saved (as strings) in the session, and the answer page is
    rendered.

    If the session has no id, or the id is no longer present in
    ``vectorstore_dict``, the user is redirected to 'process_pdf' instead of
    hitting a KeyError or getting the form back with no answer.
    """
    form_template = 'pdf/ask_question.html'

    if request.method != 'POST':
        return render(request, form_template, {'form': QuestionForm()})

    form = QuestionForm(request.POST)
    if not form.is_valid():
        return render(request, form_template, {'form': form})

    user_question = form.cleaned_data['question']

    # vectorstore_dict is a module-level dict, so a session can hold an id
    # that the dict no longer contains; .get() avoids the KeyError.
    vectorstore_id = request.session.get('vectorstore_id')
    vectorstore = vectorstore_dict.get(vectorstore_id) if vectorstore_id else None
    if vectorstore is None:
        # presumably 'process_pdf' rebuilds the store and sets a fresh id —
        # verify against the URL configuration.
        return redirect('process_pdf')

    conversation_chain = get_conversation_chain(vectorstore, api_key=OPENAI_API_KEY)

    # NOTE(review): get_conversation_chain creates a new memory object on
    # every call, so the history passed here may not be what the chain
    # actually uses — confirm against the LangChain memory documentation.
    chat_history = request.session.setdefault('chat_history', [])
    response = conversation_chain.invoke({
        'question': user_question,
        'chat_history': chat_history,
    })

    new_chat_history = response['chat_history']
    # Stored as plain strings, as in the original session format.
    request.session['chat_history'] = [str(item) for item in new_chat_history]
    return render(request, 'pdf/answer.html', {'chat_history': new_chat_history})
Here is the code from utils.py:
# Registry of built vector stores, keyed by the string id that the views save
# in the session under 'vectorstore_id'. It is a plain module-level dict, so
# its contents exist only inside the running process.
vectorstore_dict = {}
def get_pdf_text(pdf_paths):
    """Return the text of every page of the given PDFs as one string.

    Args:
        pdf_paths: Iterable of paths to PDF files.

    Returns:
        The extracted page texts joined with newlines, in file and page
        order. An empty iterable gives an empty string.
    """
    page_texts = []
    for path in pdf_paths:
        pdf_reader = PdfReader(path)
        for page in pdf_reader.pages:
            # A page with no extractable text must not break the
            # concatenation, so a falsy result is treated as "".
            page_texts.append(page.extract_text() or "")
    # A newline between pages keeps the last line of one page from fusing
    # with the first line of the next, which matters to the line-based
    # splitter used downstream.
    return "\n".join(page_texts)
def get_text_chunks(text):
    """Split syllabus text into heading-prefixed chunks for embedding.

    The text is cut at "Semester <n>" and "Subject: <name>" headings. The
    body under each heading is split into chunks, and each chunk is prefixed
    with its heading so that it stays self-describing when retrieved alone.
    Text that appears before the first heading (or the whole text, when no
    heading is found) is chunked as well, without a prefix, so it is not
    lost.

    Args:
        text: The raw extracted text.

    Returns:
        A list of chunk strings; empty when ``text`` is empty or blank.
    """
    # Nothing to index.
    if not text or not text.strip():
        return []

    # The capture group makes re.split keep the headings: even indexes hold
    # body text, odd indexes hold the heading that introduces the next body.
    # The subject name is limited to one line ([\w \t]) so the heading cannot
    # run across newlines and swallow the body text that follows it.
    sections = re.split(r"(Semester \d+|Subject: [\w \t]+)", text)

    # One splitter is enough; its settings do not depend on the section.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=700,  # Adjust based on your requirement
        chunk_overlap=150,
    )

    # sections[0] is whatever precedes the first heading.
    text_chunks = list(text_splitter.split_text(sections[0]))

    # With a single capture group the split result always has odd length, so
    # every heading has a matching body.
    for heading, content in zip(sections[1::2], sections[2::2]):
        text_chunks.extend(
            f"{heading}\n{chunk}" for chunk in text_splitter.split_text(content)
        )

    print("Generated Chunks: ", text_chunks)  # Debugging: Inspect the chunks
    return text_chunks
def get_vectorstore(text_chunks, api_key):
    """Embed the chunks with OpenAI embeddings and return a FAISS store.

    Args:
        text_chunks: The chunk strings to index.
        api_key: OpenAI API key passed to the embedding client.
    """
    embedder = OpenAIEmbeddings(openai_api_key=api_key)
    return FAISS.from_texts(texts=text_chunks, embedding=embedder)
def get_conversation_chain(vectorstore, api_key):
    """Build a conversational retrieval chain over ``vectorstore``.

    Args:
        vectorstore: The vector store to retrieve context from.
        api_key: OpenAI API key passed to the chat model.

    Returns:
        A ConversationalRetrievalChain that combines the chat model, an MMR
        retriever asking for 5 documents, and a new buffer memory stored
        under the "chat_history" key.
    """
    llm = ChatOpenAI(api_key=api_key)
    memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

    # The number of documents to return is a search option, so it goes in
    # search_kwargs; passed as a bare k= keyword it never reaches the search.
    retriever = vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 5},
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
    )
Please help. I want to improve the quality of my answers: they should be complete and have enough content to explain things properly.
If any other code files related to this project are required, please ask; I am glad to provide them.
Upvotes: 0
Views: 58