import os |
from PyPDF2 import PdfReader |
import streamlit as st |
from langchain.text_splitter import RecursiveCharacterTextSplitter |
from langchain_community.vectorstores.faiss import FAISS |
from langchain.chains.question_answering import load_qa_chain |
from langchain.prompts import PromptTemplate |
from langchain_google_genai import GoogleGenerativeAIEmbeddings |
from langchain_google_genai import ChatGoogleGenerativeAI |
import google.generativeai as genai |
from dotenv import load_dotenv |
# Load environment variables through python-dotenv, then pass the Google API
# key (read from GOOGLE_API_KEY) to the google.generativeai client.
load_dotenv()
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
def extract_pdf_text(pdfs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdfs: Iterable of PDF sources accepted by ``PdfReader``.

    Returns:
        One string containing the page texts in document order and page
        order, joined without a separator. Empty when ``pdfs`` is empty.
    """
    # A page may produce no text (e.g. an image-only page); depending on the
    # PyPDF2 version that can surface as an empty or None result, so coerce
    # it to "" instead of letting string concatenation fail. Joining once
    # also avoids the repeated reallocation of ``+=`` in a nested loop.
    return "".join(
        page.extract_text() or ""
        for pdf in pdfs
        for page in PdfReader(pdf).pages
    )
def split_text_into_chunks(text):
    """Split ``text`` into overlapping chunks.

    Uses a ``RecursiveCharacterTextSplitter`` configured with a chunk size of
    12000 and an overlap of 1200.

    Args:
        text: The text to split.

    Returns:
        Whatever ``split_text`` returns for ``text`` (the chunk collection).
    """
    return RecursiveCharacterTextSplitter(
        chunk_overlap=1200,
        chunk_size=12000,
    ).split_text(text)
def create_vector_store(chunks):
    """Embed the text chunks and save a FAISS index under ``faiss_index``.

    Args:
        chunks: Sequence of text chunks to embed and index.

    Raises:
        ValueError: If ``chunks`` is empty. Without this check the failure
            would surface later, inside index construction, with a message
            that does not point at the real cause (no text to index).
    """
    if not chunks:
        raise ValueError(
            "No text chunks to index; the uploaded PDFs contain no extractable text."
        )
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")
def setup_conversation_chain(template):
    """Build a "stuff" question-answering chain that uses ``template``.

    Args:
        template: Prompt text containing the ``context`` and ``question``
            input variables.

    Returns:
        The chain produced by ``load_qa_chain`` for a ``gemini-pro`` chat
        model (temperature 0.3) and the prompt built from ``template``.
    """
    llm = ChatGoogleGenerativeAI(temperature=0.3, model="gemini-pro")
    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template,
    )
    return load_qa_chain(llm, prompt=qa_prompt, chain_type="stuff")
def handle_user_input(mode, user_question=None):
    """Answer ``user_question`` from the saved FAISS index using ``mode``'s prompt.

    Args:
        mode: Key into ``prompt_template`` selecting the prompt to use.
        user_question: Text used both for the similarity search and as the
            question passed to the chain.

    Returns:
        The ``output_text`` entry of the chain response.

    Raises:
        ValueError: If ``user_question`` is None or blank.
        KeyError: If ``mode`` is not a key of ``prompt_template``.
    """
    # Reject a missing/blank question up front: the default of None would
    # otherwise be passed straight into the similarity search.
    if user_question is None or not str(user_question).strip():
        raise ValueError("user_question must be a non-empty string")

    # Resolve the template before loading the index so an unknown mode fails
    # immediately instead of after the embedding client and index are set up.
    template = prompt_template[mode]

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # NOTE(security): allow_dangerous_deserialization=True lets the loader
    # unpickle data from disk. That is acceptable only while "faiss_index" is
    # the directory this app itself writes in create_vector_store; never point
    # this at an index obtained from an untrusted source.
    indexed_data = FAISS.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True
    )
    docs = indexed_data.similarity_search(user_question)
    chain = setup_conversation_chain(template)
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    return response["output_text"]
# Prompt text for each app mode, keyed by the mode string that
# handle_user_input receives and looks up here ("chat", "quiz", "long").
# Every template uses the {context} and {question} placeholders, which are the
# input variables setup_conversation_chain declares on its PromptTemplate.
# NOTE(review): each template renders a literal "?" immediately after
# {context} -- confirm this is intentional.
prompt_template = { |
# Free-form answers based on the supplied context.
"chat":""" |
Your alias is Neural-PDF. Your task is to provide a thorough response based on the given context, ensuring all relevant details are included. |
If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting with the context. |
Don't provide incorrect information.\n\n |
Context: \n {context}?\n |
Question: \n {question}\n |
Answer: |
""", |
# Multiple-choice quiz questions generated from the context.
"quiz":""" |
Your alias is Neural-PDF. Your task is to generate multiple choice questions for quiz based on the given context and requested number of questions, ensuring all relevant details are included. |
If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting with the context. |
Don't provide incorrect information.\n\n |
Context: \n {context}?\n |
Question: \n {question}\n |
Answer: |
""", |
# Long answer-type questions generated from the context.
"long":""" |
Your alias is Neural-PDF. Your task is to generate long answer-type questions based on the given context and requested number of questions, ensuring all relevant details are included. |
If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting with the context. |
Don't provide incorrect information.\n\n |
Context: \n {context}?\n |
Question: \n {question}\n |
Answer: |
""", |
} |
def _render_assistant_message(content, answer):
    """Render an assistant reply, adding an "Answer" expander when one exists."""
    st.markdown(content)
    if answer:
        with st.expander("Answer"):
            st.markdown(answer)


def main():
    """Run the NeuralPDF Streamlit page.

    Flow: configure the page, let the user upload PDFs in the sidebar and
    index them on "Submit", offer a mode selector once an index exists, replay
    the stored conversation, and answer a newly typed question. In the
    non-chat modes the generated questions are additionally sent through the
    "chat" prompt to produce an answer shown in an expander.
    """
    # Page configuration is issued first so it precedes every other Streamlit
    # call made during the script run.
    st.set_page_config(
        page_title="NeuralPDF",
        page_icon=":page_with_curl:",
        initial_sidebar_state="expanded",
        layout="wide",
    )

    # Seed the per-session state on the first run only.
    if "conversation" not in st.session_state:
        st.session_state.conversation = []
    if "mode" not in st.session_state:
        st.session_state.mode = ""
    if "file_upload" not in st.session_state:
        st.session_state.file_upload = False

    st.title("NeuralPDF: Interactive PDF Chat using AI 🤖")

    files = st.sidebar.file_uploader(
        "Upload one or more PDF files", type="pdf", accept_multiple_files=True
    )
    if st.sidebar.button("Submit"):
        if not files:
            # Previously a click with nothing uploaded did nothing at all.
            st.sidebar.warning("Please upload at least one PDF file first.")
        else:
            with st.spinner("Processing..."):
                raw_text = extract_pdf_text(files)
                text_chunks = split_text_into_chunks(raw_text)
                # Only build the index when there is something to index;
                # PDFs without extractable text yield no chunks.
                if text_chunks:
                    create_vector_store(text_chunks)
            if text_chunks:
                st.sidebar.success("Processing done!")
                st.session_state.file_upload = True
            else:
                st.sidebar.error(
                    "No extractable text was found in the uploaded PDFs."
                )

    if not st.session_state.file_upload:
        return

    modes = {
        "Chat Conversation": "chat",
        "Quiz & MCQs": "quiz",
        "Long-Answer Questions": "long",
    }
    with st.sidebar:
        # A real label hidden via label_visibility replaces the empty-string
        # label, which Streamlit discourages for accessibility reasons.
        choose_mode = st.radio(
            "Mode", list(modes), index=0, label_visibility="collapsed"
        )
    st.session_state.mode = modes[choose_mode]

    # Replay history. Whether an expander is shown depends on the stored
    # message having an answer, not on the currently selected mode, so
    # switching modes neither hides old answers nor adds empty expanders.
    for dialogue in st.session_state.conversation:
        with st.chat_message(dialogue["role"]):
            if dialogue["role"] == "assistant":
                _render_assistant_message(dialogue["content"], dialogue["answer"])
            else:
                st.markdown(dialogue["content"])

    prompt = st.chat_input("Type your question here")
    if not prompt:
        return

    with st.chat_message("user"):
        st.markdown(prompt)
    st.session_state.conversation.append(
        {"role": "user", "content": prompt, "answer": ""}
    )

    with st.chat_message("assistant"):
        response = handle_user_input(st.session_state.mode, prompt)
        answer = ""
        if st.session_state.mode != "chat":
            # Feed the generated questions back through the chat prompt to
            # obtain their answers.
            answer = handle_user_input("chat", response)
        _render_assistant_message(response, answer)
    st.session_state.conversation.append(
        {"role": "assistant", "content": response, "answer": answer}
    )
# Call main() only when this file is run as the main module, not when it is
# imported.
if __name__ == "__main__": |
main() |