import os

import google.generativeai as genai
import streamlit as st
from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.faiss import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from PyPDF2 import PdfReader

# Read GOOGLE_API_KEY from a local .env file and configure the Gemini client.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

def extract_pdf_text(pdfs):
    """Concatenate the extracted text of every page across the uploaded PDFs."""
    all_text = ""
    for pdf in pdfs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None (e.g. for image-only pages),
            # which would otherwise break the string concatenation.
            all_text += page.extract_text() or ""
    return all_text
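
# Example usage (hypothetical file name; Streamlit's uploader returns
# file-like objects, which PdfReader also accepts):
#
#     with open("paper.pdf", "rb") as f:
#         text = extract_pdf_text([f])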

def split_text_into_chunks(text):
    """Split the raw text into large, overlapping chunks for retrieval."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=12000, chunk_overlap=1200)
    return splitter.split_text(text)
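
# Example (hypothetical numbers): a 40,000-character document yields four
# chunks of up to 12,000 characters, each sharing 1,200 characters with its
# neighbour so that passages spanning a chunk boundary stay retrievable.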

def create_vector_store(chunks):
    """Embed the chunks with Gemini embeddings and persist a FAISS index."""
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")
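
# The index is persisted to a local "faiss_index" directory and reloaded by
# handle_user_input() on every question, so it survives Streamlit's reruns
# (the script re-executes top to bottom on each user interaction).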

def setup_conversation_chain(template):
    """Build a question-answering chain around Gemini with the given prompt."""
    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    prompt = PromptTemplate(template=template, input_variables=["context", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
    return chain
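
# "stuff" concatenates all retrieved documents into a single prompt. That is
# fine here because only a few (large) chunks come back per query, but it can
# overflow the model's context window if the retriever returned many more.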

def handle_user_input(mode, user_question):
    """Retrieve the chunks most relevant to the question and answer in the given mode."""
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    indexed_data = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = indexed_data.similarity_search(user_question)

    chain = setup_conversation_chain(prompt_template[mode])
    response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
    return response["output_text"]
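
# Example usage (hypothetical question; assumes create_vector_store() has
# already written a "faiss_index" directory):
#
#     answer = handle_user_input("chat", "What problem does the paper solve?")
#
# similarity_search() uses LangChain's default of k=4 nearest chunks.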

prompt_template = {
    "chat": """
    Your alias is Neural-PDF. Your task is to provide a thorough response based on the given context, ensuring all relevant details are included.
    If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting with the context.
    Don't provide incorrect information.\n\n
    Context: \n {context}\n
    Question: \n {question}\n

    Answer:
    """,
    "quiz": """
    Your alias is Neural-PDF. Your task is to generate multiple-choice quiz questions based on the given context and the requested number of questions, ensuring all relevant details are included.
    If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting with the context.
    Don't provide incorrect information.\n\n
    Context: \n {context}\n
    Question: \n {question}\n

    Answer:
    """,
    "long": """
    Your alias is Neural-PDF. Your task is to generate long-answer questions based on the given context and the requested number of questions, ensuring all relevant details are included.
    If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting with the context.
    Don't provide incorrect information.\n\n
    Context: \n {context}\n
    Question: \n {question}\n

    Answer:
    """,
}
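
# The keys ("chat", "quiz", "long") match the sidebar modes defined in main();
# each template receives the retrieved chunks as {context} and the user's
# input as {question} through the PromptTemplate in setup_conversation_chain().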

def main():
    # set_page_config should be the first Streamlit command on the page.
    st.set_page_config(page_title="NeuralPDF", page_icon=":page_with_curl:", initial_sidebar_state="expanded", layout="wide")
    st.title("NeuralPDF: Interactive PDF Chat using AI 🤖")

    # Initialise session state so values survive Streamlit's top-to-bottom reruns.
    if "conversation" not in st.session_state:
        st.session_state.conversation = []
    if "mode" not in st.session_state:
        st.session_state.mode = ""
    if "file_upload" not in st.session_state:
        st.session_state.file_upload = False

    # Sidebar: upload PDFs, then build the FAISS index on submit.
    files = st.sidebar.file_uploader("Upload one or more PDF files", type="pdf", accept_multiple_files=True)
    if st.sidebar.button("Submit"):
        if files:
            with st.spinner("Processing..."):
                raw_text = extract_pdf_text(files)
                text_chunks = split_text_into_chunks(raw_text)
                create_vector_store(text_chunks)
            st.sidebar.success("Processing done!")
            st.session_state.file_upload = True
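
    # file_upload gates the rest of the UI: the mode picker and the chat
    # panel below only appear once an index has been built in this session.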
    with st.sidebar:
        if st.session_state.file_upload:
            modes = {"Chat Conversation": "chat", "Quiz & MCQs": "quiz", "Long-Answer Questions": "long"}
            choose_mode = st.radio("Choose a mode", list(modes.keys()), index=0)
            st.session_state.mode = modes[choose_mode]

    if st.session_state.file_upload:
        # Replay the stored conversation on every rerun. Quiz/long-mode
        # assistant turns keep the generated questions in "content" and the
        # worked answers in "answer", rendered inside an expander.
        for dialogue in st.session_state.conversation:
            with st.chat_message(dialogue["role"]):
                if st.session_state.mode != "chat" and dialogue["role"] == "assistant":
                    st.markdown(dialogue["content"])
                    with st.expander("Answer"):
                        st.markdown(dialogue["answer"])
                else:
                    st.markdown(dialogue["content"])

    if prompt := st.chat_input("Type your question here"):
        with st.chat_message("user"):
            st.markdown(prompt)
        st.session_state.conversation.append({"role": "user", "content": prompt, "answer": ""})

        with st.chat_message("assistant"):
            response = handle_user_input(st.session_state.mode, prompt)
            answer = ""
            if st.session_state.mode != "chat":
                answer = handle_user_input("chat", response)
                st.markdown(response)
                with st.expander("Answer"):
                    st.markdown(answer)
            else:
                st.markdown(response)
            st.session_state.conversation.append({"role": "assistant", "content": response, "answer": answer})
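
    # Note on the block above: quiz/long modes make two passes over the index.
    # The first call generates questions from the PDFs; the second, in "chat"
    # mode, answers those generated questions so they can fill the expander.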

if __name__ == "__main__":
    main()
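
# To launch the app (assuming this file is saved as app.py):
#     streamlit run app.py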