# Ragpdfbot / app.py
import streamlit as st
import fitz  # PyMuPDF
import torch
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration

# Load the RAG model components once and cache them across Streamlit reruns
@st.cache_resource
def load_rag_components():
    tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
    # The dummy index keeps startup light; the PDF text will serve as the context
    retriever = RagRetriever.from_pretrained("facebook/rag-sequence-nq", index_name="exact", use_dummy_dataset=True)
    model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", retriever=retriever)
    return tokenizer, model

tokenizer, model = load_rag_components()
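
# Note: "facebook/rag-sequence-nq" pairs a DPR question encoder with a BART generator.
# Here the Wikipedia index is bypassed: chunks of the uploaded PDF are passed to the
# generator directly as the context documents (see answer_question below).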

# Function to extract text from an uploaded PDF
def extract_text_from_pdf(pdf_file):
    # Streamlit's uploader returns an in-memory file, so open it from its bytes
    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    text = ""
    for page in doc:
        text += page.get_text()
    return text
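
# Note: page.get_text() returns each page's plain text; very long documents are
# effectively truncated when the text is tokenized for the generator in answer_question.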

# Function to handle question answering over the extracted PDF text
def answer_question(question, pdf_text):
    # Split the PDF text into config.n_docs chunks and treat them as the
    # "retrieved" documents, each joined with the question the way the
    # retriever formats passages for the generator
    n_docs = model.config.n_docs
    chunk_size = max(1, len(pdf_text) // n_docs)
    docs = [pdf_text[i * chunk_size:(i + 1) * chunk_size] + " // " + question for i in range(n_docs)]
    # Tokenize the documents with the generator tokenizer
    context = tokenizer.generator(docs, return_tensors="pt", padding=True, truncation=True, max_length=512)
    # Uniform relevance scores for the hand-built documents, shape (batch_size, n_docs)
    doc_scores = torch.ones((1, n_docs))
    # Generate the answer, marginalizing over the document chunks
    with torch.no_grad():
        outputs = model.generate(context_input_ids=context["input_ids"],
                                 context_attention_mask=context["attention_mask"],
                                 doc_scores=doc_scores)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
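
# Example usage outside Streamlit (a sketch; assumes a local file named "sample.pdf"):
#   with open("sample.pdf", "rb") as f:
#       text = extract_text_from_pdf(f)
#   print(answer_question("What is this document about?", text))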

# Streamlit app
st.title("PDF Question-Answer Chatbot")
st.write("Upload a PDF file and ask questions based on its content.")

# File uploader
pdf_file = st.file_uploader("Upload PDF", type=["pdf"])

if pdf_file is not None:
    # Extract text from the PDF
    pdf_text = extract_text_from_pdf(pdf_file)
    st.success("PDF loaded successfully!")

    # Question input
    question = st.text_input("Ask a question:")

    if question:
        with st.spinner("Finding answer..."):
            try:
                answer = answer_question(question, pdf_text)
                st.write("### Answer:")
                st.write(answer)
            except Exception as e:
                st.error(f"Error occurred: {str(e)}")