import streamlit as st
import fitz  # PyMuPDF
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
import torch


@st.cache_resource
def load_rag_components():
    """Load the RAG tokenizer, retriever, and model once per server process.

    Streamlit re-executes the entire script on every widget interaction;
    without `st.cache_resource` the (multi-GB) model would be re-instantiated
    on each rerun.

    Returns:
        (tokenizer, retriever, model) tuple.
    """
    tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
    # index_name="exact" + use_dummy_dataset=True avoids downloading the
    # full wiki_dpr index (tens of GB) that the default retriever pulls in.
    # NOTE(review): the retriever is kept for interface compatibility but is
    # not wired into generation below — the uploaded PDF's text is supplied
    # as the single pre-retrieved context instead.
    retriever = RagRetriever.from_pretrained(
        "facebook/rag-sequence-nq", index_name="exact", use_dummy_dataset=True
    )
    model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq")
    model.eval()  # inference only — disable dropout
    return tokenizer, retriever, model


tokenizer, retriever, model = load_rag_components()


def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of an uploaded PDF.

    Args:
        pdf_file: a Streamlit ``UploadedFile`` — an in-memory file-like
            object, NOT a filesystem path. It must therefore be opened via
            PyMuPDF's ``stream`` argument; ``fitz.open(pdf_file)`` would fail
            because ``fitz.open`` expects a path when given a positional arg.

    Returns:
        str: all page text joined together.
    """
    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    try:
        return "".join(page.get_text() for page in doc)
    finally:
        doc.close()  # release the document explicitly instead of leaking it


def answer_question(question, pdf_text):
    """Generate an answer to ``question`` using ``pdf_text`` as the context.

    The PDF text is passed to the model as a single manually-retrieved
    document (``n_docs=1``). When ``context_input_ids`` is supplied directly,
    ``generate`` also requires ``doc_scores`` — omitting it raises at runtime.

    Args:
        question: the user's natural-language question.
        pdf_text: full text extracted from the uploaded PDF.

    Returns:
        str: the decoded model answer.
    """
    inputs = tokenizer(question, return_tensors="pt")
    # Truncate the document: an un-truncated PDF easily exceeds the
    # encoder's maximum input length and would raise a size-mismatch error.
    pdf_inputs = tokenizer(
        pdf_text, return_tensors="pt", truncation=True, max_length=512
    )
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            context_input_ids=pdf_inputs["input_ids"],
            context_attention_mask=pdf_inputs["attention_mask"],
            # One document with a neutral score; shape (batch=1, n_docs=1).
            doc_scores=torch.tensor([[1.0]]),
            n_docs=1,
        )
    # batch_decode is the documented decoding path on the composite
    # RagTokenizer (delegates to the generator tokenizer).
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.title("PDF Question-Answer Chatbot")
st.write("Upload a PDF file and ask questions based on its content.")

pdf_file = st.file_uploader("Upload PDF", type=["pdf"])

if pdf_file is not None:
    pdf_text = extract_text_from_pdf(pdf_file)
    st.success("PDF loaded successfully!")

    question = st.text_input("Ask a question:")
    if question:
        with st.spinner("Finding answer..."):
            try:
                answer = answer_question(question, pdf_text)
                st.write("### Answer:")
                st.write(answer)
            except Exception as e:
                # Top-level UI boundary: surface the failure to the user
                # instead of crashing the app.
                st.error(f"Error occurred: {str(e)}")