tarrasyed19472007 committed on
Commit fd6cb76 · verified · 1 Parent(s): 4c33841

Update app.py

Files changed (1)
  1. app.py +35 -46
app.py CHANGED
@@ -1,57 +1,46 @@
+!pip install streamlit transformers PyPDF2 faiss-cpu
 import streamlit as st
-from PyPDF2 import PdfReader
+import PyPDF2  # Now PyPDF2 should be found
 from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
-import torch
 
-# Load RAG model and tokenizer
-tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence")
-retriever = RagRetriever.from_pretrained("facebook/rag-sequence", use_dummy_dataset=True)
-model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence")
-
-# Function to process PDF
-def read_pdf(file):
-    text = ""
-    pdf_reader = PdfReader(file)
-    for page in pdf_reader.pages:
-        text += page.extract_text()
+# Load PDF and extract text
+def load_pdf(file):
+    with open(file, "rb") as f:
+        reader = PyPDF2.PdfReader(f)
+        text = ""
+        for page in reader.pages:
+            text += page.extract_text() + "\n"
     return text
 
-# Function to answer the question
-def answer_question(question, context):
-    input_dict = tokenizer.prepare_seq2seq_batch(
-        src_texts=[question],
-        return_tensors="pt",
-        padding=True,
-        truncation=True
-    )
-    # Retrieve relevant documents
-    input_ids = input_dict["input_ids"]
-    context_ids = retriever(input_ids, return_tensors="pt")['input_ids']
-
-    # Generate answer
-    outputs = model.generate(input_ids=input_ids, context_input_ids=context_ids)
+# Initialize RAG model
+def initialize_rag_model():
+    # Load the tokenizer and model
+    tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
+    retriever = RagRetriever.from_pretrained("facebook/rag-sequence-nq", index_name="legacy", use_dummy_dataset=True)
+    model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq")
+    return tokenizer, retriever, model
+
+# Process user query
+def generate_answer(query, context, tokenizer, retriever, model):
+    inputs = tokenizer(query, return_tensors="pt")
+    inputs["context_input_ids"] = retriever(context, return_tensors="pt")["input_ids"]
+    outputs = model.generate(**inputs)
     answer = tokenizer.batch_decode(outputs, skip_special_tokens=True)
     return answer[0]
 
-# Streamlit frontend
-st.title("PDF Question-Answering Chatbot")
-
-uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
+# Streamlit UI
+st.title("PDF Question-Answer Chatbot")
 
+uploaded_file = st.file_uploader("/content/Rag Comprehensive notes with example.pdf", type=["pdf"])
 if uploaded_file is not None:
-    # Read PDF
-    pdf_text = read_pdf(uploaded_file)
-    st.success("PDF file processed successfully.")
-
-    # Text area for user input
-    question = st.text_input("Ask a question about the PDF content:")
+    text = load_pdf(uploaded_file)
+    st.write("PDF loaded successfully. You can now ask questions.")
 
-    if question:
-        # Get the answer
-        answer = answer_question(question, pdf_text)
-        st.subheader("Answer:")
-        st.write(answer)
-
-# Run the application
-if __name__ == "__main__":
-    st.run()
+    # Initialize the RAG model
+    tokenizer, retriever, model = initialize_rag_model()
+
+    while True:
+        user_query = st.text_input("Ask a question about the PDF:")
+        if user_query:
+            answer = generate_answer(user_query, text, tokenizer, retriever, model)
+            st.write(f"Answer: {answer}")