tahirsher committed on
Commit
a30fe61
·
verified ·
1 Parent(s): 64b9df3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import docx
3
+ import PyPDF2
4
+ from transformers import pipeline
5
+ import tempfile
6
+
7
+ # Load Hugging Face model
8
@st.cache_resource
def load_pipeline():
    """Build the question-answering pipeline used by the app.

    The function is wrapped in ``st.cache_resource`` so the object it returns
    is cached by Streamlit rather than being rebuilt each time it is called.

    Returns:
        The ``transformers`` pipeline created for the "question-answering"
        task with the ``deepset/roberta-base-squad2`` checkpoint.
    """
    task = "question-answering"
    checkpoint = "deepset/roberta-base-squad2"
    return pipeline(task, model=checkpoint)


# Module-level pipeline instance that the question-answering UI calls.
qa_pipeline = load_pipeline()
13
+
14
def read_pdf(file):
    """Extract the text of every page of a PDF.

    Args:
        file: The PDF source handed straight to ``PyPDF2.PdfReader``.

    Returns:
        The text extracted from each page, in page order, with a newline
        appended after every page.
    """
    reader = PyPDF2.PdfReader(file)
    # One "<page text>\n" chunk per page, glued together in a single pass.
    return "".join(page.extract_text() + "\n" for page in reader.pages)
20
+
21
def read_word(file):
    """Extract the paragraph text of a Word document.

    Args:
        file: The document source handed straight to ``docx.Document``.

    Returns:
        The text of each paragraph, in document order, with a newline
        appended after every paragraph.
    """
    document = docx.Document(file)
    # One "<paragraph text>\n" chunk per paragraph, joined in a single pass.
    return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)
27
+
28
def extract_text(uploaded_file):
    """Read the text of an uploaded PDF or Word file.

    The file type is taken from the text after the last "." in
    ``uploaded_file.name``, compared case-insensitively.

    Args:
        uploaded_file: Object exposing a ``name`` attribute; it is passed
            unchanged to ``read_pdf`` or ``read_word``.

    Returns:
        The extracted text, or ``None`` when the extension is neither
        ``pdf`` nor ``docx`` (an error is shown in the Streamlit UI first).
    """
    extension = uploaded_file.name.rsplit(".", 1)[-1].lower()

    if extension == "pdf":
        return read_pdf(uploaded_file)
    if extension == "docx":
        return read_word(uploaded_file)

    # Anything else is rejected with a visible message and a None result.
    st.error("Unsupported file type. Please upload a PDF or Word file.")
    return None
38
+
39
+ # Streamlit interface
40
def main():
    """Render the Streamlit page: upload a file, preview it, answer questions.

    Flow:
        1. Show the title, the instructions and a PDF/DOCX file uploader.
        2. Once a file is uploaded, extract its text with ``extract_text``
           and show a preview of at most the first 1000 characters.
        3. Let the user type a question and, on "Get Answer", run the
           module-level ``qa_pipeline`` with the full text as context.

    Returns:
        None. All output goes through Streamlit widgets.
    """
    st.title("📄 File Reader & Hugging Face Q&A Application")
    st.write("Upload a PDF or Word file and ask questions based on its content.")

    # File upload
    uploaded_file = st.file_uploader("Choose a PDF or Word file", type=["pdf", "docx"])
    if uploaded_file is None:
        return

    # Pass the upload itself rather than a temp-file path: extract_text reads
    # ``.name`` to detect the file type, which a plain path string does not
    # have (it raised AttributeError), and an un-suffixed temp path carries no
    # extension anyway. Streamlit documents uploads as file-like objects, so
    # no temporary copy is needed and nothing is left behind on disk.
    file_text = extract_text(uploaded_file)
    if not file_text:
        return

    # Only announce truncation when the preview really is shorter than the text.
    preview_limit = 1000
    preview = file_text[:preview_limit]
    if len(file_text) > preview_limit:
        preview += "... (truncated for display)"
    st.text_area("File Content", preview)

    # Question-answering
    question = st.text_input("Ask a question based on the file content:")

    if st.button("Get Answer"):
        if question.strip():
            try:
                result = qa_pipeline(question=question, context=file_text)
                st.success(f"Answer: {result['answer']}")
            except Exception as e:
                # UI boundary: surface any model failure to the user
                # instead of crashing the page.
                st.error(f"Error generating answer: {str(e)}")
        else:
            st.warning("Please enter a question.")


if __name__ == "__main__":
    main()