Huzaifa367 committed on
Commit
2395412
·
verified ·
1 Parent(s): 1b5a676

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -18
app.py CHANGED
@@ -9,6 +9,8 @@ from langchain.prompts import PromptTemplate
9
  import tempfile
10
  from gtts import gTTS
11
  import os
 
 
12
 
13
  def text_to_speech(text):
14
  tts = gTTS(text=text, lang='en')
@@ -18,19 +20,44 @@ def text_to_speech(text):
18
  st.audio(temp_filename, format='audio/mp3')
19
  os.remove(temp_filename)
20
 
21
def get_pdf_text(pdf_docs):
    """Return the extracted text of every page of every PDF in *pdf_docs*.

    Each item of *pdf_docs* is opened with ``PdfReader``; the text of all
    pages is appended back to back, with no separator added between pages
    or between documents. An empty *pdf_docs* yields an empty string.
    """
    return "".join(
        page.extract_text()
        for document in pdf_docs
        for page in PdfReader(document).pages
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def get_text_chunks(text):
30
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
31
  chunks = text_splitter.split_text(text)
32
  return chunks
33
-
34
  def get_vector_store(text_chunks, api_key):
35
  embeddings = HuggingFaceInferenceAPIEmbeddings(api_key=api_key, model_name="sentence-transformers/all-MiniLM-l6-v2")
36
  vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
@@ -62,11 +89,10 @@ def user_input(user_question, api_key):
62
  chain = get_conversational_chain()
63
 
64
  response = chain(
65
- {"input_documents":docs, "question": user_question}
66
- , return_only_outputs=True)
 
67
 
68
- print(response) # Debugging line
69
-
70
  st.write("Replies:")
71
  if isinstance(response["output_text"], str):
72
  response_list = [response["output_text"]]
@@ -87,23 +113,25 @@ def main():
87
 
88
  with st.sidebar:
89
  st.title("Menu:")
90
- pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
91
  if st.button("Submit & Process"):
92
  with st.spinner("Processing..."):
93
- raw_text = get_pdf_text(pdf_docs)
 
 
 
94
  text_chunks = get_text_chunks(raw_text)
95
  get_vector_store(text_chunks, api_key)
96
  st.success("Done")
97
 
98
  # Check if any document is uploaded
99
- if pdf_docs:
100
  user_question = st.text_input("Ask a question from the Docs")
101
 
102
  if user_question:
103
  user_input(user_question, api_key)
104
  else:
105
- st.write("Please upload a document first to ask questions.")
106
 
107
-
108
  if __name__ == "__main__":
109
- main()
 
9
  import tempfile
10
  from gtts import gTTS
11
  import os
12
+ import docx
13
+ from pptx import Presentation
14
 
15
  def text_to_speech(text):
16
  tts = gTTS(text=text, lang='en')
 
20
  st.audio(temp_filename, format='audio/mp3')
21
  os.remove(temp_filename)
22
 
23
def read_text_from_pdf(pdf_file):
    """Return the text of all pages of *pdf_file*, concatenated in page order.

    The file is opened with ``PdfReader`` and the result of
    ``extract_text()`` for each page is joined without any separator.
    """
    reader = PdfReader(pdf_file)
    return "".join(page.extract_text() for page in reader.pages)
29
+
30
def read_text_from_docx(docx_file):
    """Return the paragraph text of *docx_file*, one paragraph per line.

    The document is opened with ``docx.Document`` and the ``text`` of each
    entry in ``doc.paragraphs`` is joined with newline characters.
    """
    document = docx.Document(docx_file)
    paragraph_texts = (para.text for para in document.paragraphs)
    return "\n".join(paragraph_texts)
34
+
35
def read_text_from_pptx(pptx_file):
    """Return the text of every text-bearing shape in *pptx_file*.

    Slides and their shapes are visited in order; each shape that exposes a
    ``text`` attribute contributes its text followed by a newline. Shapes
    without that attribute are skipped.
    """
    deck = Presentation(pptx_file)
    pieces = []
    for slide in deck.slides:
        pieces.extend(
            shape.text + "\n"
            for shape in slide.shapes
            if hasattr(shape, "text")
        )
    return "".join(pieces)
43
+
44
def get_text_from_file(file):
    """Extract plain text from an uploaded file, dispatching on its MIME type.

    Args:
        file: Uploaded file object exposing a ``type`` attribute holding the
            MIME type and, for plain-text uploads, a ``getvalue()`` method
            returning the raw bytes.

    Returns:
        The extracted text, or an empty string when the MIME type is not one
        of the handled formats (PDF, DOCX, PPTX, plain text).
    """
    mime_type = file.type
    if mime_type == "application/pdf":
        return read_text_from_pdf(file)
    if mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return read_text_from_docx(file)
    if mime_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
        return read_text_from_pptx(file)
    if mime_type == "text/plain":
        # "utf-8-sig" decodes ordinary UTF-8 unchanged but drops a leading
        # byte-order mark, which would otherwise end up inside the first
        # chunk. errors="replace" keeps a text file saved in another encoding
        # from aborting the whole upload with UnicodeDecodeError; undecodable
        # bytes become U+FFFD instead.
        return file.getvalue().decode("utf-8-sig", errors="replace")
    # Unsupported types contribute no text rather than raising.
    return ""
55
 
56
  def get_text_chunks(text):
57
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
58
  chunks = text_splitter.split_text(text)
59
  return chunks
60
+
61
  def get_vector_store(text_chunks, api_key):
62
  embeddings = HuggingFaceInferenceAPIEmbeddings(api_key=api_key, model_name="sentence-transformers/all-MiniLM-l6-v2")
63
  vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
 
89
  chain = get_conversational_chain()
90
 
91
  response = chain(
92
+ {"input_documents": docs, "question": user_question},
93
+ return_only_outputs=True
94
+ )
95
 
 
 
96
  st.write("Replies:")
97
  if isinstance(response["output_text"], str):
98
  response_list = [response["output_text"]]
 
113
 
114
  with st.sidebar:
115
  st.title("Menu:")
116
+ uploaded_files = st.file_uploader("Upload your files (PDF, DOCX, PPTX, TXT)", accept_multiple_files=True)
117
  if st.button("Submit & Process"):
118
  with st.spinner("Processing..."):
119
+ raw_text = ""
120
+ for file in uploaded_files:
121
+ file_text = get_text_from_file(file)
122
+ raw_text += file_text
123
  text_chunks = get_text_chunks(raw_text)
124
  get_vector_store(text_chunks, api_key)
125
  st.success("Done")
126
 
127
  # Check if any document is uploaded
128
+ if uploaded_files:
129
  user_question = st.text_input("Ask a question from the Docs")
130
 
131
  if user_question:
132
  user_input(user_question, api_key)
133
  else:
134
+ st.write("Please upload a document (PDF, DOCX, PPTX, TXT) first to ask questions.")
135
 
 
136
  if __name__ == "__main__":
137
+ main()