TahaRasouli committed on
Commit
412b7cf
·
verified ·
1 Parent(s): a942df2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +142 -91
app.py CHANGED
@@ -85,11 +85,21 @@ class StreamlitDocProcessor:
85
  st.title("Document Assistant")
86
 
87
  # Create sidebar for navigation
 
88
  page = st.sidebar.selectbox(
89
  "Choose a page",
90
  ["Upload & Process", "Query"]
91
  )
92
 
 
 
 
 
 
 
 
 
 
93
  if page == "Upload & Process":
94
  self.upload_and_process_page()
95
  else:
@@ -98,18 +108,32 @@ class StreamlitDocProcessor:
98
  def upload_and_process_page(self):
99
  st.header("Upload and Process Documents")
100
 
 
 
 
 
 
 
 
 
 
 
101
  # File uploader
102
  uploaded_files = st.file_uploader(
103
  "Upload PDF or XML files",
104
  type=['pdf', 'xml'],
105
- accept_multiple_files=True
 
106
  )
107
 
108
  if uploaded_files:
109
  for uploaded_file in uploaded_files:
110
- # Create progress bar
111
- progress_bar = st.progress(0)
112
- status_text = st.empty()
 
 
 
113
 
114
  # Check if file is already processed
115
  file_ext = os.path.splitext(uploaded_file.name)[1][1:] # Get extension without dot
@@ -121,7 +145,7 @@ class StreamlitDocProcessor:
121
  temp_path = tmp_file.name
122
 
123
  # Process the file
124
- status_text.text(f'Processing {uploaded_file.name}...')
125
  progress_bar.progress(25)
126
 
127
  result = st.session_state.processor.process_file(temp_path)
@@ -132,13 +156,15 @@ class StreamlitDocProcessor:
132
  st.session_state.processed_files[file_ext] = []
133
  st.session_state.processed_files[file_ext].append(uploaded_file.name)
134
  progress_bar.progress(100)
135
- status_text.success(f"Successfully processed {uploaded_file.name}")
136
  else:
137
  progress_bar.progress(100)
138
- status_text.error(f"Failed to process {uploaded_file.name}: {result['error']}")
 
139
 
140
  except Exception as e:
141
- status_text.error(f"Error processing {uploaded_file.name}: {str(e)}")
 
142
  finally:
143
  # Clean up temporary file
144
  try:
@@ -146,26 +172,30 @@ class StreamlitDocProcessor:
146
  except:
147
  pass
148
  else:
149
- status_text.info(f"{uploaded_file.name} has already been processed")
150
  progress_bar.progress(100)
151
 
152
  # Display processed files
153
  if any(st.session_state.processed_files.values()):
154
  st.subheader("Processed Files")
155
- if st.session_state.processed_files.get('xml'):
156
- st.write("XML Files:")
157
- for file in sorted(st.session_state.processed_files['xml']):
158
- st.text(f"πŸ“± {file}")
159
- if st.session_state.processed_files.get('pdf'):
160
- st.write("PDF Files:")
161
- for file in sorted(st.session_state.processed_files['pdf']):
162
- st.text(f"πŸ“„ {file}")
163
-
164
- # Modify the qa_page method in the StreamlitDocProcessor class
 
 
 
 
165
 
166
  def qa_page(self):
167
  st.header("Query Documents")
168
-
169
  try:
170
  # Refresh available files
171
  st.session_state.processed_files = self.get_processed_files()
@@ -173,105 +203,126 @@ class StreamlitDocProcessor:
173
  if not any(st.session_state.processed_files.values()):
174
  st.warning("No processed files available. Please upload and process some files first.")
175
  return
176
-
177
  # Create combined list of files with icons
178
  all_files = []
179
  for file in st.session_state.processed_files.get('xml', []):
180
  all_files.append(f"πŸ“± {file}")
181
  for file in st.session_state.processed_files.get('pdf', []):
182
  all_files.append(f"πŸ“„ {file}")
183
-
184
  if not all_files:
185
  st.warning("No processed files available. Please upload and process some files first.")
186
  return
187
-
 
 
 
 
 
 
 
 
 
188
  # File selection
189
  selected_files = st.multiselect(
190
  "Select files to search through",
191
  sorted(all_files),
192
- default=all_files
 
193
  )
194
-
195
  # Remove icons from selected files
196
  selected_files = [f.split(' ', 1)[1] for f in selected_files]
197
-
198
  if not selected_files:
199
  st.warning("Please select at least one file to search through.")
200
  return
201
 
202
- # Question input
203
- question = st.text_input("Enter your question:")
204
-
205
- if question:
206
- col1, col2, col3 = st.columns(3)
207
 
208
- with col1:
209
- if st.button("Quick Answer"):
210
- try:
211
- with st.spinner("Getting quick answer..."):
212
- answer = st.session_state.processor.ask_question_selective(
213
- question,
214
- selected_files
215
- )
216
- st.write("Answer:", answer)
217
- except Exception as e:
218
- st.error(f"Error getting answer: {str(e)}")
 
 
 
219
 
220
- with col2:
221
- if st.button("Detailed Answer"):
222
- try:
223
- with st.spinner("Getting detailed answer..."):
224
- result = st.session_state.processor.get_detailed_context(
225
- question,
226
- selected_files
227
- )
228
- if result['success']:
229
- st.write("### Relevant Information")
230
- for item in result['results']:
231
- with st.expander(f"Source: {item['metadata']['source_file']} ({item['metadata']['content_type'].upper()})"):
232
- st.write(f"Relevance Score: {item['relevance_score']:.2f}")
233
- if item['metadata']['content_type'] == 'xml':
234
- st.write(f"XML Path: {item['source_info']['path']}")
235
- st.write("Content:", item['content'])
236
- else:
237
- st.error(result['error'])
238
- except Exception as e:
239
- st.error(f"Error getting detailed answer: {str(e)}")
240
 
241
- with col3:
242
- if st.button("Complete Analysis"):
243
- try:
244
- with st.spinner("Performing complete analysis..."):
245
- result = st.session_state.processor.get_summary_and_details(
246
- question,
247
- selected_files
248
- )
249
- if result['success']:
250
- st.write("### Summary")
251
- st.write(result['summary'])
252
-
253
- st.write("### Detailed Information")
254
- for item in result['details']:
255
- with st.expander(f"Source: {item['metadata']['source_file']} ({item['metadata']['content_type'].upper()})"):
256
- st.write(f"Relevance Score: {item['relevance_score']:.2f}")
257
- if item['metadata']['content_type'] == 'xml':
258
- st.write(f"XML Path: {item['source_info']['path']}")
259
- if 'parent_info' in item:
260
- st.write("Parent Element:", item['parent_info']['content'])
261
- if 'children_info' in item:
262
- st.write("Related Elements:")
263
- for child in item['children_info']:
264
- st.write(f"- {child['content']}")
265
- st.write("Content:", item['content'])
266
- else:
267
- st.error(result['error'])
268
- except Exception as e:
269
- st.error(f"Error getting complete analysis: {str(e)}")
270
 
271
  except Exception as e:
272
  st.error(f"Error in Q&A interface: {str(e)}")
273
 
274
  def main():
 
 
 
 
 
 
 
 
275
  # Initialize session state
276
  initialize_session_state()
277
 
 
85
  st.title("Document Assistant")
86
 
87
  # Create sidebar for navigation
88
+ st.sidebar.title("Navigation")
89
  page = st.sidebar.selectbox(
90
  "Choose a page",
91
  ["Upload & Process", "Query"]
92
  )
93
 
94
+ # Add sidebar information
95
+ with st.sidebar.expander("About"):
96
+ st.write("""
97
+ This application allows you to:
98
+ - Upload PDF and XML documents
99
+ - Process them for semantic search
100
+ - Query the documents with different levels of detail
101
+ """)
102
+
103
  if page == "Upload & Process":
104
  self.upload_and_process_page()
105
  else:
 
108
  def upload_and_process_page(self):
109
  st.header("Upload and Process Documents")
110
 
111
+ # Add instructions
112
+ with st.expander("Instructions", expanded=True):
113
+ st.write("""
114
+ 1. Click 'Browse files' to select documents
115
+ 2. You can select multiple files at once
116
+ 3. Supported formats: PDF and XML
117
+ 4. Wait for processing to complete
118
+ 5. Processed files will be listed below
119
+ """)
120
+
121
  # File uploader
122
  uploaded_files = st.file_uploader(
123
  "Upload PDF or XML files",
124
  type=['pdf', 'xml'],
125
+ accept_multiple_files=True,
126
+ help="Select one or more PDF or XML files to upload"
127
  )
128
 
129
  if uploaded_files:
130
  for uploaded_file in uploaded_files:
131
+ # Create progress bar and status container
132
+ col1, col2 = st.columns([3, 1])
133
+ with col1:
134
+ progress_bar = st.progress(0)
135
+ with col2:
136
+ status_text = st.empty()
137
 
138
  # Check if file is already processed
139
  file_ext = os.path.splitext(uploaded_file.name)[1][1:] # Get extension without dot
 
145
  temp_path = tmp_file.name
146
 
147
  # Process the file
148
+ status_text.info('Processing...')
149
  progress_bar.progress(25)
150
 
151
  result = st.session_state.processor.process_file(temp_path)
 
156
  st.session_state.processed_files[file_ext] = []
157
  st.session_state.processed_files[file_ext].append(uploaded_file.name)
158
  progress_bar.progress(100)
159
+ status_text.success("βœ“ Success")
160
  else:
161
  progress_bar.progress(100)
162
+ status_text.error("βœ— Failed")
163
+ st.error(f"Failed to process {uploaded_file.name}: {result['error']}")
164
 
165
  except Exception as e:
166
+ status_text.error("βœ— Error")
167
+ st.error(f"Error processing {uploaded_file.name}: {str(e)}")
168
  finally:
169
  # Clean up temporary file
170
  try:
 
172
  except:
173
  pass
174
  else:
175
+ status_text.info("Already processed")
176
  progress_bar.progress(100)
177
 
178
  # Display processed files
179
  if any(st.session_state.processed_files.values()):
180
  st.subheader("Processed Files")
181
+
182
+ col1, col2 = st.columns(2)
183
+
184
+ with col1:
185
+ if st.session_state.processed_files.get('xml'):
186
+ st.write("πŸ“± XML Files:")
187
+ for file in sorted(st.session_state.processed_files['xml']):
188
+ st.text(f" β€’ {file}")
189
+
190
+ with col2:
191
+ if st.session_state.processed_files.get('pdf'):
192
+ st.write("πŸ“„ PDF Files:")
193
+ for file in sorted(st.session_state.processed_files['pdf']):
194
+ st.text(f" β€’ {file}")
195
 
196
  def qa_page(self):
197
  st.header("Query Documents")
198
+
199
  try:
200
  # Refresh available files
201
  st.session_state.processed_files = self.get_processed_files()
 
203
  if not any(st.session_state.processed_files.values()):
204
  st.warning("No processed files available. Please upload and process some files first.")
205
  return
206
+
207
  # Create combined list of files with icons
208
  all_files = []
209
  for file in st.session_state.processed_files.get('xml', []):
210
  all_files.append(f"πŸ“± {file}")
211
  for file in st.session_state.processed_files.get('pdf', []):
212
  all_files.append(f"πŸ“„ {file}")
213
+
214
  if not all_files:
215
  st.warning("No processed files available. Please upload and process some files first.")
216
  return
217
+
218
+ # Add query instructions
219
+ with st.expander("Query Instructions", expanded=True):
220
+ st.write("""
221
+ Choose your query type:
222
+ - **Quick Answer**: Basic response with essential information
223
+ - **Detailed Answer**: Shows sources and relevance with expandable details
224
+ - **Complete Analysis**: Provides summary and full breakdown with XML hierarchies
225
+ """)
226
+
227
  # File selection
228
  selected_files = st.multiselect(
229
  "Select files to search through",
230
  sorted(all_files),
231
+ default=all_files,
232
+ help="Choose which files to include in your search"
233
  )
234
+
235
  # Remove icons from selected files
236
  selected_files = [f.split(' ', 1)[1] for f in selected_files]
237
+
238
  if not selected_files:
239
  st.warning("Please select at least one file to search through.")
240
  return
241
 
242
+ # Question input
243
+ question = st.text_input(
244
+ "Enter your question:",
245
+ help="Type your question here and choose a query type below"
246
+ )
247
 
248
+ if question:
249
+ col1, col2, col3 = st.columns(3)
250
+
251
+ with col1:
252
+ if st.button("Quick Answer", help="Get a concise answer quickly"):
253
+ try:
254
+ with st.spinner("Getting quick answer..."):
255
+ answer = st.session_state.processor.ask_question_selective(
256
+ question,
257
+ selected_files
258
+ )
259
+ st.write("Answer:", answer)
260
+ except Exception as e:
261
+ st.error(f"Error getting answer: {str(e)}")
262
 
263
+ with col2:
264
+ if st.button("Detailed Answer", help="Get answer with sources and relevance scores"):
265
+ try:
266
+ with st.spinner("Getting detailed answer..."):
267
+ result = st.session_state.processor.get_detailed_context(
268
+ question,
269
+ selected_files
270
+ )
271
+ if result['success']:
272
+ st.write("### Relevant Information")
273
+ for item in result['results']:
274
+ with st.expander(f"Source: {item['metadata']['source_file']} ({item['metadata']['content_type'].upper()})"):
275
+ st.write(f"Relevance Score: {item['relevance_score']:.2f}")
276
+ if item['metadata']['content_type'] == 'xml':
277
+ st.write(f"XML Path: {item['source_info']['path']}")
278
+ st.write("Content:", item['content'])
279
+ else:
280
+ st.error(result['error'])
281
+ except Exception as e:
282
+ st.error(f"Error getting detailed answer: {str(e)}")
283
 
284
+ with col3:
285
+ if st.button("Complete Analysis", help="Get comprehensive analysis with XML hierarchy"):
286
+ try:
287
+ with st.spinner("Performing complete analysis..."):
288
+ result = st.session_state.processor.get_summary_and_details(
289
+ question,
290
+ selected_files
291
+ )
292
+ if result['success']:
293
+ st.write("### Summary")
294
+ st.write(result['summary'])
295
+
296
+ st.write("### Detailed Information")
297
+ for item in result['details']:
298
+ with st.expander(f"Source: {item['metadata']['source_file']} ({item['metadata']['content_type'].upper()})"):
299
+ st.write(f"Relevance Score: {item['relevance_score']:.2f}")
300
+ if item['metadata']['content_type'] == 'xml':
301
+ st.write(f"XML Path: {item['source_info']['path']}")
302
+ if 'parent_info' in item:
303
+ st.write("Parent Element:", item['parent_info']['content'])
304
+ if 'children_info' in item:
305
+ st.write("Related Elements:")
306
+ for child in item['children_info']:
307
+ st.write(f"- {child['content']}")
308
+ st.write("Content:", item['content'])
309
+ else:
310
+ st.error(result['error'])
311
+ except Exception as e:
312
+ st.error(f"Error getting complete analysis: {str(e)}")
313
 
314
  except Exception as e:
315
  st.error(f"Error in Q&A interface: {str(e)}")
316
 
317
  def main():
318
+ # Set page config
319
+ st.set_page_config(
320
+ page_title="Document Assistant",
321
+ page_icon="πŸ“š",
322
+ layout="wide",
323
+ initial_sidebar_state="expanded"
324
+ )
325
+
326
  # Initialize session state
327
  initialize_session_state()
328