TahaRasouli committed on
Commit
b8885ab
·
verified ·
1 Parent(s): 5aa51eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -48
app.py CHANGED
@@ -14,7 +14,7 @@ def initialize_session_state():
14
  os.makedirs(st.session_state.CHROMADB_DIR, exist_ok=True)
15
 
16
  if 'processed_files' not in st.session_state:
17
- st.session_state.processed_files = set()
18
 
19
  if 'processor' not in st.session_state:
20
  try:
@@ -71,19 +71,18 @@ class StreamlitDocProcessor:
71
  persist_dir=st.session_state.CHROMADB_DIR
72
  )
73
 
74
- def get_processed_files(self) -> set:
75
  """Get list of processed files from ChromaDB"""
76
  try:
77
  if st.session_state.processor:
78
- available_files = st.session_state.processor.get_available_files()
79
- return set(available_files['pdf'] + available_files['xml'])
80
- return set()
81
  except Exception as e:
82
  st.error(f"Error getting processed files: {str(e)}")
83
- return set()
84
 
85
  def run(self):
86
- st.title("AAS Assistant")
87
 
88
  # Create sidebar for navigation
89
  page = st.sidebar.selectbox(
@@ -112,7 +111,9 @@ class StreamlitDocProcessor:
112
  progress_bar = st.progress(0)
113
  status_text = st.empty()
114
 
115
- if uploaded_file.name not in st.session_state.processed_files:
 
 
116
  try:
117
  # Create a temporary file
118
  with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
@@ -127,7 +128,9 @@ class StreamlitDocProcessor:
127
  progress_bar.progress(75)
128
 
129
  if result['success']:
130
- st.session_state.processed_files.add(uploaded_file.name)
 
 
131
  progress_bar.progress(100)
132
  status_text.success(f"Successfully processed {uploaded_file.name}")
133
  else:
@@ -147,50 +150,54 @@ class StreamlitDocProcessor:
147
  progress_bar.progress(100)
148
 
149
  # Display processed files
150
- if st.session_state.processed_files:
151
  st.subheader("Processed Files")
152
- for file in sorted(st.session_state.processed_files):
153
- st.text(f"✓ {file}")
 
 
 
 
 
 
154
 
155
  def qa_page(self):
156
- st.header("Query our database")
157
 
158
  try:
159
  # Refresh available files
160
  st.session_state.processed_files = self.get_processed_files()
161
 
162
- if not st.session_state.processed_files:
163
  st.warning("No processed files available. Please upload and process some files first.")
164
  return
165
 
166
- # Enhanced file selection with type indicators
167
- available_files = self.get_processed_files()
168
- xml_files = [f"📱 {f}" for f in available_files['xml']]
169
- pdf_files = [f"📄 {f}" for f in available_files['pdf']]
170
- all_files = sorted(xml_files + pdf_files)
 
171
 
 
 
 
 
 
172
  selected_files = st.multiselect(
173
  "Select files to search through",
174
- all_files,
175
- default=all_files,
176
- help="📱 = XML files, 📄 = PDF files"
177
  )
178
 
179
- # Clean up the file names (remove emojis) for processing
180
- selected_files = [f[2:] for f in selected_files] # Remove emoji prefix
181
 
182
  if not selected_files:
183
  st.warning("Please select at least one file to search through.")
184
  return
185
 
186
- # Question input with suggested prompts for XML
187
- xml_selected = any(f.endswith('.xml') for f in selected_files)
188
- if xml_selected:
189
- st.info("Suggested questions for XML content:\n" +
190
- "• What are the main components and their relationships?\n" +
191
- "• What data types and properties are defined?\n" +
192
- "• How are the elements structured and organized?")
193
-
194
  question = st.text_input("Enter your question:")
195
 
196
  if st.button("Ask Question") and question:
@@ -200,28 +207,12 @@ class StreamlitDocProcessor:
200
  question,
201
  selected_files
202
  )
203
-
204
- # Display the answer in a structured way
205
  st.write("Answer:", answer)
206
-
207
- # If XML files were queried, show additional metadata
208
- if xml_selected:
209
- with st.expander("Show XML Structure Details"):
210
- st.write("Related XML Elements:")
211
- # Get the structure information from the processor
212
- xml_details = st.session_state.processor.get_xml_structure_info(
213
- selected_files,
214
- question
215
- )
216
- for detail in xml_details:
217
- st.code(detail, language="xml")
218
-
219
  except Exception as e:
220
  st.error(f"Error getting answer: {str(e)}")
221
 
222
  except Exception as e:
223
  st.error(f"Error in Q&A interface: {str(e)}")
224
-
225
 
226
  def main():
227
  # Initialize session state
 
14
  os.makedirs(st.session_state.CHROMADB_DIR, exist_ok=True)
15
 
16
  if 'processed_files' not in st.session_state:
17
+ st.session_state.processed_files = dict(pdf=[], xml=[])
18
 
19
  if 'processor' not in st.session_state:
20
  try:
 
71
  persist_dir=st.session_state.CHROMADB_DIR
72
  )
73
 
74
+ def get_processed_files(self) -> dict:
75
  """Get list of processed files from ChromaDB"""
76
  try:
77
  if st.session_state.processor:
78
+ return st.session_state.processor.get_available_files()
79
+ return dict(pdf=[], xml=[])
 
80
  except Exception as e:
81
  st.error(f"Error getting processed files: {str(e)}")
82
+ return dict(pdf=[], xml=[])
83
 
84
  def run(self):
85
+ st.title("Document Assistant")
86
 
87
  # Create sidebar for navigation
88
  page = st.sidebar.selectbox(
 
111
  progress_bar = st.progress(0)
112
  status_text = st.empty()
113
 
114
+ # Check if file is already processed
115
+ file_ext = os.path.splitext(uploaded_file.name)[1][1:] # Get extension without dot
116
+ if uploaded_file.name not in st.session_state.processed_files.get(file_ext, []):
117
  try:
118
  # Create a temporary file
119
  with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
 
128
  progress_bar.progress(75)
129
 
130
  if result['success']:
131
+ if file_ext not in st.session_state.processed_files:
132
+ st.session_state.processed_files[file_ext] = []
133
+ st.session_state.processed_files[file_ext].append(uploaded_file.name)
134
  progress_bar.progress(100)
135
  status_text.success(f"Successfully processed {uploaded_file.name}")
136
  else:
 
150
  progress_bar.progress(100)
151
 
152
  # Display processed files
153
+ if any(st.session_state.processed_files.values()):
154
  st.subheader("Processed Files")
155
+ if st.session_state.processed_files.get('xml'):
156
+ st.write("XML Files:")
157
+ for file in sorted(st.session_state.processed_files['xml']):
158
+ st.text(f"📱 {file}")
159
+ if st.session_state.processed_files.get('pdf'):
160
+ st.write("PDF Files:")
161
+ for file in sorted(st.session_state.processed_files['pdf']):
162
+ st.text(f"📄 {file}")
163
 
164
  def qa_page(self):
165
+ st.header("Query Documents")
166
 
167
  try:
168
  # Refresh available files
169
  st.session_state.processed_files = self.get_processed_files()
170
 
171
+ if not any(st.session_state.processed_files.values()):
172
  st.warning("No processed files available. Please upload and process some files first.")
173
  return
174
 
175
+ # Create combined list of files with icons
176
+ all_files = []
177
+ for file in st.session_state.processed_files.get('xml', []):
178
+ all_files.append(f"📱 {file}")
179
+ for file in st.session_state.processed_files.get('pdf', []):
180
+ all_files.append(f"📄 {file}")
181
 
182
+ if not all_files:
183
+ st.warning("No processed files available. Please upload and process some files first.")
184
+ return
185
+
186
+ # File selection
187
  selected_files = st.multiselect(
188
  "Select files to search through",
189
+ sorted(all_files),
190
+ default=all_files
 
191
  )
192
 
193
+ # Remove icons from selected files
194
+ selected_files = [f.split(' ', 1)[1] for f in selected_files]
195
 
196
  if not selected_files:
197
  st.warning("Please select at least one file to search through.")
198
  return
199
 
200
+ # Question input
 
 
 
 
 
 
 
201
  question = st.text_input("Enter your question:")
202
 
203
  if st.button("Ask Question") and question:
 
207
  question,
208
  selected_files
209
  )
 
 
210
  st.write("Answer:", answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  except Exception as e:
212
  st.error(f"Error getting answer: {str(e)}")
213
 
214
  except Exception as e:
215
  st.error(f"Error in Q&A interface: {str(e)}")
 
216
 
217
  def main():
218
  # Initialize session state