TahaRasouli committed on
Commit
d1ea517
·
verified ·
1 Parent(s): c94951d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -55
app.py CHANGED
@@ -108,91 +108,66 @@ class StreamlitDocProcessor:
108
  def upload_and_process_page(self):
109
  st.header("Upload and Process Documents")
110
 
111
- # Add instructions
112
- with st.expander("Instructions", expanded=True):
113
- st.write("""
114
- 1. Click 'Browse files' to select documents
115
- 2. You can select multiple files at once
116
- 3. Supported formats: PDF and XML
117
- 4. Wait for processing to complete
118
- 5. Processed files will be listed below
119
- """)
120
-
121
- # File uploader
122
  uploaded_files = st.file_uploader(
123
  "Upload PDF or XML files",
124
  type=['pdf', 'xml'],
125
- accept_multiple_files=True,
126
- help="Select one or more PDF or XML files to upload"
127
  )
128
-
129
  if uploaded_files:
130
  for uploaded_file in uploaded_files:
131
- # Create progress bar and status container
132
- col1, col2 = st.columns([3, 1])
133
- with col1:
134
- progress_bar = st.progress(0)
135
- with col2:
136
- status_text = st.empty()
137
 
138
- # Check if file is already processed
139
- file_ext = os.path.splitext(uploaded_file.name)[1][1:] # Get extension without dot
140
  if uploaded_file.name not in st.session_state.processed_files.get(file_ext, []):
141
  try:
142
- # Create a temporary file
143
  with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
144
  tmp_file.write(uploaded_file.getbuffer())
145
  temp_path = tmp_file.name
146
-
147
- # Process the file
148
- status_text.info('Processing...')
149
- progress_bar.progress(25)
150
 
151
- result = st.session_state.processor.process_file(temp_path)
152
- progress_bar.progress(75)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  if result['success']:
155
  if file_ext not in st.session_state.processed_files:
156
  st.session_state.processed_files[file_ext] = []
157
  st.session_state.processed_files[file_ext].append(uploaded_file.name)
158
  progress_bar.progress(100)
159
- status_text.success("✓ Success")
160
  else:
161
  progress_bar.progress(100)
162
- status_text.error("✗ Failed")
163
- st.error(f"Failed to process {uploaded_file.name}: {result['error']}")
164
-
165
  except Exception as e:
166
- status_text.error("✗ Error")
167
- st.error(f"Error processing {uploaded_file.name}: {str(e)}")
168
  finally:
169
- # Clean up temporary file
170
  try:
171
  os.unlink(temp_path)
172
  except:
173
  pass
174
  else:
175
- status_text.info("Already processed")
176
  progress_bar.progress(100)
177
 
178
- # Display processed files
179
- if any(st.session_state.processed_files.values()):
180
- st.subheader("Processed Files")
181
-
182
- col1, col2 = st.columns(2)
183
-
184
- with col1:
185
- if st.session_state.processed_files.get('xml'):
186
- st.write("📱 XML Files:")
187
- for file in sorted(st.session_state.processed_files['xml']):
188
- st.text(f" • {file}")
189
-
190
- with col2:
191
- if st.session_state.processed_files.get('pdf'):
192
- st.write("📄 PDF Files:")
193
- for file in sorted(st.session_state.processed_files['pdf']):
194
- st.text(f" • {file}")
195
-
196
  def qa_page(self):
197
  st.header("Query Documents")
198
 
 
108
  def upload_and_process_page(self):
109
  st.header("Upload and Process Documents")
110
 
 
 
 
 
 
 
 
 
 
 
 
111
  uploaded_files = st.file_uploader(
112
  "Upload PDF or XML files",
113
  type=['pdf', 'xml'],
114
+ accept_multiple_files=True
 
115
  )
116
+
117
  if uploaded_files:
118
  for uploaded_file in uploaded_files:
119
+ # Create progress containers
120
+ progress_bar = st.progress(0)
121
+ status_container = st.empty()
 
 
 
122
 
123
+ file_ext = os.path.splitext(uploaded_file.name)[1][1:]
 
124
  if uploaded_file.name not in st.session_state.processed_files.get(file_ext, []):
125
  try:
 
126
  with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
127
  tmp_file.write(uploaded_file.getbuffer())
128
  temp_path = tmp_file.name
129
+
130
+ status_container.text(f'Processing {uploaded_file.name}...')
 
 
131
 
132
+ if file_ext == 'xml':
133
+ # Add processing status updates
134
+ status_container.text('Parsing XML...')
135
+ progress_bar.progress(10)
136
+
137
+ result = st.session_state.processor.process_file(temp_path)
138
+
139
+ if result['success']:
140
+ total_chunks = result['total_chunks']
141
+ for i, chunk_result in enumerate(result['results']):
142
+ # Update progress for each batch
143
+ progress = min(95, int(10 + (85 * (i / total_chunks))))
144
+ progress_bar.progress(progress)
145
+ status_container.text(f'Processing chunk {i+1}/{total_chunks}...')
146
+ else:
147
+ # Regular PDF processing
148
+ result = st.session_state.processor.process_file(temp_path)
149
 
150
  if result['success']:
151
  if file_ext not in st.session_state.processed_files:
152
  st.session_state.processed_files[file_ext] = []
153
  st.session_state.processed_files[file_ext].append(uploaded_file.name)
154
  progress_bar.progress(100)
155
+ status_container.success(f"Successfully processed {uploaded_file.name}")
156
  else:
157
  progress_bar.progress(100)
158
+ status_container.error(f"Failed to process {uploaded_file.name}: {result['error']}")
159
+
 
160
  except Exception as e:
161
+ status_container.error(f"Error processing {uploaded_file.name}: {str(e)}")
 
162
  finally:
 
163
  try:
164
  os.unlink(temp_path)
165
  except:
166
  pass
167
  else:
168
+ status_container.info(f"{uploaded_file.name} has already been processed")
169
  progress_bar.progress(100)
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  def qa_page(self):
172
  st.header("Query Documents")
173