Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -14,7 +14,7 @@ def initialize_session_state():
|
|
14 |
os.makedirs(st.session_state.CHROMADB_DIR, exist_ok=True)
|
15 |
|
16 |
if 'processed_files' not in st.session_state:
|
17 |
-
st.session_state.processed_files =
|
18 |
|
19 |
if 'processor' not in st.session_state:
|
20 |
try:
|
@@ -71,19 +71,18 @@ class StreamlitDocProcessor:
|
|
71 |
persist_dir=st.session_state.CHROMADB_DIR
|
72 |
)
|
73 |
|
74 |
-
def get_processed_files(self) ->
|
75 |
"""Get list of processed files from ChromaDB"""
|
76 |
try:
|
77 |
if st.session_state.processor:
|
78 |
-
|
79 |
-
|
80 |
-
return set()
|
81 |
except Exception as e:
|
82 |
st.error(f"Error getting processed files: {str(e)}")
|
83 |
-
return
|
84 |
|
85 |
def run(self):
|
86 |
-
st.title("
|
87 |
|
88 |
# Create sidebar for navigation
|
89 |
page = st.sidebar.selectbox(
|
@@ -112,7 +111,9 @@ class StreamlitDocProcessor:
|
|
112 |
progress_bar = st.progress(0)
|
113 |
status_text = st.empty()
|
114 |
|
115 |
-
if
|
|
|
|
|
116 |
try:
|
117 |
# Create a temporary file
|
118 |
with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
|
@@ -127,7 +128,9 @@ class StreamlitDocProcessor:
|
|
127 |
progress_bar.progress(75)
|
128 |
|
129 |
if result['success']:
|
130 |
-
st.session_state.processed_files
|
|
|
|
|
131 |
progress_bar.progress(100)
|
132 |
status_text.success(f"Successfully processed {uploaded_file.name}")
|
133 |
else:
|
@@ -147,50 +150,54 @@ class StreamlitDocProcessor:
|
|
147 |
progress_bar.progress(100)
|
148 |
|
149 |
# Display processed files
|
150 |
-
if st.session_state.processed_files:
|
151 |
st.subheader("Processed Files")
|
152 |
-
|
153 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
|
155 |
def qa_page(self):
|
156 |
-
st.header("Query
|
157 |
|
158 |
try:
|
159 |
# Refresh available files
|
160 |
st.session_state.processed_files = self.get_processed_files()
|
161 |
|
162 |
-
if not st.session_state.processed_files:
|
163 |
st.warning("No processed files available. Please upload and process some files first.")
|
164 |
return
|
165 |
|
166 |
-
#
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
|
|
171 |
|
|
|
|
|
|
|
|
|
|
|
172 |
selected_files = st.multiselect(
|
173 |
"Select files to search through",
|
174 |
-
all_files,
|
175 |
-
default=all_files
|
176 |
-
help="π± = XML files, π = PDF files"
|
177 |
)
|
178 |
|
179 |
-
#
|
180 |
-
selected_files = [f[
|
181 |
|
182 |
if not selected_files:
|
183 |
st.warning("Please select at least one file to search through.")
|
184 |
return
|
185 |
|
186 |
-
# Question input
|
187 |
-
xml_selected = any(f.endswith('.xml') for f in selected_files)
|
188 |
-
if xml_selected:
|
189 |
-
st.info("Suggested questions for XML content:\n" +
|
190 |
-
"• What are the main components and their relationships?\n" +
|
191 |
-
"• What data types and properties are defined?\n" +
|
192 |
-
"• How are the elements structured and organized?")
|
193 |
-
|
194 |
question = st.text_input("Enter your question:")
|
195 |
|
196 |
if st.button("Ask Question") and question:
|
@@ -200,28 +207,12 @@ class StreamlitDocProcessor:
|
|
200 |
question,
|
201 |
selected_files
|
202 |
)
|
203 |
-
|
204 |
-
# Display the answer in a structured way
|
205 |
st.write("Answer:", answer)
|
206 |
-
|
207 |
-
# If XML files were queried, show additional metadata
|
208 |
-
if xml_selected:
|
209 |
-
with st.expander("Show XML Structure Details"):
|
210 |
-
st.write("Related XML Elements:")
|
211 |
-
# Get the structure information from the processor
|
212 |
-
xml_details = st.session_state.processor.get_xml_structure_info(
|
213 |
-
selected_files,
|
214 |
-
question
|
215 |
-
)
|
216 |
-
for detail in xml_details:
|
217 |
-
st.code(detail, language="xml")
|
218 |
-
|
219 |
except Exception as e:
|
220 |
st.error(f"Error getting answer: {str(e)}")
|
221 |
|
222 |
except Exception as e:
|
223 |
st.error(f"Error in Q&A interface: {str(e)}")
|
224 |
-
|
225 |
|
226 |
def main():
|
227 |
# Initialize session state
|
|
|
14 |
os.makedirs(st.session_state.CHROMADB_DIR, exist_ok=True)
|
15 |
|
16 |
if 'processed_files' not in st.session_state:
|
17 |
+
st.session_state.processed_files = dict(pdf=[], xml=[])
|
18 |
|
19 |
if 'processor' not in st.session_state:
|
20 |
try:
|
|
|
71 |
persist_dir=st.session_state.CHROMADB_DIR
|
72 |
)
|
73 |
|
74 |
+
def get_processed_files(self) -> dict:
|
75 |
"""Get list of processed files from ChromaDB"""
|
76 |
try:
|
77 |
if st.session_state.processor:
|
78 |
+
return st.session_state.processor.get_available_files()
|
79 |
+
return dict(pdf=[], xml=[])
|
|
|
80 |
except Exception as e:
|
81 |
st.error(f"Error getting processed files: {str(e)}")
|
82 |
+
return dict(pdf=[], xml=[])
|
83 |
|
84 |
def run(self):
|
85 |
+
st.title("Document Assistant")
|
86 |
|
87 |
# Create sidebar for navigation
|
88 |
page = st.sidebar.selectbox(
|
|
|
111 |
progress_bar = st.progress(0)
|
112 |
status_text = st.empty()
|
113 |
|
114 |
+
# Check if file is already processed
|
115 |
+
file_ext = os.path.splitext(uploaded_file.name)[1][1:] # Get extension without dot
|
116 |
+
if uploaded_file.name not in st.session_state.processed_files.get(file_ext, []):
|
117 |
try:
|
118 |
# Create a temporary file
|
119 |
with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
|
|
|
128 |
progress_bar.progress(75)
|
129 |
|
130 |
if result['success']:
|
131 |
+
if file_ext not in st.session_state.processed_files:
|
132 |
+
st.session_state.processed_files[file_ext] = []
|
133 |
+
st.session_state.processed_files[file_ext].append(uploaded_file.name)
|
134 |
progress_bar.progress(100)
|
135 |
status_text.success(f"Successfully processed {uploaded_file.name}")
|
136 |
else:
|
|
|
150 |
progress_bar.progress(100)
|
151 |
|
152 |
# Display processed files
|
153 |
+
if any(st.session_state.processed_files.values()):
|
154 |
st.subheader("Processed Files")
|
155 |
+
if st.session_state.processed_files.get('xml'):
|
156 |
+
st.write("XML Files:")
|
157 |
+
for file in sorted(st.session_state.processed_files['xml']):
|
158 |
+
st.text(f"π± {file}")
|
159 |
+
if st.session_state.processed_files.get('pdf'):
|
160 |
+
st.write("PDF Files:")
|
161 |
+
for file in sorted(st.session_state.processed_files['pdf']):
|
162 |
+
st.text(f"π {file}")
|
163 |
|
164 |
def qa_page(self):
|
165 |
+
st.header("Query Documents")
|
166 |
|
167 |
try:
|
168 |
# Refresh available files
|
169 |
st.session_state.processed_files = self.get_processed_files()
|
170 |
|
171 |
+
if not any(st.session_state.processed_files.values()):
|
172 |
st.warning("No processed files available. Please upload and process some files first.")
|
173 |
return
|
174 |
|
175 |
+
# Create combined list of files with icons
|
176 |
+
all_files = []
|
177 |
+
for file in st.session_state.processed_files.get('xml', []):
|
178 |
+
all_files.append(f"π± {file}")
|
179 |
+
for file in st.session_state.processed_files.get('pdf', []):
|
180 |
+
all_files.append(f"π {file}")
|
181 |
|
182 |
+
if not all_files:
|
183 |
+
st.warning("No processed files available. Please upload and process some files first.")
|
184 |
+
return
|
185 |
+
|
186 |
+
# File selection
|
187 |
selected_files = st.multiselect(
|
188 |
"Select files to search through",
|
189 |
+
sorted(all_files),
|
190 |
+
default=all_files
|
|
|
191 |
)
|
192 |
|
193 |
+
# Remove icons from selected files
|
194 |
+
selected_files = [f.split(' ', 1)[1] for f in selected_files]
|
195 |
|
196 |
if not selected_files:
|
197 |
st.warning("Please select at least one file to search through.")
|
198 |
return
|
199 |
|
200 |
+
# Question input
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
question = st.text_input("Enter your question:")
|
202 |
|
203 |
if st.button("Ask Question") and question:
|
|
|
207 |
question,
|
208 |
selected_files
|
209 |
)
|
|
|
|
|
210 |
st.write("Answer:", answer)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
except Exception as e:
|
212 |
st.error(f"Error getting answer: {str(e)}")
|
213 |
|
214 |
except Exception as e:
|
215 |
st.error(f"Error in Q&A interface: {str(e)}")
|
|
|
216 |
|
217 |
def main():
|
218 |
# Initialize session state
|