jessica45 committed on
Commit
5d526d1
·
verified ·
1 Parent(s): 64103af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -287
app.py CHANGED
@@ -1,107 +1,3 @@
1
- # import streamlit as st
2
- # from pdf_utils import extract_text_from_file, split_text
3
- # from chroma_db_utils import create_chroma_db, load_chroma_collection
4
- # from query_handler import handle_query
5
- # import os
6
- # import re
7
- # import tempfile
8
-
9
- # def generate_collection_name(file_path=None):
10
- # """Generate a valid collection name from a file path."""
11
- # base_name = os.path.basename(file_path) if file_path else "collection"
12
- # # Remove file extension
13
- # base_name = re.sub(r'\..*$', '', base_name)
14
- # # Replace invalid characters and ensure it starts with a letter
15
- # base_name = re.sub(r'\W+', '_', base_name)
16
- # base_name = re.sub(r'^[^a-zA-Z]+', '', base_name)
17
- # return base_name
18
-
19
- # def process_uploaded_file(uploaded_file, chroma_db_path):
20
- # """Process the uploaded file and create/load ChromaDB collection."""
21
- # # Create a temporary file to store the uploaded content
22
- # with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
23
- # tmp_file.write(uploaded_file.getvalue())
24
- # file_path = tmp_file.name
25
-
26
- # try:
27
- # # Generate collection name from original filename
28
- # collection_name = generate_collection_name(uploaded_file.name)
29
-
30
- # # Extract and process text
31
- # file_text = extract_text_from_file(file_path)
32
- # if file_text is None:
33
- # return None, "Failed to extract text from the file."
34
-
35
- # chunked_text = split_text(file_text)
36
-
37
- # # Try to load existing collection or create new one
38
- # try:
39
- # db = load_chroma_collection(collection_name, chroma_db_path)
40
- # st.success("Loaded existing ChromaDB collection.")
41
- # except Exception:
42
- # db = create_chroma_db(chunked_text, collection_name, chroma_db_path)
43
- # st.success("Created new ChromaDB collection.")
44
-
45
- # return db, None
46
-
47
- # except Exception as e:
48
- # return None, f"Error processing file: {str(e)}"
49
- # finally:
50
- # # Clean up temporary file
51
- # os.unlink(file_path)
52
-
53
- # def main():
54
- # st.title("File Question Answering System")
55
-
56
- # # Sidebar for configuration
57
- # st.sidebar.header("Configuration")
58
- # chroma_db_path = st.sidebar.text_input(
59
- # "ChromaDB Path",
60
- # value="./chroma_db",
61
- # help="Directory where ChromaDB collections will be stored"
62
- # )
63
-
64
- # # Main content
65
- # st.write("Upload a file and ask questions about its content!")
66
-
67
- # # File uploader
68
- # uploaded_file = st.file_uploader("Upload a file", type=["pdf", "docx", "txt"])
69
-
70
- # # Session state initialization
71
- # if 'db' not in st.session_state:
72
- # st.session_state.db = None
73
-
74
- # if uploaded_file is not None:
75
- # # Process file if not already processed
76
- # if st.session_state.db is None:
77
- # with st.spinner("Processing PDF file..."):
78
- # db, error = process_uploaded_file(uploaded_file, chroma_db_path)
79
- # if error:
80
- # st.error(error)
81
- # else:
82
- # st.session_state.db = db
83
- # st.success("File processed successfully!")
84
-
85
- # # Question answering interface
86
- # st.subheader("Ask a Question")
87
- # question = st.text_input("Enter your question:")
88
-
89
- # if question:
90
- # if st.session_state.db is not None:
91
- # with st.spinner("Finding answer..."):
92
- # answer = handle_query(question, st.session_state.db)
93
- # st.subheader("Answer:")
94
- # st.write(answer)
95
- # else:
96
- # st.error("Please wait for the file to be processed or try uploading again.")
97
-
98
- # # Clear database button
99
- # if st.button("Clear Database"):
100
- # st.session_state.db = None
101
- # st.success("Database cleared. You can upload a new file.")
102
-
103
- # if __name__ == "__main__":
104
- # main()
105
  import streamlit as st
106
  import os
107
  from typing import List
@@ -209,189 +105,6 @@ def main():
209
  })
210
  except Exception as e:
211
  st.error(f"Error generating response: {str(e)}")
212
-
213
- # Add a clear chat button
214
- if st.sidebar.button("Clear Chat"):
215
- st.session_state.messages = []
216
- st.experimental_rerun()
217
 
218
  if __name__ == "__main__":
219
  main()
220
-
221
-
222
-
223
-
224
- # import streamlit as st
225
- # from chromadb.config import Settings
226
- # import os
227
- # import chromadb
228
- # from typing import List
229
- # import time
230
- # import google
231
- # import datetime
232
- # # from chroma_db_utils import create_chroma_db, get_relevant_passage
233
- # from query_handler import generate_answer, handle_query
234
- # from pdf_utils import extract_text_from_file, split_text
235
- # import logging
236
-
237
- # # Configure logging
238
- # logging.basicConfig(level=logging.INFO)
239
- # logger = logging.getLogger(__name__)
240
-
241
- # def create_chroma_db(chunks: List[str]):
242
- # """Create and return an ephemeral ChromaDB collection."""
243
- # try:
244
- # # Initialize ChromaDB with ephemeral storage
245
- # client = chromadb.EphemeralClient()
246
-
247
- # # Create collection
248
- # collection_name = f"temp_collection_{int(time.time())}"
249
- # collection = client.create_collection(name=collection_name)
250
-
251
- # # Add documents
252
- # collection.add(
253
- # documents=chunks,
254
- # ids=[f"doc_{i}" for i in range(len(chunks))]
255
- # )
256
-
257
- # # Verify the data was added
258
- # verify_count = collection.count()
259
- # print(f"Verified: Added {verify_count} documents to collection {collection_name}")
260
-
261
- # # Store both client and collection in session state
262
- # st.session_state.chroma_client = client
263
- # return collection
264
-
265
- # except Exception as e:
266
- # print(f"Error creating ChromaDB: {str(e)}")
267
- # return None
268
-
269
- # def get_relevant_passage(query: str, collection):
270
- # """Get relevant passages from the collection."""
271
- # try:
272
- # # Use the collection directly since it's ephemeral
273
- # results = collection.query(
274
- # query_texts=[query],
275
- # n_results=2
276
- # )
277
-
278
- # if results and 'documents' in results:
279
- # print(f"Found {len(results['documents'])} relevant passages")
280
- # return results['documents']
281
- # return None
282
-
283
- # except Exception as e:
284
- # print(f"Error in get_relevant_passage: {str(e)}")
285
- # return None
286
-
287
- # def initialize_session_state():
288
- # """Initialize Streamlit session state variables."""
289
- # if "chat_history" not in st.session_state:
290
- # st.session_state.chat_history = []
291
- # if "chroma_collection" not in st.session_state:
292
- # st.session_state.chroma_collection = None
293
- # if "chroma_client" not in st.session_state:
294
- # st.session_state.chroma_client = None
295
-
296
- # def process_uploaded_file(uploaded_file) -> List[str]:
297
- # """Process the uploaded file and return text chunks."""
298
- # temp_file_path = f"/tmp/{uploaded_file.name}"
299
-
300
- # try:
301
- # with open(temp_file_path, "wb") as f:
302
- # f.write(uploaded_file.getbuffer())
303
-
304
- # # Extract text from the file
305
- # extracted_text = extract_text_from_file(temp_file_path)
306
-
307
- # if extracted_text:
308
- # # Split text into chunks
309
- # chunks = split_text(extracted_text)
310
- # return chunks
311
- # else:
312
- # st.error("No text could be extracted from the file.")
313
- # return []
314
- # finally:
315
- # if os.path.exists(temp_file_path):
316
- # os.remove(temp_file_path)
317
-
318
- # def chat_interface():
319
- # st.title("Chat with Your Documents 📄💬")
320
-
321
- # # Debug: Print current state
322
- # print(f"Current chroma_collection state: {st.session_state.chroma_collection}")
323
-
324
- # uploaded_files = st.file_uploader(
325
- # "Upload your files (TXT, PDF)",
326
- # accept_multiple_files=True,
327
- # type=['txt', 'pdf']
328
- # )
329
-
330
- # if uploaded_files and st.button("Process Files"):
331
- # with st.spinner("Processing files..."):
332
- # all_chunks = []
333
- # for uploaded_file in uploaded_files:
334
- # chunks = process_uploaded_file(uploaded_file)
335
- # print(f"Processed {len(chunks)} chunks from {uploaded_file.name}")
336
- # if chunks:
337
- # all_chunks.extend(chunks)
338
-
339
- # if all_chunks:
340
- # print(f"Creating ChromaDB with {len(all_chunks)} total chunks")
341
- # # Create ChromaDB collection with all documents
342
- # db = create_chroma_db(all_chunks)
343
- # if db:
344
- # # Verify the collection immediately after creation
345
- # try:
346
- # verify_count = db.count()
347
- # print(f"Verification - Collection size: {verify_count}")
348
- # # Try a test query
349
- # test_query = db.query(
350
- # query_texts=["test verification query"],
351
- # n_results=1
352
- # )
353
- # print("Verification - Query test successful")
354
-
355
- # st.session_state.chroma_collection = db
356
- # st.success(f"Files processed successfully! {verify_count} chunks loaded.")
357
- # except Exception as e:
358
- # print(f"Verification failed: {str(e)}")
359
- # st.error("Database verification failed")
360
- # else:
361
- # st.error("Failed to create database")
362
-
363
- # # Query interface
364
- # if st.session_state.chroma_collection is not None:
365
- # print("ChromaDB collection found in session state")
366
- # query = st.text_input("Ask a question about your documents:")
367
- # if st.button("Send") and query:
368
- # print(f"Processing query: {query}")
369
- # with st.spinner("Generating response..."):
370
- # try:
371
- # # Verify both client and collection exist
372
- # if st.session_state.chroma_client is None or st.session_state.chroma_collection is None:
373
- # st.error("Please upload documents first")
374
- # return
375
-
376
- # collection = st.session_state.chroma_collection
377
- # print(f"Collection name: {collection.name}")
378
- # print(f"Collection size: {collection.count()}")
379
-
380
- # relevant_passages = get_relevant_passage(query, collection)
381
-
382
- # if relevant_passages:
383
- # response = handle_query(query, relevant_passages)
384
- # st.session_state.chat_history.append((query, response))
385
- # else:
386
- # st.warning("No relevant information found in the documents.")
387
-
388
- # except Exception as e:
389
- # print(f"Full error during query processing: {str(e)}")
390
- # logger.exception("Detailed error trace:") # This will log the full stack trace
391
- # st.error("Failed to process your question. Please try again.")
392
- # else:
393
- # print("No ChromaDB collection in session state")
394
-
395
- # if __name__ == "__main__":
396
- # initialize_session_state()
397
- # chat_interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import os
3
  from typing import List
 
105
  })
106
  except Exception as e:
107
  st.error(f"Error generating response: {str(e)}")
 
 
 
 
 
108
 
109
  if __name__ == "__main__":
110
  main()