Shreyas094 committed on
Commit
4205901
·
verified ·
1 Parent(s): 57b395c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -12
app.py CHANGED
@@ -66,17 +66,30 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
66
  def get_embeddings():
67
  return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def update_vectors(files, parser):
70
  global uploaded_documents
71
  logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
72
 
73
  if not files:
74
  logging.warning("No files provided for update_vectors")
75
- return "Please upload at least one PDF file.", gr.CheckboxGroup(
76
- choices=[doc["name"] for doc in uploaded_documents],
77
- value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
78
- label="Select documents to query"
79
- )
80
 
81
  embed = get_embeddings()
82
  total_chunks = 0
@@ -89,7 +102,6 @@ def update_vectors(files, parser):
89
  logging.info(f"Loaded {len(data)} chunks from {file.name}")
90
  all_data.extend(data)
91
  total_chunks += len(data)
92
- # Append new documents instead of replacing
93
  if not any(doc["name"] == file.name for doc in uploaded_documents):
94
  uploaded_documents.append({"name": file.name, "selected": True})
95
  logging.info(f"Added new document to uploaded_documents: {file.name}")
@@ -111,12 +123,10 @@ def update_vectors(files, parser):
111
  database.save_local("faiss_database")
112
  logging.info("FAISS database saved")
113
 
114
-
115
- return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
116
- choices=[doc["name"] for doc in uploaded_documents],
117
- value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
118
- label="Select documents to query"
119
- )
120
 
121
  def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
122
  print(f"Starting generate_chunked_response with {num_calls} calls")
@@ -485,6 +495,11 @@ def initial_conversation():
485
  "3. Ask questions about uploaded PDF documents\n\n"
486
  "To get started, upload some PDFs or ask me a question!")
487
  ]
 
 
 
 
 
488
 
489
  # Define the checkbox outside the demo block
490
  document_selector = gr.CheckboxGroup(label="Select documents to query")
@@ -548,6 +563,7 @@ with demo:
548
  file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
549
  parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
550
  update_button = gr.Button("Upload Document")
 
551
 
552
  update_output = gr.Textbox(label="Update Status")
553
 
@@ -555,6 +571,11 @@ with demo:
555
  update_button.click(update_vectors,
556
  inputs=[file_input, parser_dropdown],
557
  outputs=[update_output, document_selector])
 
 
 
 
 
558
 
559
  gr.Markdown(
560
  """
 
66
  def get_embeddings():
67
  return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
68
 
69
+ # Path of the JSON file used to persist the list of uploaded documents
70
+ DOCUMENTS_FILE = "uploaded_documents.json"
71
+
72
+ def load_documents():
73
+ if os.path.exists(DOCUMENTS_FILE):
74
+ with open(DOCUMENTS_FILE, "r") as f:
75
+ return json.load(f)
76
+ return []
77
+
78
+ def save_documents(documents):
79
+ with open(DOCUMENTS_FILE, "w") as f:
80
+ json.dump(documents, f)
81
+
82
+ # Initialise the global document list from the persisted JSON file (empty list if the file does not exist)
83
+ uploaded_documents = load_documents()
84
+
85
+ # Modify the update_vectors function
86
  def update_vectors(files, parser):
87
  global uploaded_documents
88
  logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
89
 
90
  if not files:
91
  logging.warning("No files provided for update_vectors")
92
+ return "Please upload at least one PDF file.", display_documents()
 
 
 
 
93
 
94
  embed = get_embeddings()
95
  total_chunks = 0
 
102
  logging.info(f"Loaded {len(data)} chunks from {file.name}")
103
  all_data.extend(data)
104
  total_chunks += len(data)
 
105
  if not any(doc["name"] == file.name for doc in uploaded_documents):
106
  uploaded_documents.append({"name": file.name, "selected": True})
107
  logging.info(f"Added new document to uploaded_documents: {file.name}")
 
123
  database.save_local("faiss_database")
124
  logging.info("FAISS database saved")
125
 
126
+ # Save the updated list of documents
127
+ save_documents(uploaded_documents)
128
+
129
+ return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
 
 
130
 
131
  def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
132
  print(f"Starting generate_chunked_response with {num_calls} calls")
 
495
  "3. Ask questions about uploaded PDF documents\n\n"
496
  "To get started, upload some PDFs or ask me a question!")
497
  ]
498
+ # Reload uploaded_documents from the JSON file and return display_documents() for the UI
499
+ def refresh_documents():
500
+ global uploaded_documents
501
+ uploaded_documents = load_documents()
502
+ return display_documents()
503
 
504
  # Define the checkbox outside the demo block
505
  document_selector = gr.CheckboxGroup(label="Select documents to query")
 
563
  file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
564
  parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
565
  update_button = gr.Button("Upload Document")
566
+ refresh_button = gr.Button("Refresh Document List")
567
 
568
  update_output = gr.Textbox(label="Update Status")
569
 
 
571
  update_button.click(update_vectors,
572
  inputs=[file_input, parser_dropdown],
573
  outputs=[update_output, document_selector])
574
+
575
+ # Add the refresh button functionality
576
+ refresh_button.click(refresh_documents,
577
+ inputs=[],
578
+ outputs=[document_selector])
579
 
580
  gr.Markdown(
581
  """