SearchGPT

Paused

App Files Files Community

Shreyas094 committed on Aug 3, 2024

Commit

4205901

verified ·

1 Parent(s): 57b395c

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -12

app.py CHANGED Viewed

@@ -66,17 +66,30 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
 def update_vectors(files, parser):
     global uploaded_documents
     logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
     if not files:
         logging.warning("No files provided for update_vectors")
-        return "Please upload at least one PDF file.", gr.CheckboxGroup(
-            choices=[doc["name"] for doc in uploaded_documents],
-            value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
-            label="Select documents to query"
-        )
     embed = get_embeddings()
     total_chunks = 0
@@ -89,7 +102,6 @@ def update_vectors(files, parser):
             logging.info(f"Loaded {len(data)} chunks from {file.name}")
             all_data.extend(data)
             total_chunks += len(data)
-            # Append new documents instead of replacing
             if not any(doc["name"] == file.name for doc in uploaded_documents):
                 uploaded_documents.append({"name": file.name, "selected": True})
                 logging.info(f"Added new document to uploaded_documents: {file.name}")
@@ -111,12 +123,10 @@ def update_vectors(files, parser):
     database.save_local("faiss_database")
     logging.info("FAISS database saved")
-    return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
-        choices=[doc["name"] for doc in uploaded_documents],
-        value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
-        label="Select documents to query"
-    )
 def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
     print(f"Starting generate_chunked_response with {num_calls} calls")
@@ -485,6 +495,11 @@ def initial_conversation():
                 "3. Ask questions about uploaded PDF documents\n\n"
                 "To get started, upload some PDFs or ask me a question!")
     ]
 # Define the checkbox outside the demo block
 document_selector = gr.CheckboxGroup(label="Select documents to query")
@@ -548,6 +563,7 @@ with demo:
         file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
         parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
         update_button = gr.Button("Upload Document")
     update_output = gr.Textbox(label="Update Status")
@@ -555,6 +571,11 @@ with demo:
     update_button.click(update_vectors,
                         inputs=[file_input, parser_dropdown],
                         outputs=[update_output, document_selector])
     gr.Markdown(
     """

 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
+# Add this at the beginning of your script, after imports
+DOCUMENTS_FILE = "uploaded_documents.json"
+def load_documents():
+    if os.path.exists(DOCUMENTS_FILE):
+        with open(DOCUMENTS_FILE, "r") as f:
+            return json.load(f)
+    return []
+def save_documents(documents):
+    with open(DOCUMENTS_FILE, "w") as f:
+        json.dump(documents, f)
+# Replace the global uploaded_documents with this
+uploaded_documents = load_documents()
+# Modify the update_vectors function
 def update_vectors(files, parser):
     global uploaded_documents
     logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
     if not files:
         logging.warning("No files provided for update_vectors")
+        return "Please upload at least one PDF file.", display_documents()
     embed = get_embeddings()
     total_chunks = 0
             logging.info(f"Loaded {len(data)} chunks from {file.name}")
             all_data.extend(data)
             total_chunks += len(data)
             if not any(doc["name"] == file.name for doc in uploaded_documents):
                 uploaded_documents.append({"name": file.name, "selected": True})
                 logging.info(f"Added new document to uploaded_documents: {file.name}")
     database.save_local("faiss_database")
     logging.info("FAISS database saved")
+    # Save the updated list of documents
+    save_documents(uploaded_documents)
+    return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
 def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
     print(f"Starting generate_chunked_response with {num_calls} calls")
                 "3. Ask questions about uploaded PDF documents\n\n"
                 "To get started, upload some PDFs or ask me a question!")
     ]
+# Add this new function
+def refresh_documents():
+    global uploaded_documents
+    uploaded_documents = load_documents()
+    return display_documents()
 # Define the checkbox outside the demo block
 document_selector = gr.CheckboxGroup(label="Select documents to query")
         file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
         parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
         update_button = gr.Button("Upload Document")
+        refresh_button = gr.Button("Refresh Document List")
     update_output = gr.Textbox(label="Update Status")
     update_button.click(update_vectors,
                         inputs=[file_input, parser_dropdown],
                         outputs=[update_output, document_selector])
+    # Add the refresh button functionality
+    refresh_button.click(refresh_documents,
+                         inputs=[],
+                         outputs=[document_selector])
     gr.Markdown(
     """