Spaces:
Paused
Paused
Shreyas094
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -66,17 +66,30 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
|
|
66 |
def get_embeddings():
    """Return a HuggingFaceEmbeddings instance for the stsb-roberta-large model."""
    embedding_model_name = "sentence-transformers/stsb-roberta-large"
    return HuggingFaceEmbeddings(model_name=embedding_model_name)
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
def update_vectors(files, parser):
|
70 |
global uploaded_documents
|
71 |
logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
|
72 |
|
73 |
if not files:
|
74 |
logging.warning("No files provided for update_vectors")
|
75 |
-
return "Please upload at least one PDF file.",
|
76 |
-
choices=[doc["name"] for doc in uploaded_documents],
|
77 |
-
value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
|
78 |
-
label="Select documents to query"
|
79 |
-
)
|
80 |
|
81 |
embed = get_embeddings()
|
82 |
total_chunks = 0
|
@@ -89,7 +102,6 @@ def update_vectors(files, parser):
|
|
89 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
90 |
all_data.extend(data)
|
91 |
total_chunks += len(data)
|
92 |
-
# Append new documents instead of replacing
|
93 |
if not any(doc["name"] == file.name for doc in uploaded_documents):
|
94 |
uploaded_documents.append({"name": file.name, "selected": True})
|
95 |
logging.info(f"Added new document to uploaded_documents: {file.name}")
|
@@ -111,12 +123,10 @@ def update_vectors(files, parser):
|
|
111 |
database.save_local("faiss_database")
|
112 |
logging.info("FAISS database saved")
|
113 |
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
label="Select documents to query"
|
119 |
-
)
|
120 |
|
121 |
def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
|
122 |
print(f"Starting generate_chunked_response with {num_calls} calls")
|
@@ -485,6 +495,11 @@ def initial_conversation():
|
|
485 |
"3. Ask questions about uploaded PDF documents\n\n"
|
486 |
"To get started, upload some PDFs or ask me a question!")
|
487 |
]
|
|
|
|
|
|
|
|
|
|
|
488 |
|
489 |
# Define the checkbox outside the demo block
|
490 |
document_selector = gr.CheckboxGroup(label="Select documents to query")
|
@@ -548,6 +563,7 @@ with demo:
|
|
548 |
file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
|
549 |
parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
|
550 |
update_button = gr.Button("Upload Document")
|
|
|
551 |
|
552 |
update_output = gr.Textbox(label="Update Status")
|
553 |
|
@@ -555,6 +571,11 @@ with demo:
|
|
555 |
update_button.click(update_vectors,
|
556 |
inputs=[file_input, parser_dropdown],
|
557 |
outputs=[update_output, document_selector])
|
|
|
|
|
|
|
|
|
|
|
558 |
|
559 |
gr.Markdown(
|
560 |
"""
|
|
|
66 |
def get_embeddings():
    """Return a HuggingFaceEmbeddings instance for the stsb-roberta-large model."""
    embedding_model_name = "sentence-transformers/stsb-roberta-large"
    return HuggingFaceEmbeddings(model_name=embedding_model_name)
|
68 |
|
69 |
+
# Path of the JSON file that persists the uploaded-document list between runs.
DOCUMENTS_FILE = "uploaded_documents.json"


def load_documents():
    """Load the persisted list of uploaded documents.

    Returns:
        The list decoded from DOCUMENTS_FILE, or an empty list when the file
        is missing, unreadable, not valid JSON, or does not hold a JSON list.
    """
    # Open directly instead of checking os.path.exists() first: this avoids
    # the check-then-open race, and lets a damaged file degrade to "no
    # documents" rather than raising while the module is being imported.
    try:
        with open(DOCUMENTS_FILE, "r", encoding="utf-8") as f:
            documents = json.load(f)
    except FileNotFoundError:
        return []
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        logging.warning(f"Could not read {DOCUMENTS_FILE}: {exc}")
        return []

    if not isinstance(documents, list):
        logging.warning(f"Ignoring {DOCUMENTS_FILE}: expected a JSON list")
        return []
    return documents


def save_documents(documents):
    """Persist ``documents`` to DOCUMENTS_FILE as JSON.

    The data is written to a sibling temporary file and then moved into place
    with os.replace, so a failed or interrupted dump cannot leave a truncated
    DOCUMENTS_FILE behind.

    Args:
        documents: JSON-serialisable list of document records.
    """
    tmp_path = f"{DOCUMENTS_FILE}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(documents, f)
    os.replace(tmp_path, DOCUMENTS_FILE)
|
81 |
+
|
82 |
+
# Restore the persisted document list at startup
|
83 |
+
uploaded_documents = load_documents()
|
84 |
+
|
85 |
+
# Index the uploaded files into the FAISS store and persist the document list
|
86 |
def update_vectors(files, parser):
|
87 |
global uploaded_documents
|
88 |
logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
|
89 |
|
90 |
if not files:
|
91 |
logging.warning("No files provided for update_vectors")
|
92 |
+
return "Please upload at least one PDF file.", display_documents()
|
|
|
|
|
|
|
|
|
93 |
|
94 |
embed = get_embeddings()
|
95 |
total_chunks = 0
|
|
|
102 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
103 |
all_data.extend(data)
|
104 |
total_chunks += len(data)
|
|
|
105 |
if not any(doc["name"] == file.name for doc in uploaded_documents):
|
106 |
uploaded_documents.append({"name": file.name, "selected": True})
|
107 |
logging.info(f"Added new document to uploaded_documents: {file.name}")
|
|
|
123 |
database.save_local("faiss_database")
|
124 |
logging.info("FAISS database saved")
|
125 |
|
126 |
+
# Save the updated list of documents
|
127 |
+
save_documents(uploaded_documents)
|
128 |
+
|
129 |
+
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
|
|
|
|
|
130 |
|
131 |
def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
|
132 |
print(f"Starting generate_chunked_response with {num_calls} calls")
|
|
|
495 |
"3. Ask questions about uploaded PDF documents\n\n"
|
496 |
"To get started, upload some PDFs or ask me a question!")
|
497 |
]
|
498 |
+
# Reload the document list from disk (wired to the refresh button)
|
499 |
+
def refresh_documents():
    """Re-read the document list via load_documents() and return the refreshed view.

    Rebinds the module-level ``uploaded_documents`` to the freshly loaded
    value, then returns whatever display_documents() produces (defined
    elsewhere in the file; the click handler routes it to document_selector).
    """
    global uploaded_documents
    reloaded = load_documents()
    uploaded_documents = reloaded
    return display_documents()
|
503 |
|
504 |
# Define the checkbox outside the demo block
|
505 |
document_selector = gr.CheckboxGroup(label="Select documents to query")
|
|
|
563 |
file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
|
564 |
parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
|
565 |
update_button = gr.Button("Upload Document")
|
566 |
+
refresh_button = gr.Button("Refresh Document List")
|
567 |
|
568 |
update_output = gr.Textbox(label="Update Status")
|
569 |
|
|
|
571 |
update_button.click(update_vectors,
|
572 |
inputs=[file_input, parser_dropdown],
|
573 |
outputs=[update_output, document_selector])
|
574 |
+
|
575 |
+
# Clicking the refresh button reloads the document list into the selector
|
576 |
+
refresh_button.click(refresh_documents,
|
577 |
+
inputs=[],
|
578 |
+
outputs=[document_selector])
|
579 |
|
580 |
gr.Markdown(
|
581 |
"""
|