Shreyas094 committed
Commit f01b00a · verified · 1 Parent(s): 7a7b1d4

Update app.py

Files changed (1):
  1. app.py +115 -167
app.py CHANGED
@@ -15,7 +15,6 @@ from langchain_core.documents import Document
 from huggingface_hub import InferenceClient
 import inspect
 import logging
-import shutil
 
 
 # Set up basic configuration for logging
@@ -48,20 +47,18 @@ llama_parser = LlamaParse(
 )
 
 def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
-    file_path = os.path.join(UPLOAD_FOLDER, os.path.basename(file.name))
-    shutil.copy(file.name, file_path)
-
+    """Loads and splits the document into pages."""
     if parser == "pypdf":
-        loader = PyPDFLoader(file_path)
+        loader = PyPDFLoader(file.name)
         return loader.load_and_split()
     elif parser == "llamaparse":
         try:
-            documents = llama_parser.load_data(file_path)
-            return [Document(page_content=doc.text, metadata={"source": file_path}) for doc in documents]
+            documents = llama_parser.load_data(file.name)
+            return [Document(page_content=doc.text, metadata={"source": file.name}) for doc in documents]
         except Exception as e:
-            logging.error(f"Error using Llama Parse: {str(e)}")
-            logging.info("Falling back to PyPDF parser")
-            loader = PyPDFLoader(file_path)
+            print(f"Error using Llama Parse: {str(e)}")
+            print("Falling back to PyPDF parser")
+            loader = PyPDFLoader(file.name)
             return loader.load_and_split()
     else:
         raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
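Reviewer note: a minimal way to exercise the reworked `load_document`, which now reads straight from the temp file's path instead of copying it into an upload folder. This is a sketch only; `sample.pdf` is a placeholder and the `from app import load_document` path is an assumption.

```python
# Hypothetical smoke test for load_document; delete=False keeps the temp
# file on disk so PyPDFLoader / llama_parser can reopen it by path.
from tempfile import NamedTemporaryFile

from app import load_document  # assumed import path

with open("sample.pdf", "rb") as src:  # placeholder input file
    tmp = NamedTemporaryFile(delete=False, suffix=".pdf")
    tmp.write(src.read())
    tmp.close()

docs = load_document(tmp, parser="pypdf")  # "llamaparse" falls back to pypdf on error
print(f"Loaded {len(docs)} chunks from {docs[0].metadata['source']}")
```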
@@ -75,7 +72,11 @@ def update_vectors(files, parser):
 
     if not files:
         logging.warning("No files provided for update_vectors")
-        return "Please upload at least one PDF file.", gr.update(choices=[], value=[])
+        return "Please upload at least one PDF file.", gr.CheckboxGroup(
+            choices=[doc["name"] for doc in uploaded_documents],
+            value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
+            label="Select documents to query"
+        )
 
     embed = get_embeddings()
     total_chunks = 0
@@ -88,8 +89,9 @@ def update_vectors(files, parser):
         logging.info(f"Loaded {len(data)} chunks from {file.name}")
         all_data.extend(data)
         total_chunks += len(data)
-        if not any(doc["name"] == os.path.basename(file.name) for doc in uploaded_documents):
-            uploaded_documents.append({"name": os.path.basename(file.name), "selected": True})
+        # Append new documents instead of replacing
+        if not any(doc["name"] == file.name for doc in uploaded_documents):
+            uploaded_documents.append({"name": file.name, "selected": True})
             logging.info(f"Added new document to uploaded_documents: {file.name}")
         else:
             logging.info(f"Document already exists in uploaded_documents: {file.name}")
@@ -98,72 +100,53 @@ def update_vectors(files, parser):
 
     logging.info(f"Total chunks processed: {total_chunks}")
 
-    if all_data:
-        if os.path.exists("faiss_database"):
-            logging.info("Updating existing FAISS database")
-            database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-            database.add_documents(all_data)
-        else:
-            logging.info("Creating new FAISS database")
-            database = FAISS.from_documents(all_data, embed)
-
-        database.save_local("faiss_database")
-        logging.info("FAISS database saved")
-
-        return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
-    else:
-        return "No data was processed. Please check your files and try again.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
-
-UPLOAD_FOLDER = "uploaded_files"
-if not os.path.exists(UPLOAD_FOLDER):
-    os.makedirs(UPLOAD_FOLDER)
-
-# Add this new function to handle file deletion
-def delete_file(file_name):
-    global uploaded_documents
-    logging.info(f"Attempting to delete file: {file_name}")
-
-    # Remove the file from uploaded_documents
-    uploaded_documents = [doc for doc in uploaded_documents if doc["name"] != file_name]
-
-    # Remove the file from the file system if it exists
-    file_path = os.path.join(UPLOAD_FOLDER, file_name)
-    if os.path.exists(file_path):
-        os.remove(file_path)
-        logging.info(f"Deleted file: {file_path}")
+    if os.path.exists("faiss_database"):
+        logging.info("Updating existing FAISS database")
+        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+        database.add_documents(all_data)
     else:
-        logging.warning(f"File not found: {file_path}")
+        logging.info("Creating new FAISS database")
+        database = FAISS.from_documents(all_data, embed)
 
-    # Rebuild the FAISS database
-    rebuild_faiss_database()
+    database.save_local("faiss_database")
+    logging.info("FAISS database saved")
 
-    return gr.update(value=[doc["name"] for doc in uploaded_documents], choices=[doc["name"] for doc in uploaded_documents])
+    return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
+        choices=[doc["name"] for doc in uploaded_documents],
+        value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
+        label="Select documents to query"
+    )
 
-def rebuild_faiss_database():
-    logging.info("Rebuilding FAISS database")
-    embed = get_embeddings()
-    all_data = []
-
-    for doc in uploaded_documents:
-        try:
-            file_path = os.path.join(UPLOAD_FOLDER, doc["name"])
-            temp_file = NamedTemporaryFile(delete=False, suffix=".pdf", dir=UPLOAD_FOLDER)
-            temp_file.write(open(file_path, 'rb').read())
-            temp_file.close()
-            data = load_document(temp_file, "llamaparse")
-            all_data.extend(data)
-            os.unlink(temp_file.name)
-        except Exception as e:
-            logging.error(f"Error processing file {doc['name']}: {str(e)}")
-
-    if all_data:
-        database = FAISS.from_documents(all_data, embed)
+def delete_files(files_to_delete):
+    global uploaded_documents
+    if not files_to_delete:
+        return "No files selected for deletion.", document_selector
+
+    deleted_files = []
+    for file_name in files_to_delete:
+        # Remove the file from uploaded_documents
+        uploaded_documents = [doc for doc in uploaded_documents if doc["name"] != file_name]
+        deleted_files.append(file_name)
+
+    # Update the FAISS database
+    if os.path.exists("faiss_database"):
+        embed = get_embeddings()
+        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+
+        # Remove documents from the database
+        database.delete(lambda doc: doc.metadata["source"] in deleted_files)
+
+        # Save the updated database
         database.save_local("faiss_database")
-        logging.info("FAISS database rebuilt and saved")
-    else:
-        if os.path.exists("faiss_database"):
-            shutil.rmtree("faiss_database")
-            logging.info("No documents left, removed FAISS database")
+
+    # Update the document selector
+    updated_selector = gr.CheckboxGroup(
+        choices=[doc["name"] for doc in uploaded_documents],
+        value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
+        label="Select documents to query"
+    )
+
+    return f"Deleted files: {', '.join(deleted_files)}", updated_selector
 
 def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
     print(f"Starting generate_chunked_response with {num_calls} calls")
@@ -265,14 +248,14 @@ class CitingSources(BaseModel):
         ...,
         description="List of sources to cite. Should be an URL of the source."
     )
-def chatbot_interface(message, history, use_web_search, model, temperature, num_calls, selected_docs):
+def chatbot_interface(message, history, use_web_search, model, temperature, num_calls):
     if not message.strip():
         return "", history
 
     history = history + [(message, "")]
 
     try:
-        for response in respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
+        for response in respond(message, history, model, temperature, num_calls, use_web_search):
             history[-1] = (message, response)
             yield history
     except gr.CancelledError:
@@ -295,67 +278,58 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
     logging.info(f"User Query: {message}")
     logging.info(f"Model Used: {model}")
     logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
+
     logging.info(f"Selected Documents: {selected_docs}")
 
     try:
         if use_web_search:
-            logging.info("Entering web search flow")
             for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
                 response = f"{main_content}\n\n{sources}"
-                logging.info(f"Generated Response (first 100 chars): {response[:100]}...")
+                first_line = response.split('\n')[0] if response else ''
+                # logging.info(f"Generated Response (first line): {first_line}")
                 yield response
         else:
-            logging.info("Entering PDF search flow")
             embed = get_embeddings()
             if os.path.exists("faiss_database"):
-                logging.info("FAISS database exists, loading it")
                 database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
                 retriever = database.as_retriever()
 
-                logging.info("Attempting to retrieve relevant documents")
-                try:
-                    relevant_docs = retriever.invoke(message)
-                    logging.info(f"Retrieved {len(relevant_docs)} relevant documents")
-                except Exception as e:
-                    logging.error(f"Error retrieving relevant documents: {str(e)}")
-                    yield f"An error occurred while retrieving relevant documents: {str(e)}"
-                    return
-
                 # Filter relevant documents based on user selection
-                filtered_docs = [doc for doc in relevant_docs if os.path.basename(doc.metadata["source"]) in [os.path.basename(doc) for doc in selected_docs]]
-                logging.info(f"Number of filtered documents: {len(filtered_docs)}")
+                all_relevant_docs = retriever.get_relevant_documents(message)
+                relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
 
-                if not filtered_docs:
-                    logging.warning("No relevant information found in the selected documents")
+                if not relevant_docs:
                     yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
                     return
 
-                context_str = "\n".join([doc.page_content for doc in filtered_docs])
-                logging.info(f"Total context length: {len(context_str)}")
+                context_str = "\n".join([doc.page_content for doc in relevant_docs])
             else:
-                logging.warning("No FAISS database found")
                 context_str = "No documents available."
                 yield "No documents available. Please upload PDF documents to answer questions."
                 return
 
             if model == "@cf/meta/llama-3.1-8b-instruct":
-                logging.info("Using Cloudflare API")
+                # Use Cloudflare API
                 for partial_response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="pdf"):
-                    logging.info(f"Generated Response (first 100 chars): {partial_response[:100]}...")
+                    first_line = partial_response.split('\n')[0] if partial_response else ''
+                    logging.info(f"Generated Response (first line): {first_line}")
                    yield partial_response
            else:
-                logging.info("Entering PDF search flow")
+                # Use Hugging Face API
                for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
-                    logging.info(f"Generated Response (first 100 chars): {partial_response[:100]}...")
+                    first_line = partial_response.split('\n')[0] if partial_response else ''
+                    logging.info(f"Generated Response (first line): {first_line}")
                    yield partial_response
-
-    logging.info("Finished respond function")
     except Exception as e:
-        logging.error(f"Unexpected error in respond function: {str(e)}")
-        yield f"An unexpected error occurred: {str(e)}"
+        logging.error(f"Error with {model}: {str(e)}")
+        if "microsoft/Phi-3-mini-4k-instruct" in model:
+            logging.info("Falling back to Mistral model due to Phi-3 error")
+            fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
+            yield from respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
+        else:
+            yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
 
-    logging.info("Finished respond function")
-    logging.info(f"Selected docs: {selected_docs}")
+logging.basicConfig(level=logging.DEBUG)
 
 def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2, search_type="pdf"):
     headers = {
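Reviewer note on the hunk above: the new filter compares `doc.metadata["source"]` against the selector values directly, and since `update_vectors` now records `file.name` (typically a full temp-file path), the old basename comparison is gone. If the selector and the metadata ever hold different forms (bare names vs. full paths), a basename-normalized filter is the more forgiving variant. A sketch under that assumption; `filter_by_selected` is a hypothetical helper, not part of this commit:

```python
import os

from langchain_core.documents import Document

def filter_by_selected(docs: list[Document], selected_docs: list[str]) -> list[Document]:
    """Hypothetical helper: match on basenames so full paths and bare names both work."""
    selected_names = {os.path.basename(name) for name in selected_docs}
    return [
        doc
        for doc in docs
        if os.path.basename(doc.metadata.get("source", "")) in selected_names
    ]
```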
@@ -457,19 +431,11 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
 
     retriever = database.as_retriever()
     logging.info(f"Retrieving relevant documents for query: {query}")
-    try:
-        relevant_docs = retriever.invoke(query)
-        logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
-    except Exception as e:
-        logging.error(f"Error retrieving relevant documents: {str(e)}")
-        yield f"An error occurred while retrieving relevant documents: {str(e)}"
-        return
-
-    # Log the sources of relevant documents
-    logging.info(f"Sources of relevant documents: {[os.path.basename(doc.metadata['source']) for doc in relevant_docs]}")
+    relevant_docs = retriever.get_relevant_documents(query)
+    logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
 
     # Filter relevant_docs based on selected documents
-    filtered_docs = [doc for doc in relevant_docs if os.path.basename(doc.metadata["source"]) in selected_docs]
+    filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
     logging.info(f"Number of filtered documents: {len(filtered_docs)}")
 
     if not filtered_docs:
@@ -478,7 +444,7 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
         return
 
     for doc in filtered_docs:
-        logging.info(f"Document source: {os.path.basename(doc.metadata['source'])}")
+        logging.info(f"Document source: {doc.metadata['source']}")
         logging.info(f"Document content preview: {doc.page_content[:100]}...")  # Log first 100 characters of each document
 
     context_str = "\n".join([doc.page_content for doc in filtered_docs])
@@ -501,25 +467,18 @@ Write a detailed and complete response that answers the following user question:
     response = ""
     for i in range(num_calls):
         logging.info(f"API call {i+1}/{num_calls}")
-        try:
-            for message in client.chat_completion(
-                messages=[{"role": "user", "content": prompt}],
-                max_tokens=10000,
-                temperature=temperature,
-                stream=True,
-            ):
-                if message.choices and message.choices[0].delta and message.choices[0].delta.content:
-                    chunk = message.choices[0].delta.content
-                    response += chunk
-                    yield response  # Yield partial response
-            logging.info(f"API call {i+1} completed successfully")
-        except Exception as e:
-            logging.error(f"Error in API call {i+1}: {str(e)}")
+        for message in client.chat_completion(
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=10000,
+            temperature=temperature,
+            stream=True,
+        ):
+            if message.choices and message.choices[0].delta and message.choices[0].delta.content:
+                chunk = message.choices[0].delta.content
+                response += chunk
+                yield response  # Yield partial response
 
     logging.info("Finished generating response")
-    logging.info(f"Relevant docs: {[doc.metadata['source'] for doc in relevant_docs]}")
-    logging.info(f"Selected docs: {selected_docs}")
-    logging.info(f"Filtered docs: {[doc.metadata['source'] for doc in filtered_docs]}")
 
 def vote(data: gr.LikeData):
     if data.liked:
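Reviewer note on the hunk above: dropping the try/except around the streaming loop means a single transport error now aborts the whole generator. If that guard is wanted back, a minimal sketch using the same `huggingface_hub` streaming call follows; `stream_with_guard` is a hypothetical helper, not code from this commit:

```python
import logging

from huggingface_hub import InferenceClient

def stream_with_guard(client: InferenceClient, prompt: str, temperature: float, call_idx: int):
    """Yield a growing response string; log transport errors instead of raising."""
    response = ""
    try:
        for message in client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=10000,
            temperature=temperature,
            stream=True,
        ):
            delta = message.choices[0].delta if message.choices else None
            if delta and delta.content:
                response += delta.content
                yield response  # partial response, so the UI can stream
    except Exception as e:
        logging.error(f"Error in API call {call_idx}: {str(e)}")
```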
@@ -543,8 +502,8 @@ uploaded_documents = []
 
 def display_documents():
     return gr.CheckboxGroup(
-        choices=[os.path.basename(doc["name"]) for doc in uploaded_documents],
-        value=[os.path.basename(doc["name"]) for doc in uploaded_documents if doc["selected"]],
+        choices=[doc["name"] for doc in uploaded_documents],
+        value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
         label="Select documents to query"
     )
 
@@ -554,13 +513,13 @@ document_selector = gr.CheckboxGroup(label="Select documents to query")
 use_web_search = gr.Checkbox(label="Use Web Search", value=True)
 
 demo = gr.ChatInterface(
-    chatbot_interface,
+    respond,
     additional_inputs=[
         gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[3]),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
         gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls"),
         use_web_search,
-        document_selector # This should now correctly pass the selected documents
+        document_selector # Add the document selector to the chat interface
     ],
     title="AI-powered Web Search and PDF Chat Assistant",
     description="Chat with your PDFs or use web search to answer questions (Please use toggle under Additional Inputs to swithc between PDF and Web Search, Default Value Web Search)",
@@ -603,42 +562,31 @@ with demo:
 
     update_output = gr.Textbox(label="Update Status")
 
-    # Create a new row for displaying uploaded files with delete buttons
-    with gr.Row():
-        uploaded_files = gr.CheckboxGroup(label="Uploaded Documents", interactive=True)
-        delete_button = gr.Button("Delete Selected")
-
     # Update both the output text and the document selector
-    update_button.click(
-        update_vectors,
-        inputs=[file_input, parser_dropdown],
-        outputs=[update_output, uploaded_files]
-    )
-
-    # Handle file deletion
-    delete_button.click(
-        lambda selected: [delete_file(file) for file in selected],
-        inputs=[uploaded_files],
-        outputs=[uploaded_files]
-    )
+    update_button.click(update_vectors,
+                        inputs=[file_input, parser_dropdown],
+                        outputs=[update_output, document_selector])
+
+    # Add delete button
+    delete_button = gr.Button("Delete Selected Files")
+    delete_output = gr.Textbox(label="Delete Status")
+
+    # Connect delete button to the delete_files function
+    delete_button.click(delete_files,
+                        inputs=[document_selector],
+                        outputs=[delete_output, document_selector])
 
-    # Update the document selector in the chat interface
-    uploaded_files.change(
-        lambda x: gr.update(choices=x, value=x),
-        inputs=[uploaded_files],
-        outputs=[document_selector]
-    )
-
     gr.Markdown(
     """
     ## How to use
     1. Upload PDF documents using the file input at the top.
     2. Select the PDF parser (pypdf or llamaparse) and click "Upload Document" to update the vector store.
     3. Select the documents you want to query using the checkboxes.
-    4. Ask questions in the chat interface.
-    5. Toggle "Use Web Search" to switch between PDF chat and web search.
-    6. Adjust Temperature and Number of API Calls to fine-tune the response generation.
-    7. Use the provided examples or ask your own questions.
+    4. To delete files, select them in the checkbox and click "Delete Selected Files".
+    5. Ask questions in the chat interface.
+    6. Toggle "Use Web Search" to switch between PDF chat and web search.
+    7. Adjust Temperature and Number of API Calls to fine-tune the response generation.
+    8. Use the provided examples or ask your own questions.
    """
    )
 
 