Shreyas094 committed (verified)
Commit bd7a464 · 1 Parent(s): 9ccaed2

Update app.py

Files changed (1): app.py (+52 −61)
app.py CHANGED
@@ -104,11 +104,6 @@ def update_vectors(files, parser):
             logging.warning(f"No chunks loaded from {file.name}")
             continue
         logging.info(f"Loaded {len(data)} chunks from {file.name}")
-
-        # Add source to metadata
-        for chunk in data:
-            chunk.metadata["source"] = file.name
-
         all_data.extend(data)
         total_chunks += len(data)
         if not any(doc["name"] == file.name for doc in uploaded_documents):
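
Note: this hunk drops the per-chunk source tagging, while get_response_from_pdf (last hunk below) still filters on doc.metadata["source"], so that key must now be set somewhere else, e.g. by the loader. For reference, a minimal sketch of the tagging pattern being removed, assuming LangChain-style Document objects; the file name is illustrative:

from langchain_core.documents import Document  # assumed import path

def tag_chunks_with_source(chunks: list[Document], file_name: str) -> list[Document]:
    # Record the originating file so downstream code can filter chunks by source.
    for chunk in chunks:
        chunk.metadata["source"] = file_name
    return chunks

# Usage: every chunk loaded from one file reports the same source.
chunks = tag_chunks_with_source([Document(page_content="hello")], "report.pdf")
assert chunks[0].metadata["source"] == "report.pdf"
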
@@ -184,52 +179,57 @@ def delete_documents(selected_docs):
 
     return f"Deleted documents: {', '.join(deleted_docs)}", display_documents()
 
-def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2):
-    print(f"Starting generate_chunked_response with model: {model}, num_calls: {num_calls}")
+def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
+    print(f"Starting generate_chunked_response with {num_calls} calls")
     full_response = ""
     messages = [{"role": "user", "content": prompt}]
 
     if model == "@cf/meta/llama-3.1-8b-instruct":
-        # Cloudflare API logic
-        headers = {
-            "Authorization": f"Bearer {API_TOKEN}",
-            "Content-Type": "application/json"
-        }
-
+        # Cloudflare API
         for i in range(num_calls):
             print(f"Starting Cloudflare API call {i+1}")
+            if should_stop:
+                print("Stop clicked, breaking loop")
+                break
             try:
                 response = requests.post(
                     f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct",
+                    headers={"Authorization": f"Bearer {API_TOKEN}"},
                     json={
-                        "messages": messages,
-                        "stream": True,
+                        "stream": True,
+                        "messages": [
+                            {"role": "system", "content": "You are a friendly assistant"},
+                            {"role": "user", "content": prompt}
+                        ],
                         "max_tokens": max_tokens,
                         "temperature": temperature
                     },
-                    headers=headers,
-                    stream=True
+                    stream=True
                 )
+
                 for line in response.iter_lines():
+                    if should_stop:
+                        print("Stop clicked during streaming, breaking")
+                        break
                     if line:
                         try:
                             json_data = json.loads(line.decode('utf-8').split('data: ')[1])
-                            chunk = json_data.get('response', '')
+                            chunk = json_data['response']
                             full_response += chunk
-                            yield full_response
-                        except (json.JSONDecodeError, IndexError):
+                        except json.JSONDecodeError:
                             continue
                 print(f"Cloudflare API call {i+1} completed")
             except Exception as e:
                 print(f"Error in generating response from Cloudflare: {str(e)}")
-                if i == num_calls - 1:  # If this is the last attempt
-                    yield full_response  # Yield whatever response we have so far
     else:
-        # Hugging Face API logic
+        # Original Hugging Face API logic
         client = InferenceClient(model, token=huggingface_token)
 
         for i in range(num_calls):
             print(f"Starting Hugging Face API call {i+1}")
+            if should_stop:
+                print("Stop clicked, breaking loop")
+                break
             try:
                 for message in client.chat_completion(
                     messages=messages,
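
Note: the new parser indexes split('data: ')[1], which raises IndexError on keep-alive or non-data lines, and the narrowed except clause (json.JSONDecodeError only) no longer catches that, so such a line escapes to the outer handler and aborts the whole call. A more defensive sketch of the same parsing, assuming Cloudflare's OpenAI-style SSE framing with the text under a 'response' key and a closing 'data: [DONE]' sentinel:

import json
from typing import Iterable, Iterator

def iter_sse_chunks(lines: Iterable[bytes]) -> Iterator[str]:
    # Yield text chunks from an SSE stream of `data: {...}` lines.
    for raw in lines:
        if not raw:
            continue  # blank lines are SSE event separators / keep-alives
        line = raw.decode("utf-8")
        if not line.startswith("data: "):
            continue  # skip comments and other non-data fields
        payload = line[len("data: "):]
        if payload.strip() == "[DONE]":
            break  # end-of-stream sentinel (OpenAI-style convention)
        try:
            yield json.loads(payload).get("response", "")
        except json.JSONDecodeError:
            continue  # skip partial or malformed frames rather than crash

# Usage with canned frames; a real call would pass response.iter_lines().
frames = [b'data: {"response": "Hel"}', b'', b'data: {"response": "lo"}', b'data: [DONE]']
print("".join(iter_sse_chunks(frames)))  # -> Hello
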
@@ -237,19 +237,16 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
                     temperature=temperature,
                     stream=True,
                 ):
+                    if should_stop:
+                        print("Stop clicked during streaming, breaking")
+                        break
                     if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                         chunk = message.choices[0].delta.content
                         full_response += chunk
-                        yield full_response
                 print(f"Hugging Face API call {i+1} completed")
             except Exception as e:
                 print(f"Error in generating response from Hugging Face: {str(e)}")
-                if i == num_calls - 1:  # If this is the last attempt
-                    yield full_response  # Yield whatever response we have so far
 
-    if not full_response:
-        yield "I apologize, but I couldn't generate a response at this time. Please try again later."
-
     # Clean up the response
     clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
     clean_response = clean_response.replace("Using the following context:", "").strip()
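
Note: should_stop arrives as a plain boolean defaulting to False and is evaluated once at call time, so nothing inside these loops can observe a later change; for the mid-stream checks to ever fire, the caller would need to pass a flag it can flip while generation runs. A sketch of that pattern with threading.Event (the event and its wiring are illustrative, not part of this commit):

import threading
import time

stop_event = threading.Event()  # shared flag; a UI stop button could set() it

def generate(stop: threading.Event):
    for i in range(100):
        if stop.is_set():        # checked between chunks, like the loops above
            print("stopped early at chunk", i)
            return
        time.sleep(0.01)         # stands in for receiving one streamed chunk
    print("finished")

worker = threading.Thread(target=generate, args=(stop_event,))
worker.start()
time.sleep(0.05)
stop_event.set()                 # request cancellation mid-stream
worker.join()
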
@@ -270,7 +267,7 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
     final_response = '\n\n'.join(unique_paragraphs)
 
     print(f"Final clean response: {final_response[:100]}...")
-    yield final_response
+    return final_response
 
 def duckduckgo_search(query):
     with DDGS() as ddgs:
@@ -308,16 +305,19 @@ def retry_last_response(history, use_web_search, model, temperature, num_calls):
 
     return chatbot_interface(last_user_msg, history, use_web_search, model, temperature, num_calls)
 
-def respond(message, history, use_web_search, model, temperature, num_calls, selected_docs):
+def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
     logging.info(f"User Query: {message}")
     logging.info(f"Model Used: {model}")
     logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
+
     logging.info(f"Selected Documents: {selected_docs}")
 
     try:
         if use_web_search:
             for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
                 response = f"{main_content}\n\n{sources}"
+                first_line = response.split('\n')[0] if response else ''
+                # logging.info(f"Generated Response (first line): {first_line}")
                 yield response
         else:
             embed = get_embeddings()
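
Note: the respond signature reorder (model/temperature/num_calls moved ahead of use_web_search) only matters to callers that pass these positionally. One plausible consumer is a Gradio ChatInterface, which invokes its fn as fn(message, history, *additional_inputs) in list order; the wiring below is an illustrative sketch under that assumption, not code from this commit:

import gradio as gr

MODELS = ["@cf/meta/llama-3.1-8b-instruct", "mistralai/Mistral-7B-Instruct-v0.3"]  # illustrative

def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
    yield f"[{model}] echo: {message}"  # stub body for the sketch

demo = gr.ChatInterface(
    respond,  # called as respond(message, history, *additional_inputs), in list order
    additional_inputs=[
        gr.Dropdown(choices=MODELS, label="Select Model"),
        gr.Slider(0, 1, value=0.2, label="Temperature"),
        gr.Slider(1, 5, value=1, step=1, label="Number of API Calls"),
        gr.Checkbox(label="Use Web Search"),
        gr.CheckboxGroup(choices=[], label="Select Documents"),
    ],
)

if __name__ == "__main__":
    demo.launch()
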
@@ -335,26 +335,28 @@ def respond(message, history, use_web_search, model, temperature, num_calls, sel
 
                 context_str = "\n".join([doc.page_content for doc in relevant_docs])
             else:
+                context_str = "No documents available."
                 yield "No documents available. Please upload PDF documents to answer questions."
                 return
 
             if model == "@cf/meta/llama-3.1-8b-instruct":
                 # Use Cloudflare API
-                prompt = f"""Using the following context from the PDF documents:
-{context_str}
-Write a detailed and complete response that answers the following user question: '{message}'"""
-                for partial_response in generate_chunked_response(prompt, model, num_calls=num_calls, temperature=temperature):
+                for partial_response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="pdf"):
+                    first_line = partial_response.split('\n')[0] if partial_response else ''
+                    # logging.info(f"Generated Response (first line): {first_line}")
                     yield partial_response
             else:
                 # Use Hugging Face API
                 for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
+                    first_line = partial_response.split('\n')[0] if partial_response else ''
+                    # logging.info(f"Generated Response (first line): {first_line}")
                     yield partial_response
     except Exception as e:
         logging.error(f"Error with {model}: {str(e)}")
         if "microsoft/Phi-3-mini-4k-instruct" in model:
             logging.info("Falling back to Mistral model due to Phi-3 error")
             fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
-            yield from respond(message, history, use_web_search, fallback_model, temperature, num_calls, selected_docs)
+            yield from respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
         else:
             yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
 
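
Note: the corrected fallback call passes arguments in the new order. Stripped to its core, the pattern is a generator delegating to a second call of itself with the fallback model; a self-contained sketch with the failure simulated:

def answer(message, model="microsoft/Phi-3-mini-4k-instruct"):
    # On failure, hand the stream over to a fresh call with the fallback model.
    try:
        if "Phi-3" in model:
            raise RuntimeError("simulated Phi-3 failure")
        yield f"{model}: response to {message!r}"
    except RuntimeError:
        yield from answer(message, model="mistralai/Mistral-7B-Instruct-v0.3")

print(list(answer("hi")))  # -> ["mistralai/Mistral-7B-Instruct-v0.3: response to 'hi'"]
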
@@ -458,36 +460,25 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
         yield "No documents available. Please upload PDF documents to answer questions."
         return
 
-    # Log all documents in the database
-    all_docs = list(database.docstore._dict.values())
-    logging.info(f"All documents in database: {[doc.metadata['source'] for doc in all_docs]}")
-    logging.info(f"Number of documents in database: {len(all_docs)}")
-
-    # Get only the selected documents
-    selected_docs_content = [doc for doc in all_docs if doc.metadata["source"] in selected_docs]
-    logging.info(f"Number of selected documents: {len(selected_docs_content)}")
-    logging.info(f"Selected documents: {[doc.metadata['source'] for doc in selected_docs_content]}")
-
-    if not selected_docs_content:
-        logging.warning(f"No documents found for the selected sources: {selected_docs}")
-        yield "No documents found for the selected sources. Please check your document selection."
-        return
-
-    # Perform similarity search on selected documents
-    relevant_docs = database.similarity_search(query, k=5, filter=lambda doc: doc.metadata["source"] in selected_docs)
-    logging.info(f"Number of relevant documents after similarity search: {len(relevant_docs)}")
-    logging.info(f"Relevant documents: {[doc.metadata['source'] for doc in relevant_docs]}")
-
-    if not relevant_docs:
-        logging.warning("No relevant documents found after similarity search")
-        yield "No relevant information found in the selected documents. Please try rephrasing your query."
+    retriever = database.as_retriever()
+    logging.info(f"Retrieving relevant documents for query: {query}")
+    relevant_docs = retriever.get_relevant_documents(query)
+    logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
+
+    # Filter relevant_docs based on selected documents
+    filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
+    logging.info(f"Number of filtered documents: {len(filtered_docs)}")
+
+    if not filtered_docs:
+        logging.warning(f"No relevant information found in the selected documents: {selected_docs}")
+        yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
         return
 
-    for doc in relevant_docs:
+    for doc in filtered_docs:
         logging.info(f"Document source: {doc.metadata['source']}")
         logging.info(f"Document content preview: {doc.page_content[:100]}...")  # Log first 100 characters of each document
 
-    context_str = "\n".join([doc.page_content for doc in relevant_docs])
+    context_str = "\n".join([doc.page_content for doc in filtered_docs])
     logging.info(f"Total context length: {len(context_str)}")
 
     if model == "@cf/meta/llama-3.1-8b-instruct":
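
Note on the last hunk: retrieving a global top-k first and filtering by source afterwards can come back empty even when the selected files do contain relevant chunks, if chunks from other files dominate the ranking; over-fetching (e.g. database.as_retriever(search_kwargs={"k": 20}), k illustrative) softens that. The filter step itself, sketched with .get() so chunks that were never tagged with a source (see the first hunk) are skipped instead of raising KeyError:

from langchain_core.documents import Document  # assumed import path

def filter_by_source(docs, selected_sources):
    # Keep only chunks whose metadata marks them as coming from a selected file.
    return [d for d in docs if d.metadata.get("source") in selected_sources]

hits = [
    Document(page_content="alpha", metadata={"source": "a.pdf"}),
    Document(page_content="beta", metadata={"source": "b.pdf"}),
    Document(page_content="gamma"),  # untagged chunk: skipped, not a KeyError
]
print([d.page_content for d in filter_by_source(hits, {"a.pdf"})])  # -> ['alpha']
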