Shreyas094
committed on
Update app.py
app.py
CHANGED
@@ -104,11 +104,6 @@ def update_vectors(files, parser):
             logging.warning(f"No chunks loaded from {file.name}")
             continue
         logging.info(f"Loaded {len(data)} chunks from {file.name}")
-
-        # Add source to metadata
-        for chunk in data:
-            chunk.metadata["source"] = file.name
-
         all_data.extend(data)
         total_chunks += len(data)
         if not any(doc["name"] == file.name for doc in uploaded_documents):
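Note on this hunk: it deletes the per-chunk source tagging, yet get_response_from_pdf further down still filters on doc.metadata["source"]. Unless the PDF loader sets that key itself, the source filter can no longer match anything. For reference, a minimal sketch of the removed behavior, assuming LangChain-style Document objects with a metadata dict as used elsewhere in app.py:

    # Tag each chunk with the file it came from so that later filters on
    # doc.metadata["source"] can match (file.name is e.g. "report.pdf").
    for chunk in data:
        chunk.metadata["source"] = file.name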
@@ -184,52 +179,57 @@ def delete_documents(selected_docs):
 
     return f"Deleted documents: {', '.join(deleted_docs)}", display_documents()
 
-def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2):
-    print(f"Starting generate_chunked_response with
+def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
+    print(f"Starting generate_chunked_response with {num_calls} calls")
     full_response = ""
     messages = [{"role": "user", "content": prompt}]
 
     if model == "@cf/meta/llama-3.1-8b-instruct":
-        # Cloudflare API
-        headers = {
-            "Authorization": f"Bearer {API_TOKEN}",
-            "Content-Type": "application/json"
-        }
-
+        # Cloudflare API
         for i in range(num_calls):
             print(f"Starting Cloudflare API call {i+1}")
+            if should_stop:
+                print("Stop clicked, breaking loop")
+                break
             try:
                 response = requests.post(
                     f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct",
+                    headers={"Authorization": f"Bearer {API_TOKEN}"},
                     json={
-                        "
-                        "
+                        "stream": True,
+                        "messages": [
+                            {"role": "system", "content": "You are a friendly assistant"},
+                            {"role": "user", "content": prompt}
+                        ],
                         "max_tokens": max_tokens,
                         "temperature": temperature
                     },
-
-                    stream=True
+                    stream=True
                 )
+
                 for line in response.iter_lines():
+                    if should_stop:
+                        print("Stop clicked during streaming, breaking")
+                        break
                     if line:
                         try:
                             json_data = json.loads(line.decode('utf-8').split('data: ')[1])
-                            chunk = json_data
+                            chunk = json_data['response']
                             full_response += chunk
-
-                        except (json.JSONDecodeError, IndexError):
+                        except json.JSONDecodeError:
                             continue
                 print(f"Cloudflare API call {i+1} completed")
             except Exception as e:
                 print(f"Error in generating response from Cloudflare: {str(e)}")
-                if i == num_calls - 1:  # If this is the last attempt
-                    yield full_response  # Yield whatever response we have so far
     else:
-        # Hugging Face API logic
+        # Original Hugging Face API logic
         client = InferenceClient(model, token=huggingface_token)
 
         for i in range(num_calls):
             print(f"Starting Hugging Face API call {i+1}")
+            if should_stop:
+                print("Stop clicked, breaking loop")
+                break
             try:
                 for message in client.chat_completion(
                     messages=messages,
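One detail in this hunk is worth flagging: split('data: ')[1] raises IndexError on any streamed line without the data: prefix (keep-alives, a trailing [DONE] marker), and the narrowed except clause no longer catches IndexError, so a single stray line can abort the whole call. A more defensive per-line parser, sketched under the assumption that Cloudflare Workers AI streams lines of the form data: {"response": "..."} followed by data: [DONE]:

    import json

    def parse_sse_line(raw: bytes):
        # Best-effort parse of one streamed line; returns the text delta or None.
        # A sketch, not app.py's code.
        text = raw.decode("utf-8", errors="replace")
        if not text.startswith("data: "):
            return None                      # keep-alives, comments, blank lines
        payload = text[len("data: "):]
        if payload.strip() == "[DONE]":
            return None                      # end-of-stream marker
        try:
            return json.loads(payload).get("response", "")
        except json.JSONDecodeError:
            return None                      # partial or malformed chunk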
@@ -237,19 +237,16 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2):
                     temperature=temperature,
                     stream=True,
                 ):
+                    if should_stop:
+                        print("Stop clicked during streaming, breaking")
+                        break
                     if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                         chunk = message.choices[0].delta.content
                         full_response += chunk
-                        yield full_response
                 print(f"Hugging Face API call {i+1} completed")
             except Exception as e:
                 print(f"Error in generating response from Hugging Face: {str(e)}")
-                if i == num_calls - 1:  # If this is the last attempt
-                    yield full_response  # Yield whatever response we have so far
 
-    if not full_response:
-        yield "I apologize, but I couldn't generate a response at this time. Please try again later."
-
     # Clean up the response
     clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
     clean_response = clean_response.replace("Using the following context:", "").strip()
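The should_stop checks added in this and the previous hunk can never fire: should_stop is a plain boolean parameter, so its value is frozen when the function is called and nothing can flip it while the loops run. A shared flag that is re-read on each iteration would be needed; one hypothetical wiring with threading.Event (names illustrative, not app.py's):

    import threading
    import time

    stop_event = threading.Event()  # a Stop button handler would call stop_event.set()

    def stream_calls(num_calls: int, should_stop=stop_event.is_set) -> None:
        # Passing a callable rather than a bool means the flag is re-evaluated
        # on every pass instead of being captured once at call time.
        for i in range(num_calls):
            if should_stop():
                print("Stop requested, breaking loop")
                break
            time.sleep(0.1)  # stand-in for one streaming API call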
@@ -270,7 +267,7 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2):
     final_response = '\n\n'.join(unique_paragraphs)
 
     print(f"Final clean response: {final_response[:100]}...")
-
+    return final_response
 
 def duckduckgo_search(query):
     with DDGS() as ddgs:
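With every yield removed above and return final_response added here, generate_chunked_response stops being a generator and becomes a blocking function that hands back one string. Call sites still written as for partial in generate_chunked_response(...) would now iterate over that string one character at a time. A toy illustration of the new contract (fake_generate is a stand-in, not app.py's function):

    def fake_generate(prompt: str, model: str) -> str:
        # Stand-in for the post-commit generate_chunked_response: one final string.
        return f"response to {prompt!r} from {model}"

    final = fake_generate("hello", "@cf/meta/llama-3.1-8b-instruct")
    print(final)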
@@ -308,16 +305,19 @@ def retry_last_response(history, use_web_search, model, temperature, num_calls):
 
     return chatbot_interface(last_user_msg, history, use_web_search, model, temperature, num_calls)
 
-def respond(message, history, use_web_search, model, temperature, num_calls, selected_docs):
+def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
     logging.info(f"User Query: {message}")
     logging.info(f"Model Used: {model}")
     logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
+
     logging.info(f"Selected Documents: {selected_docs}")
 
     try:
         if use_web_search:
             for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
                 response = f"{main_content}\n\n{sources}"
+                first_line = response.split('\n')[0] if response else ''
+                # logging.info(f"Generated Response (first line): {first_line}")
                 yield response
         else:
             embed = get_embeddings()
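Because this hunk moves use_web_search out of the third slot, a caller that still passes arguments positionally in the old order binds use_web_search to model without any error. Calling app.py's respond with keywords sidesteps the hazard; an illustrative call with hypothetical values:

    respond(
        message="What does the report conclude?",
        history=[],
        model="mistralai/Mistral-7B-Instruct-v0.3",
        temperature=0.2,
        num_calls=3,
        use_web_search=False,
        selected_docs=["report.pdf"],
    )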
@@ -335,26 +335,28 @@ def respond(message, history, use_web_search, model, temperature, num_calls, selected_docs):
 
                 context_str = "\n".join([doc.page_content for doc in relevant_docs])
             else:
+                context_str = "No documents available."
                 yield "No documents available. Please upload PDF documents to answer questions."
                 return
 
             if model == "@cf/meta/llama-3.1-8b-instruct":
                 # Use Cloudflare API
-                prompt
-
-
-                for partial_response in generate_chunked_response(prompt, model, num_calls=num_calls, temperature=temperature):
+                for partial_response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="pdf"):
+                    first_line = partial_response.split('\n')[0] if partial_response else ''
+                    # logging.info(f"Generated Response (first line): {first_line}")
                     yield partial_response
             else:
                 # Use Hugging Face API
                 for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
+                    first_line = partial_response.split('\n')[0] if partial_response else ''
+                    # logging.info(f"Generated Response (first line): {first_line}")
                     yield partial_response
     except Exception as e:
         logging.error(f"Error with {model}: {str(e)}")
         if "microsoft/Phi-3-mini-4k-instruct" in model:
             logging.info("Falling back to Mistral model due to Phi-3 error")
             fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
-            yield from respond(message, history, use_web_search, fallback_model, temperature, num_calls, selected_docs)
+            yield from respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
         else:
             yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
 
@@ -458,36 +460,25 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
         yield "No documents available. Please upload PDF documents to answer questions."
         return
 
-
-
-
-    logging.info(f"Number of documents
-
-    #
-
-    logging.info(f"Number of
-
-
-
-
-        yield "No documents found for the selected sources. Please check your document selection."
-        return
-
-    # Perform similarity search on selected documents
-    relevant_docs = database.similarity_search(query, k=5, filter=lambda doc: doc.metadata["source"] in selected_docs)
-    logging.info(f"Number of relevant documents after similarity search: {len(relevant_docs)}")
-    logging.info(f"Relevant documents: {[doc.metadata['source'] for doc in relevant_docs]}")
-
-    if not relevant_docs:
-        logging.warning("No relevant documents found after similarity search")
-        yield "No relevant information found in the selected documents. Please try rephrasing your query."
+    retriever = database.as_retriever()
+    logging.info(f"Retrieving relevant documents for query: {query}")
+    relevant_docs = retriever.get_relevant_documents(query)
+    logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
+
+    # Filter relevant_docs based on selected documents
+    filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
+    logging.info(f"Number of filtered documents: {len(filtered_docs)}")
+
+    if not filtered_docs:
+        logging.warning(f"No relevant information found in the selected documents: {selected_docs}")
+        yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
         return
 
-    for doc in relevant_docs:
+    for doc in filtered_docs:
         logging.info(f"Document source: {doc.metadata['source']}")
         logging.info(f"Document content preview: {doc.page_content[:100]}...")  # Log first 100 characters of each document
 
-    context_str = "\n".join([doc.page_content for doc in relevant_docs])
+    context_str = "\n".join([doc.page_content for doc in filtered_docs])
     logging.info(f"Total context length: {len(context_str)}")
 
     if model == "@cf/meta/llama-3.1-8b-instruct":
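The rewritten retrieval fetches the retriever's default top hits first and filters by source afterwards, so a query whose nearest neighbours all live in unselected files yields nothing even when the store holds relevant chunks from the selected ones. The deleted code pushed the selection into the search itself; the same idea via similarity_search, echoing the removed line (callable-filter support varies by vector store and LangChain version):

    relevant_docs = database.similarity_search(
        query,
        k=5,
        filter=lambda doc: doc.metadata["source"] in selected_docs,
    )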