Shreyas094 committed on
Commit
a322a99
·
verified ·
1 Parent(s): 2deeb1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -50
app.py CHANGED
@@ -477,63 +477,73 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
477
  embed = get_embeddings()
478
  if os.path.exists("faiss_database"):
479
  logging.info("Loading FAISS database")
480
- database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
481
- logging.info(f"FAISS database loaded with {len(database.index)} vectors")
 
 
 
 
 
482
  else:
483
  logging.warning("No FAISS database found")
484
  yield "No documents available. Please upload PDF documents to answer questions."
485
  return
486
 
487
- retriever = database.as_retriever()
488
- logging.info(f"Retrieving relevant documents for query: {query}")
489
- relevant_docs = retriever.get_relevant_documents(query)
490
- logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
491
-
492
- # Filter relevant_docs based on selected documents
493
- filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
494
- logging.info(f"Number of filtered documents: {len(filtered_docs)}")
495
-
496
- if not filtered_docs:
497
- logging.warning(f"No relevant information found in the selected documents: {selected_docs}")
498
- yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
499
- return
500
-
501
- for i, doc in enumerate(filtered_docs):
502
- logging.info(f"Document {i+1} source: {doc.metadata['source']}")
503
- logging.info(f"Document {i+1} content preview: {doc.page_content[:100]}...")
504
-
505
- context_str = "\n".join([doc.page_content for doc in filtered_docs])
506
- logging.info(f"Total context length: {len(context_str)}")
507
-
508
- if model == "@cf/meta/llama-3.1-8b-instruct":
509
- logging.info("Using Cloudflare API")
510
- # Use Cloudflare API with the retrieved context
511
- for response in get_response_from_cloudflare(prompt="", context=context_str, query=query, num_calls=num_calls, temperature=temperature, search_type="pdf"):
512
- yield response
513
- else:
514
- logging.info("Using Hugging Face API")
515
- # Use Hugging Face API
516
- prompt = f"""Using the following context from the PDF documents:
517
- {context_str}
518
- Write a detailed and complete response that answers the following user question: '{query}'"""
519
 
520
- client = InferenceClient(model, token=huggingface_token)
 
 
521
 
522
- response = ""
523
- for i in range(num_calls):
524
- logging.info(f"API call {i+1}/{num_calls}")
525
- for message in client.chat_completion(
526
- messages=[{"role": "user", "content": prompt}],
527
- max_tokens=10000,
528
- temperature=temperature,
529
- stream=True,
530
- ):
531
- if message.choices and message.choices[0].delta and message.choices[0].delta.content:
532
- chunk = message.choices[0].delta.content
533
- response += chunk
534
- yield response # Yield partial response
535
-
536
- logging.info("Finished generating response")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
537
 
538
  def vote(data: gr.LikeData):
539
  if data.liked:
 
477
  embed = get_embeddings()
478
  if os.path.exists("faiss_database"):
479
  logging.info("Loading FAISS database")
480
+ try:
481
+ database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
482
+ logging.info(f"FAISS database loaded with {database.index.ntotal} vectors")
483
+ except Exception as e:
484
+ logging.error(f"Error loading FAISS database: {str(e)}")
485
+ yield "Error loading the document database. Please try uploading the documents again."
486
+ return
487
  else:
488
  logging.warning("No FAISS database found")
489
  yield "No documents available. Please upload PDF documents to answer questions."
490
  return
491
 
492
+ try:
493
+ retriever = database.as_retriever()
494
+ logging.info(f"Retrieving relevant documents for query: {query}")
495
+ relevant_docs = retriever.get_relevant_documents(query)
496
+ logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
 
498
+ # Filter relevant_docs based on selected documents
499
+ filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
500
+ logging.info(f"Number of filtered documents: {len(filtered_docs)}")
501
 
502
+ if not filtered_docs:
503
+ logging.warning(f"No relevant information found in the selected documents: {selected_docs}")
504
+ yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
505
+ return
506
+
507
+ for i, doc in enumerate(filtered_docs):
508
+ logging.info(f"Document {i+1} source: {doc.metadata['source']}")
509
+ logging.info(f"Document {i+1} content preview: {doc.page_content[:100]}...")
510
+
511
+ context_str = "\n".join([doc.page_content for doc in filtered_docs])
512
+ logging.info(f"Total context length: {len(context_str)}")
513
+
514
+ if model == "@cf/meta/llama-3.1-8b-instruct":
515
+ logging.info("Using Cloudflare API")
516
+ # Use Cloudflare API with the retrieved context
517
+ for response in get_response_from_cloudflare(prompt="", context=context_str, query=query, num_calls=num_calls, temperature=temperature, search_type="pdf"):
518
+ yield response
519
+ else:
520
+ logging.info("Using Hugging Face API")
521
+ # Use Hugging Face API
522
+ prompt = f"""Using the following context from the PDF documents:
523
+ {context_str}
524
+ Write a detailed and complete response that answers the following user question: '{query}'"""
525
+
526
+ client = InferenceClient(model, token=huggingface_token)
527
+
528
+ response = ""
529
+ for i in range(num_calls):
530
+ logging.info(f"API call {i+1}/{num_calls}")
531
+ for message in client.chat_completion(
532
+ messages=[{"role": "user", "content": prompt}],
533
+ max_tokens=10000,
534
+ temperature=temperature,
535
+ stream=True,
536
+ ):
537
+ if message.choices and message.choices[0].delta and message.choices[0].delta.content:
538
+ chunk = message.choices[0].delta.content
539
+ response += chunk
540
+ yield response # Yield partial response
541
+
542
+ logging.info("Finished generating response")
543
+
544
+ except Exception as e:
545
+ logging.error(f"Error in get_response_from_pdf: {str(e)}")
546
+ yield f"An error occurred while processing your query: {str(e)}. Please try again or contact support."
547
 
548
  def vote(data: gr.LikeData):
549
  if data.liked: