Shreyas094 committed · verified
Commit: 9f2051d · Parent(s): 9e40ee6

Update app.py

Files changed (1): app.py (+44 -28)
app.py CHANGED
@@ -460,7 +460,7 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
         yield "No documents available. Please upload PDF documents to answer questions."
         return
 
-    retriever = database.as_retriever(search_kwargs={"k": 5})
+    retriever = database.as_retriever(search_kwargs={"k": 10})  # Increased k to 10
     logging.info(f"Retrieving relevant documents for query: {query}")
     relevant_docs = retriever.get_relevant_documents(query)
     logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
@@ -476,40 +476,56 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
 
     for doc in filtered_docs:
         logging.info(f"Document source: {doc.metadata['source']}")
-        logging.info(f"Document content preview: {doc.page_content[:100]}...")  # Log first 100 characters of each document
+        logging.info(f"Document content preview: {doc.page_content[:100]}...")
 
-    context_str = "\n".join([doc.page_content for doc in filtered_docs])
-    logging.info(f"Total context length: {len(context_str)}")
+    # Implement a sliding window approach for context
+    max_context_length = 4000  # Adjust based on your model's capacity
+    context_chunks = []
+    current_chunk = ""
+    for doc in filtered_docs:
+        if len(current_chunk) + len(doc.page_content) > max_context_length:
+            context_chunks.append(current_chunk)
+            current_chunk = doc.page_content
+        else:
+            current_chunk += "\n" + doc.page_content
+    if current_chunk:
+        context_chunks.append(current_chunk)
 
-    if model == "@cf/meta/llama-3.1-8b-instruct":
-        logging.info("Using Cloudflare API")
-        # Use Cloudflare API with the retrieved context
-        for response in get_response_from_cloudflare(prompt="", context=context_str, query=query, num_calls=num_calls, temperature=temperature, search_type="pdf"):
-            yield response
-    else:
-        logging.info("Using Hugging Face API")
-        # Use Hugging Face API
-        prompt = f"""Using the following context from the PDF documents:
+    logging.info(f"Number of context chunks: {len(context_chunks)}")
+
+    for i, context_str in enumerate(context_chunks):
+        logging.info(f"Processing context chunk {i+1}/{len(context_chunks)}")
+        logging.info(f"Context chunk length: {len(context_str)}")
+
+        if model == "@cf/meta/llama-3.1-8b-instruct":
+            logging.info("Using Cloudflare API")
+            for response in get_response_from_cloudflare(prompt="", context=context_str, query=query, num_calls=num_calls, temperature=temperature, search_type="pdf"):
+                yield response
+        else:
+            logging.info("Using Hugging Face API")
+            prompt = f"""Using the following context from the PDF documents:
 {context_str}
 Write a detailed and complete response that answers the following user question: '{query}'"""
 
-        client = InferenceClient(model, token=huggingface_token)
+            client = InferenceClient(model, token=huggingface_token)
 
-        response = ""
-        for i in range(num_calls):
-            logging.info(f"API call {i+1}/{num_calls}")
-            for message in client.chat_completion(
-                messages=[{"role": "user", "content": prompt}],
-                max_tokens=10000,
-                temperature=temperature,
-                stream=True,
-            ):
-                if message.choices and message.choices[0].delta and message.choices[0].delta.content:
-                    chunk = message.choices[0].delta.content
-                    response += chunk
-                    yield response  # Yield partial response
+            response = ""
+            for j in range(num_calls):
+                logging.info(f"API call {j+1}/{num_calls}")
+                for message in client.chat_completion(
+                    messages=[{"role": "user", "content": prompt}],
+                    max_tokens=10000,
+                    temperature=temperature,
+                    stream=True,
+                ):
+                    if message.choices and message.choices[0].delta and message.choices[0].delta.content:
+                        chunk = message.choices[0].delta.content
+                        response += chunk
+                        yield response  # Yield partial response
 
-    logging.info("Finished generating response")
+        logging.info("Finished generating response for this context chunk")
+
+    logging.info("Finished processing all context chunks")
 
 def vote(data: gr.LikeData):
     if data.liked:
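
For reference, the context-packing loop this commit adds can be exercised on its own. The sketch below is a minimal, runnable extraction of that logic; the Doc dataclass and the pack_context name are hypothetical stand-ins introduced here for testing (only the page_content attribute of LangChain's Document is assumed).

# Minimal sketch of the commit's context packing, lifted out of
# get_response_from_pdf so it can be tested in isolation.
from dataclasses import dataclass

@dataclass
class Doc:
    page_content: str  # hypothetical stand-in for langchain Document

def pack_context(docs, max_context_length=4000):
    """Greedily pack document texts into chunks of roughly
    max_context_length characters, mirroring the committed loop.
    Note two quirks faithful to the commit: each chunk starts with a
    leading "\\n" (current_chunk begins empty), and a single document
    longer than the limit still becomes one oversized chunk."""
    context_chunks = []
    current_chunk = ""
    for doc in docs:
        if len(current_chunk) + len(doc.page_content) > max_context_length:
            context_chunks.append(current_chunk)
            current_chunk = doc.page_content
        else:
            current_chunk += "\n" + doc.page_content
    if current_chunk:
        context_chunks.append(current_chunk)
    return context_chunks

if __name__ == "__main__":
    docs = [Doc("a" * 1800), Doc("b" * 1800), Doc("c" * 1800)]
    # First two docs fit under 4000 chars; the third spills into a new chunk.
    print([len(c) for c in pack_context(docs)])  # -> [3602, 1800]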
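
Both branches keep the pre-existing streaming pattern: each yield carries the full response accumulated so far rather than a delta, so a consumer can simply overwrite its output on every yield. Below is a small sketch of that consumer contract, with fake_stream as a hypothetical stand-in for the client.chat_completion(..., stream=True) call.

# Sketch of the streaming contract: successive yields are progressively
# longer strings, so the last value received is the complete answer.
def fake_stream(deltas):
    """Hypothetical stand-in for the streamed API call; mirrors the
    commit's `response += chunk; yield response` accumulation."""
    response = ""
    for chunk in deltas:
        response += chunk
        yield response

last = ""
for partial in fake_stream(["The ", "answer ", "is 42."]):
    last = partial  # a UI output would be overwritten on each yield
print(last)  # -> The answer is 42.

One consequence of the new outer loop worth noting: response is reset per context chunk, and the Hugging Face path issues num_calls requests per chunk, so total model calls grow to roughly len(context_chunks) * num_calls.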