Pijush2023 committed on
Commit
7941ba5
·
verified ·
1 Parent(s): b632e23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -3
app.py CHANGED
@@ -486,6 +486,52 @@ import re
486
 
487
  # return final_response
488
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  def clean_response(response_text):
490
  # Remove system and user tags
491
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
@@ -493,10 +539,10 @@ def clean_response(response_text):
493
  response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
494
 
495
  # Extract the document name and page number
496
- document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
497
  if document_match:
498
- document_name = document_match.group(1).split('/')[-1] # Get the document name
499
- page_number = document_match.group(2) # Get the page number
500
  else:
501
  document_name = "Unknown"
502
  page_number = "Unknown"
 
486
 
487
  # return final_response
488
 
489
+ # def clean_response(response_text):
490
+ # # Remove system and user tags
491
+ # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
492
+ # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
493
+ # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
494
+
495
+ # # Extract the document name and page number
496
+ # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
497
+ # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
498
+ # if document_match:
499
+ # document_name = document_match.group(1).split('/')[-1] # Get the document name
500
+ # page_number = document_match.group(2) # Get the page number
501
+ # else:
502
+ # document_name = "Unknown"
503
+ # page_number = "Unknown"
504
+
505
+ # # Remove the entire 'Document(metadata=...' and any mention of it from the response
506
+ # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
507
+
508
+ # # Remove any mention of "Document:" in the response
509
+ # response_text = re.sub(r'- Document:.*', '', response_text)
510
+
511
+ # # Remove any unwanted escape characters like \u and \u00
512
+ # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
513
+
514
+ # # Ensure proper spacing between words and dates
515
+ # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
516
+ # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
517
+
518
+ # # Remove the phrase "Sure! The Responses are as follows:" from the actual content
519
+ # response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
520
+
521
+ # # Clean up the text by removing extra whitespace
522
+ # cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
523
+
524
+ # # Format the final response with bullet points
525
+ # final_response = f"""
526
+ # Sure! Here is the response for your Query:
527
+ # • Document name - {document_name}
528
+ # • Page No - {page_number}
529
+ # • Response - {cleaned_response}
530
+ # """
531
+
532
+ # return final_response
533
+
534
+
535
  def clean_response(response_text):
536
  # Remove system and user tags
537
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
 
539
  response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
540
 
541
  # Extract the document name and page number
542
+ document_match = re.search(r"(\d{6,})_(\d{12,})_V\d+\.pdf,page:(\d+)", response_text)
543
  if document_match:
544
+ document_name = document_match.group(0).split(',')[0] # Get the document name (before ',page')
545
+ page_number = document_match.group(3) # Get the page number
546
  else:
547
  document_name = "Unknown"
548
  page_number = "Unknown"