Spaces:

Cachoups
/

FinanceReport

Sleeping

Cachoups committed on Sep 13, 2024

Commit

524e417

verified ·

1 Parent(s): 9c2be66

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -155,7 +155,7 @@ def find_sentences_with_keywords(text, keywords):
     # Split text into sentences using regular expression to match sentence-ending punctuation
     sentences = re.split(r'(?<=[.!?])\s+', text)
-    matched_sentences = []
     # For each keyword, find sentences that contain the keyword as a whole word
     for keyword in keywords:
@@ -163,9 +163,9 @@ def find_sentences_with_keywords(text, keywords):
         for sentence in sentences:
             if keyword_pattern.search(sentence):
-                matched_sentences.append(sentence)
-    return matched_sentences
 # Main function to process both PDFs based on the Excel file names and the sheet name

     # Split text into sentences using regular expression to match sentence-ending punctuation
     sentences = re.split(r'(?<=[.!?])\s+', text)
+    matched_sentences = set()  # Use a set to store unique sentences
     # For each keyword, find sentences that contain the keyword as a whole word
     for keyword in keywords:
         for sentence in sentences:
             if keyword_pattern.search(sentence):
+                matched_sentences.add(sentence)  # Add to set to ensure uniqueness
+    return list(matched_sentences)  # Convert set back to list for consistent output
 # Main function to process both PDFs based on the Excel file names and the sheet name