Cachoups committed on
Commit
524e417
·
verified ·
1 Parent(s): 9c2be66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -155,7 +155,7 @@ def find_sentences_with_keywords(text, keywords):
155
  # Split text into sentences using regular expression to match sentence-ending punctuation
156
  sentences = re.split(r'(?<=[.!?])\s+', text)
157
 
158
- matched_sentences = []
159
 
160
  # For each keyword, find sentences that contain the keyword as a whole word
161
  for keyword in keywords:
@@ -163,9 +163,9 @@ def find_sentences_with_keywords(text, keywords):
163
 
164
  for sentence in sentences:
165
  if keyword_pattern.search(sentence):
166
- matched_sentences.append(sentence)
167
 
168
- return matched_sentences
169
 
170
 
171
  # Main function to process both PDFs based on the Excel file names and the sheet name
 
155
  # Split text into sentences using regular expression to match sentence-ending punctuation
156
  sentences = re.split(r'(?<=[.!?])\s+', text)
157
 
158
+ matched_sentences = set() # Use a set to store unique sentences
159
 
160
  # For each keyword, find sentences that contain the keyword as a whole word
161
  for keyword in keywords:
 
163
 
164
  for sentence in sentences:
165
  if keyword_pattern.search(sentence):
166
+ matched_sentences.add(sentence) # Add to set to ensure uniqueness
167
 
168
+ return list(matched_sentences) # Convert set back to list for consistent output
169
 
170
 
171
  # Main function to process both PDFs based on the Excel file names and the sheet name