kusa04 committed on
Commit
cd0bd46
·
verified ·
1 Parent(s): f62fbb7

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +56 -16
functions.py CHANGED
@@ -167,21 +167,61 @@ def extract_terms(cell):
167
  return []
168
 
169
 
170
- def remove_excluded_from_list(keywords_list, excluded_keywords):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  """
172
- Remove items from the keywords_list if they contain any of the excluded keywords.
173
- This function checks for partial matches in a case-insensitive manner.
 
174
  """
175
- if not isinstance(keywords_list, list):
176
- return keywords_list # If it's not a list, return as is
177
-
178
- filtered_list = []
179
- for item in keywords_list:
180
- # Check if item contains any excluded keyword (case-insensitive)
181
- if any(kw.lower() in item.lower() for kw in excluded_keywords):
182
- # Skip this item if it matches an excluded keyword
183
- continue
184
- else:
185
- filtered_list.append(item)
186
-
187
- return filtered_list
 
167
  return []
168
 
169
 
170
+ # def remove_excluded_from_list(keywords_list, excluded_keywords):
171
+ # """
172
+ # Remove items from the keywords_list if they contain any of the excluded keywords.
173
+ # This function checks for partial matches in a case-insensitive manner.
174
+ # """
175
+ # if not isinstance(keywords_list, list):
176
+ # return keywords_list # If it's not a list, return as is
177
+
178
+ # filtered_list = []
179
+ # for item in keywords_list:
180
+ # # Check if item contains any excluded keyword (case-insensitive)
181
+ # if any(kw.lower() in item.lower() for kw in excluded_keywords):
182
+ # # Skip this item if it matches an excluded keyword
183
+ # continue
184
+ # else:
185
+ # filtered_list.append(item)
186
+
187
+ # return filtered_list
188
+
189
+
190
+
191
def remove_excluded_from_text(text, excluded_keywords):
    """
    Remove every occurrence of each excluded keyword from the text.

    Matching is case-insensitive and literal (keywords are regex-escaped),
    and it is substring-based: a keyword is removed wherever it appears,
    including inside a longer word. After removal, runs of whitespace are
    collapsed to a single space and the result is stripped.

    Args:
        text: The value to clean. Anything that is not a ``str`` is
            returned unchanged.
        excluded_keywords: Iterable of keyword strings to remove. A bare
            string is treated as a single keyword, ``None`` as "no
            keywords"; empty or non-string entries are ignored.

    Returns:
        The cleaned string, or ``text`` itself when it is not a string.
    """
    if not isinstance(text, str):
        return text

    # A bare string would otherwise be iterated character by character.
    if isinstance(excluded_keywords, str):
        excluded_keywords = [excluded_keywords]

    # Ignore entries that cannot be matched (None, numbers, empty strings)
    # instead of letting re.escape raise on them.
    keywords = [
        kw for kw in (excluded_keywords or ()) if isinstance(kw, str) and kw
    ]

    # Remove longer keywords first so that a short keyword contained in a
    # longer one (e.g. "learning" vs "machine learning") cannot destroy the
    # longer match; the outcome no longer depends on the caller's ordering.
    keywords.sort(key=len, reverse=True)

    filtered_text = text
    for kw in keywords:
        filtered_text = re.sub(
            re.escape(kw), "", filtered_text, flags=re.IGNORECASE
        )

    # Removing keywords can leave doubled or dangling spaces behind.
    return re.sub(r'\s+', ' ', filtered_text).strip()
207
+
208
+
209
+
210
+ # Example of processing an extracted result (which may be a list of phrases)
211
def process_extracted_result(result, excluded_keywords):
    """
    Clean an extracted result by removing excluded keywords from each string.

    Args:
        result: Either a single string or a list of items. Any other type
            yields an empty list.
        excluded_keywords: Keywords passed through to
            ``remove_excluded_from_text`` for every string processed.

    Returns:
        A list of the non-empty cleaned strings, in their original order.
        Items that are not strings (e.g. numbers or NaN placeholders inside
        a list) are dropped so the return value only ever contains strings.
    """
    # Normalise both accepted shapes to one list so a single loop handles them.
    if isinstance(result, str):
        candidates = [result]
    elif isinstance(result, list):
        candidates = result
    else:
        return []

    cleaned_items = []
    for item in candidates:
        # The helper returns non-strings untouched; skip them here so truthy
        # non-string values cannot leak into the list of cleaned strings.
        if not isinstance(item, str):
            continue
        cleaned_item = remove_excluded_from_text(item, excluded_keywords)
        if cleaned_item:  # Only keep strings that still have content
            cleaned_items.append(cleaned_item)
    return cleaned_items