Spaces:
Sleeping
Sleeping
Update functions.py
Browse files- functions.py +56 -16
functions.py
CHANGED
@@ -167,21 +167,61 @@ def extract_terms(cell):
|
|
167 |
return []
|
168 |
|
169 |
|
170 |
-
def remove_excluded_from_list(keywords_list, excluded_keywords):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
"""
|
172 |
-
|
173 |
-
|
|
|
174 |
"""
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
return filtered_list
|
|
|
167 |
return []
|
168 |
|
169 |
|
170 |
+
# def remove_excluded_from_list(keywords_list, excluded_keywords):
|
171 |
+
# """
|
172 |
+
# Remove items from the keywords_list if they contain any of the excluded keywords.
|
173 |
+
# This function checks for partial matches in a case-insensitive manner.
|
174 |
+
# """
|
175 |
+
# if not isinstance(keywords_list, list):
|
176 |
+
# return keywords_list # If it's not a list, return as is
|
177 |
+
|
178 |
+
# filtered_list = []
|
179 |
+
# for item in keywords_list:
|
180 |
+
# # Check if item contains any excluded keyword (case-insensitive)
|
181 |
+
# if any(kw.lower() in item.lower() for kw in excluded_keywords):
|
182 |
+
# # Skip this item if it matches an excluded keyword
|
183 |
+
# continue
|
184 |
+
# else:
|
185 |
+
# filtered_list.append(item)
|
186 |
+
|
187 |
+
# return filtered_list
|
188 |
+
|
189 |
+
|
190 |
+
|
191 |
+
def remove_excluded_from_text(text, excluded_keywords):
    """
    Remove occurrences of any excluded keyword from the text.

    Matching is case-insensitive and works on substrings (not whole words).
    Keywords are removed one after another in the order given; afterwards
    every run of whitespace is collapsed to a single space and the result
    is stripped.

    Args:
        text: The string to clean. Any non-string value is returned unchanged.
        excluded_keywords: Iterable of keyword strings to remove. ``None`` is
            treated as "no keywords"; entries that are not non-empty strings
            are skipped.

    Returns:
        The cleaned string, or ``text`` itself when it is not a string.
    """
    if not isinstance(text, str):
        return text
    if excluded_keywords is None:
        excluded_keywords = ()
    filtered_text = text
    for kw in excluded_keywords:
        # re.escape() only accepts strings, so skip None/NaN/numeric entries
        # instead of raising; an empty keyword would remove nothing anyway.
        if not isinstance(kw, str) or not kw:
            continue
        # Escape the keyword so regex metacharacters in it match literally.
        filtered_text = re.sub(
            re.escape(kw), "", filtered_text, flags=re.IGNORECASE
        )
    # Removing a keyword can leave doubled or leading/trailing spaces behind.
    return re.sub(r"\s+", " ", filtered_text).strip()
|
207 |
+
|
208 |
+
|
209 |
+
|
210 |
+
# Example of processing an extracted result (which may be a list of phrases)
|
211 |
+
def process_extracted_result(result, excluded_keywords):
    """
    Process an extracted result by removing excluded keywords from each string.

    Args:
        result: Either a single string or a list of strings. Any other type
            yields an empty list.
        excluded_keywords: Keywords passed through to
            ``remove_excluded_from_text`` for every string processed.

    Returns:
        A list of the cleaned strings that are still non-empty after keyword
        removal. Non-string items inside a list are skipped so the returned
        list contains only strings.
    """
    # Normalise both accepted input shapes to one list of candidates.
    if isinstance(result, str):
        candidates = [result]
    elif isinstance(result, list):
        candidates = result
    else:
        return []

    cleaned_items = []
    for item in candidates:
        # remove_excluded_from_text returns non-strings unchanged, which would
        # let numbers or nested lists leak into the output; drop them here.
        if not isinstance(item, str):
            continue
        cleaned_item = remove_excluded_from_text(item, excluded_keywords)
        if cleaned_item:  # Only add non-empty strings
            cleaned_items.append(cleaned_item)
    return cleaned_items
|
|
|
|