Spaces:
Sleeping
Sleeping
Update functions.py
Browse files- functions.py +47 -9
functions.py
CHANGED
@@ -121,12 +121,50 @@ def preprocess_text(text):
|
|
121 |
return text
|
122 |
|
123 |
|
124 |
-
|
125 |
-
|
126 |
-
#
|
127 |
-
|
128 |
-
|
129 |
-
#
|
130 |
-
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
return text
|
122 |
|
123 |
|
124 |
+
|
125 |
+
def generate_variants(keyword):
    """Build the spelling/casing variants of *keyword*.

    The result always has seven entries, in this order:

    1. the keyword unchanged
    2. the keyword upper-cased
    3. the keyword lower-cased
    4. its whitespace-separated words joined without spaces, each word
       passed through ``str.capitalize`` (first letter upper, rest lower)
    5. its words joined without spaces, upper-cased
    6. its words joined without spaces, lower-cased
    7. the upper-cased first letter of each word (e.g. ``MHW``)

    Entries may repeat (a single lower-case word yields the same string
    for several slots); duplicates are not removed.
    """
    tokens = keyword.split()

    def squash(transform):
        # Apply *transform* to every word and glue the results together
        # with no separator.
        return ''.join(map(transform, tokens))

    return [
        keyword,
        keyword.upper(),
        keyword.lower(),
        squash(str.capitalize),
        squash(str.upper),
        squash(str.lower),
        squash(lambda token: token[0].upper()),
    ]
|
146 |
+
|
147 |
+
|
148 |
+
|
149 |
+
# Function to check if a cell contains any excluded keywords
|
150 |
+
def contains_excluded_keywords(cell, excluded_keywords):
    """Return True if *cell* contains any of *excluded_keywords*.

    Matching is a plain, case-sensitive substring test.

    Args:
        cell: The value to inspect. A ``numpy.ndarray`` is flattened into
            one space-separated string of its elements before searching;
            a ``str`` is searched directly.
        excluded_keywords: Iterable of keyword strings to look for.

    Returns:
        bool: True when at least one keyword occurs in the cell text.
        False when none does, or when *cell* is neither an ndarray nor a
        str (previously this case fell through and returned ``None``).
    """
    if isinstance(cell, np.ndarray):
        # Join array elements into a single string and search for keywords.
        haystack = ' '.join(map(str, cell))
    elif isinstance(cell, str):
        haystack = cell
    else:
        # Unsupported cell types (NaN, None, numbers, ...) cannot contain
        # a keyword; answer with a real boolean instead of None.
        return False
    return any(keyword in haystack for keyword in excluded_keywords)
|
158 |
+
|
159 |
+
|
160 |
+
|
161 |
+
# Function to extract terms from a cell
|
162 |
+
def extract_terms(cell):
    """Return the non-empty, whitespace-stripped terms held in *cell*.

    A ``numpy.ndarray`` contributes one term per element (each element is
    converted with ``str``); a ``str`` is split on commas. Any other type
    yields an empty list. Terms that are empty after stripping are dropped.
    """
    if isinstance(cell, np.ndarray):
        candidates = (str(element) for element in cell)
    elif isinstance(cell, str):
        candidates = cell.split(',')
    else:
        return []

    # Strip once, then keep only the terms that still have content.
    trimmed = (candidate.strip() for candidate in candidates)
    return [term for term in trimmed if term]
|