Spaces:
Sleeping
Sleeping
Try to fix pickling error
Browse files
app.py
CHANGED
@@ -110,6 +110,8 @@ reduce_umap_model = UMAP(
|
|
110 |
random_state=42,
|
111 |
)
|
112 |
|
|
|
|
|
113 |
|
114 |
def get_parquet_urls(dataset, config, split):
|
115 |
parquet_files = session.get(
|
@@ -138,6 +140,8 @@ def calculate_embeddings(docs):
|
|
138 |
|
139 |
@spaces.GPU
|
140 |
def fit_model(docs, embeddings):
|
|
|
|
|
141 |
new_model = BERTopic(
|
142 |
"english",
|
143 |
# Sub-models
|
@@ -151,10 +155,13 @@ def fit_model(docs, embeddings):
|
|
151 |
verbose=True,
|
152 |
min_topic_size=15, # TODO: Should this value be coherent with N_NEIGHBORS?
|
153 |
)
|
154 |
-
logging.
|
155 |
new_model.fit(docs, embeddings)
|
156 |
-
logging.
|
157 |
-
|
|
|
|
|
|
|
158 |
|
159 |
|
160 |
def generate_topics(dataset, config, split, column, nested_column):
|
@@ -180,12 +187,12 @@ def generate_topics(dataset, config, split, column, nested_column):
|
|
180 |
)
|
181 |
|
182 |
embeddings = calculate_embeddings(docs)
|
183 |
-
|
184 |
|
185 |
if base_model is None:
|
186 |
-
base_model =
|
187 |
else:
|
188 |
-
updated_model = BERTopic.merge_models([base_model,
|
189 |
nr_new_topics = len(set(updated_model.topics_)) - len(
|
190 |
set(base_model.topics_)
|
191 |
)
|
|
|
110 |
random_state=42,
|
111 |
)
|
112 |
|
113 |
+
global_topic_model = None
|
114 |
+
|
115 |
|
116 |
def get_parquet_urls(dataset, config, split):
|
117 |
parquet_files = session.get(
|
|
|
140 |
|
141 |
@spaces.GPU
|
142 |
def fit_model(docs, embeddings):
|
143 |
+
global global_topic_model
|
144 |
+
|
145 |
new_model = BERTopic(
|
146 |
"english",
|
147 |
# Sub-models
|
|
|
155 |
verbose=True,
|
156 |
min_topic_size=15, # TODO: Should this value be coherent with N_NEIGHBORS?
|
157 |
)
|
158 |
+
logging.info("Fitting new model")
|
159 |
new_model.fit(docs, embeddings)
|
160 |
+
logging.info("End fitting new model")
|
161 |
+
|
162 |
+
global_topic_model = new_model
|
163 |
+
|
164 |
+
logging.info("Global model updated")
|
165 |
|
166 |
|
167 |
def generate_topics(dataset, config, split, column, nested_column):
|
|
|
187 |
)
|
188 |
|
189 |
embeddings = calculate_embeddings(docs)
|
190 |
+
fit_model(docs, embeddings)
|
191 |
|
192 |
if base_model is None:
|
193 |
+
base_model = global_topic_model
|
194 |
else:
|
195 |
+
updated_model = BERTopic.merge_models([base_model, global_topic_model])
|
196 |
nr_new_topics = len(set(updated_model.topics_)) - len(
|
197 |
set(base_model.topics_)
|
198 |
)
|