asoria HF staff committed on
Commit
fd054e7
1 Parent(s): c79877a

Try to fix pickling error

Browse files
Files changed (1) hide show
  1. app.py +13 -6
app.py CHANGED
@@ -110,6 +110,8 @@ reduce_umap_model = UMAP(
110
  random_state=42,
111
  )
112
 
 
 
113
 
114
  def get_parquet_urls(dataset, config, split):
115
  parquet_files = session.get(
@@ -138,6 +140,8 @@ def calculate_embeddings(docs):
138
 
139
  @spaces.GPU
140
  def fit_model(docs, embeddings):
 
 
141
  new_model = BERTopic(
142
  "english",
143
  # Sub-models
@@ -151,10 +155,13 @@ def fit_model(docs, embeddings):
151
  verbose=True,
152
  min_topic_size=15, # TODO: Should this value be coherent with N_NEIGHBORS?
153
  )
154
- logging.debug("Fitting new model")
155
  new_model.fit(docs, embeddings)
156
- logging.debug("End fitting new model")
157
- return new_model
 
 
 
158
 
159
 
160
  def generate_topics(dataset, config, split, column, nested_column):
@@ -180,12 +187,12 @@ def generate_topics(dataset, config, split, column, nested_column):
180
  )
181
 
182
  embeddings = calculate_embeddings(docs)
183
- new_model = fit_model(docs, embeddings)
184
 
185
  if base_model is None:
186
- base_model = new_model
187
  else:
188
- updated_model = BERTopic.merge_models([base_model, new_model])
189
  nr_new_topics = len(set(updated_model.topics_)) - len(
190
  set(base_model.topics_)
191
  )
 
110
  random_state=42,
111
  )
112
 
113
+ global_topic_model = None
114
+
115
 
116
  def get_parquet_urls(dataset, config, split):
117
  parquet_files = session.get(
 
140
 
141
  @spaces.GPU
142
  def fit_model(docs, embeddings):
143
+ global global_topic_model
144
+
145
  new_model = BERTopic(
146
  "english",
147
  # Sub-models
 
155
  verbose=True,
156
  min_topic_size=15, # TODO: Should this value be coherent with N_NEIGHBORS?
157
  )
158
+ logging.info("Fitting new model")
159
  new_model.fit(docs, embeddings)
160
+ logging.info("End fitting new model")
161
+
162
+ global_topic_model = new_model
163
+
164
+ logging.info("Global model updated")
165
 
166
 
167
  def generate_topics(dataset, config, split, column, nested_column):
 
187
  )
188
 
189
  embeddings = calculate_embeddings(docs)
190
+ fit_model(docs, embeddings)
191
 
192
  if base_model is None:
193
+ base_model = global_topic_model
194
  else:
195
+ updated_model = BERTopic.merge_models([base_model, global_topic_model])
196
  nr_new_topics = len(set(updated_model.topics_)) - len(
197
  set(base_model.topics_)
198
  )