asoria HF staff committed on
Commit
fc9ec9d
1 Parent(s): 9b9b3ce

Enable spaces

Browse files
Files changed (1) hide show
  1. app.py +9 -10
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # import spaces
2
  import requests
3
  import logging
4
  import duckdb
@@ -57,7 +57,7 @@ model = AutoModelForCausalLM.from_pretrained(
57
  offload_folder="offload", # Offloading part of the model to CPU to save GPU memory
58
  )
59
 
60
- # Enable gradient checkpointing for memory efficiency during backprop
61
  model.gradient_checkpointing_enable()
62
 
63
  generator = pipeline(
@@ -122,13 +122,13 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
122
  return df[column].tolist()
123
 
124
 
125
- # @spaces.GPU
126
  # TODO: Modify batch size to reduce memory consumption during embedding calculation, which value is better?
127
  def calculate_embeddings(docs):
128
  return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
129
 
130
 
131
- # @spaces.GPU
132
  def fit_model(base_model, docs, embeddings):
133
  new_model = BERTopic(
134
  "english",
@@ -195,12 +195,11 @@ def generate_topics(dataset, config, split, column, nested_column):
195
  all_docs.extend(docs)
196
 
197
  topics_info = base_model.get_topic_info()
198
- # topic_plot = base_model.visualize_documents(
199
- # all_docs,
200
- # reduced_embeddings=np.vstack(reduced_embeddings_list),
201
- # custom_labels=True,
202
- # )
203
- topic_plot = base_model.visualize_barchart()
204
 
205
  logging.info(f"Topics: {repr_model_topics}")
206
 
 
1
+ import spaces
2
  import requests
3
  import logging
4
  import duckdb
 
57
  offload_folder="offload", # Offloading part of the model to CPU to save GPU memory
58
  )
59
 
60
+ # Enable gradient checkpointing for memory efficiency during backprop?
61
  model.gradient_checkpointing_enable()
62
 
63
  generator = pipeline(
 
122
  return df[column].tolist()
123
 
124
 
125
+ @spaces.GPU
126
  # TODO: Modify batch size to reduce memory consumption during embedding calculation, which value is better?
127
  def calculate_embeddings(docs):
128
  return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
129
 
130
 
131
+ @spaces.GPU
132
  def fit_model(base_model, docs, embeddings):
133
  new_model = BERTopic(
134
  "english",
 
195
  all_docs.extend(docs)
196
 
197
  topics_info = base_model.get_topic_info()
198
+ topic_plot = base_model.visualize_documents(
199
+ all_docs,
200
+ reduced_embeddings=np.vstack(reduced_embeddings_list),
201
+ custom_labels=True,
202
+ )
 
203
 
204
  logging.info(f"Topics: {repr_model_topics}")
205