Spaces:
Sleeping
Sleeping
Enable spaces
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
import requests
|
3 |
import logging
|
4 |
import duckdb
|
@@ -57,7 +57,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
57 |
offload_folder="offload", # Offloading part of the model to CPU to save GPU memory
|
58 |
)
|
59 |
|
60 |
-
# Enable gradient checkpointing for memory efficiency during backprop
|
61 |
model.gradient_checkpointing_enable()
|
62 |
|
63 |
generator = pipeline(
|
@@ -122,13 +122,13 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
|
|
122 |
return df[column].tolist()
|
123 |
|
124 |
|
125 |
-
|
126 |
# TODO: Modify batch size to reduce memory consumption during embedding calculation, which value is better?
|
127 |
def calculate_embeddings(docs):
|
128 |
return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
|
129 |
|
130 |
|
131 |
-
|
132 |
def fit_model(base_model, docs, embeddings):
|
133 |
new_model = BERTopic(
|
134 |
"english",
|
@@ -195,12 +195,11 @@ def generate_topics(dataset, config, split, column, nested_column):
|
|
195 |
all_docs.extend(docs)
|
196 |
|
197 |
topics_info = base_model.get_topic_info()
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
topic_plot = base_model.visualize_barchart()
|
204 |
|
205 |
logging.info(f"Topics: {repr_model_topics}")
|
206 |
|
|
|
1 |
+
import spaces
|
2 |
import requests
|
3 |
import logging
|
4 |
import duckdb
|
|
|
57 |
offload_folder="offload", # Offloading part of the model to CPU to save GPU memory
|
58 |
)
|
59 |
|
60 |
+
# Enable gradient checkpointing for memory efficiency during backprop?
|
61 |
model.gradient_checkpointing_enable()
|
62 |
|
63 |
generator = pipeline(
|
|
|
122 |
return df[column].tolist()
|
123 |
|
124 |
|
125 |
+
@spaces.GPU
|
126 |
# TODO: Modify batch size to reduce memory consumption during embedding calculation, which value is better?
|
127 |
def calculate_embeddings(docs):
|
128 |
return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
|
129 |
|
130 |
|
131 |
+
@spaces.GPU
|
132 |
def fit_model(base_model, docs, embeddings):
|
133 |
new_model = BERTopic(
|
134 |
"english",
|
|
|
195 |
all_docs.extend(docs)
|
196 |
|
197 |
topics_info = base_model.get_topic_info()
|
198 |
+
topic_plot = base_model.visualize_documents(
|
199 |
+
all_docs,
|
200 |
+
reduced_embeddings=np.vstack(reduced_embeddings_list),
|
201 |
+
custom_labels=True,
|
202 |
+
)
|
|
|
203 |
|
204 |
logging.info(f"Topics: {repr_model_topics}")
|
205 |
|