Spaces:
Sleeping
Sleeping
Enable Zero
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ from sentence_transformers import SentenceTransformer
|
|
15 |
from dotenv import load_dotenv
|
16 |
import os
|
17 |
|
18 |
-
|
19 |
import gradio as gr
|
20 |
|
21 |
|
@@ -81,20 +81,18 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
|
|
81 |
return df[column].tolist()
|
82 |
|
83 |
|
84 |
-
|
85 |
def calculate_embeddings(docs):
|
86 |
return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
|
87 |
|
88 |
|
89 |
-
# Adjust n_neighbors and n_components based on dataset size
|
90 |
def calculate_n_neighbors_and_components(n_rows):
|
91 |
-
# Ensure n_neighbors is proportional to the dataset size, with reasonable limits
|
92 |
n_neighbors = min(max(n_rows // 20, 15), 100)
|
93 |
n_components = 10 if n_rows > 1000 else 5 # Higher components for larger datasets
|
94 |
return n_neighbors, n_components
|
95 |
|
96 |
|
97 |
-
|
98 |
def fit_model(docs, embeddings, n_neighbors, n_components):
|
99 |
global global_topic_model
|
100 |
|
|
|
15 |
from dotenv import load_dotenv
|
16 |
import os
|
17 |
|
18 |
+
import spaces
|
19 |
import gradio as gr
|
20 |
|
21 |
|
|
|
81 |
return df[column].tolist()
|
82 |
|
83 |
|
84 |
+
@spaces.GPU
|
85 |
def calculate_embeddings(docs):
|
86 |
return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
|
87 |
|
88 |
|
|
|
89 |
def calculate_n_neighbors_and_components(n_rows):
|
|
|
90 |
n_neighbors = min(max(n_rows // 20, 15), 100)
|
91 |
n_components = 10 if n_rows > 1000 else 5 # Higher components for larger datasets
|
92 |
return n_neighbors, n_components
|
93 |
|
94 |
|
95 |
+
@spaces.GPU
|
96 |
def fit_model(docs, embeddings, n_neighbors, n_components):
|
97 |
global global_topic_model
|
98 |
|