Spaces: Running on T4
Adding subtitle
Browse files
app.py
CHANGED
@@ -46,7 +46,7 @@ assert (
|
|
46 |
EXPORTS_REPOSITORY is not None
|
47 |
), "You need to set EXPORTS_REPOSITORY in your environment variables"
|
48 |
|
49 |
-
MAX_ROWS = int(os.getenv("MAX_ROWS", "
|
50 |
CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "2_000"))
|
51 |
DATASET_VIEWE_API_URL = "https://datasets-server.huggingface.co/"
|
52 |
DATASETS_TOPICS_ORGANIZATION = os.getenv(
|
@@ -311,11 +311,18 @@ def generate_topics(dataset, config, split, column, nested_column, plot_type):
|
|
311 |
all_topics, _ = base_model.transform(all_docs)
|
312 |
all_topics = np.array(all_topics)
|
313 |
|
|
|
|
|
|
|
|
|
|
|
|
|
314 |
topic_plot = (
|
315 |
base_model.visualize_document_datamap(
|
316 |
docs=all_docs,
|
317 |
reduced_embeddings=reduced_embeddings_array,
|
318 |
title=dataset,
|
|
|
319 |
width=800,
|
320 |
height=700,
|
321 |
arrowprops={
|
@@ -324,12 +331,11 @@ def generate_topics(dataset, config, split, column, nested_column, plot_type):
|
|
324 |
"linewidth": 0,
|
325 |
"fc": "#33333377",
|
326 |
},
|
327 |
-
dynamic_label_size=
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
# min_font_size=4,
|
333 |
)
|
334 |
if plot_type == "DataMapPlot"
|
335 |
else base_model.visualize_documents(
|
@@ -386,6 +392,10 @@ def generate_topics(dataset, config, split, column, nested_column, plot_type):
|
|
386 |
topic_names_array,
|
387 |
hover_text=all_docs,
|
388 |
title=dataset,
|
|
|
|
|
|
|
|
|
389 |
enable_search=True,
|
390 |
# TODO: Export data to .arrow and also serve it
|
391 |
inline_data=True,
|
|
|
46 |
EXPORTS_REPOSITORY is not None
|
47 |
), "You need to set EXPORTS_REPOSITORY in your environment variables"
|
48 |
|
49 |
+
MAX_ROWS = int(os.getenv("MAX_ROWS", "8_000"))
|
50 |
CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "2_000"))
|
51 |
DATASET_VIEWE_API_URL = "https://datasets-server.huggingface.co/"
|
52 |
DATASETS_TOPICS_ORGANIZATION = os.getenv(
|
|
|
311 |
all_topics, _ = base_model.transform(all_docs)
|
312 |
all_topics = np.array(all_topics)
|
313 |
|
314 |
+
sub_title = (
|
315 |
+
f"Data map for the entire dataset ({limit} rows) using the column '{column}'"
|
316 |
+
if full_processing
|
317 |
+
else f"Data map for a sample of the dataset (first {limit} rows) using the column '{column}'"
|
318 |
+
)
|
319 |
+
|
320 |
topic_plot = (
|
321 |
base_model.visualize_document_datamap(
|
322 |
docs=all_docs,
|
323 |
reduced_embeddings=reduced_embeddings_array,
|
324 |
title=dataset,
|
325 |
+
sub_title=sub_title,
|
326 |
width=800,
|
327 |
height=700,
|
328 |
arrowprops={
|
|
|
331 |
"linewidth": 0,
|
332 |
"fc": "#33333377",
|
333 |
},
|
334 |
+
dynamic_label_size=True,
|
335 |
+
label_wrap_width=12,
|
336 |
+
label_over_points=True,
|
337 |
+
max_font_size=36,
|
338 |
+
min_font_size=4,
|
|
|
339 |
)
|
340 |
if plot_type == "DataMapPlot"
|
341 |
else base_model.visualize_documents(
|
|
|
392 |
topic_names_array,
|
393 |
hover_text=all_docs,
|
394 |
title=dataset,
|
395 |
+
sub_title=sub_title.replace(
|
396 |
+
"dataset",
|
397 |
+
f"<a href='https://huggingface.co/datasets/{dataset}/viewer/{config}/{split}' target='_blank'>dataset</a>",
|
398 |
+
),
|
399 |
enable_search=True,
|
400 |
# TODO: Export data to .arrow and also serve it
|
401 |
inline_data=True,
|