asoria HF staff committed on
Commit
1f396c3
1 Parent(s): 937966f

Adding subtitle

Browse files
Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -46,7 +46,7 @@ assert (
46
  EXPORTS_REPOSITORY is not None
47
  ), "You need to set EXPORTS_REPOSITORY in your environment variables"
48
 
49
- MAX_ROWS = int(os.getenv("MAX_ROWS", "10_000"))
50
  CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "2_000"))
51
  DATASET_VIEWE_API_URL = "https://datasets-server.huggingface.co/"
52
  DATASETS_TOPICS_ORGANIZATION = os.getenv(
@@ -311,11 +311,18 @@ def generate_topics(dataset, config, split, column, nested_column, plot_type):
311
  all_topics, _ = base_model.transform(all_docs)
312
  all_topics = np.array(all_topics)
313
 
 
 
 
 
 
 
314
  topic_plot = (
315
  base_model.visualize_document_datamap(
316
  docs=all_docs,
317
  reduced_embeddings=reduced_embeddings_array,
318
  title=dataset,
 
319
  width=800,
320
  height=700,
321
  arrowprops={
@@ -324,12 +331,11 @@ def generate_topics(dataset, config, split, column, nested_column, plot_type):
324
  "linewidth": 0,
325
  "fc": "#33333377",
326
  },
327
- dynamic_label_size=USE_ARROW_STYLE,
328
- # label_wrap_width=12,
329
- # label_over_points=True,
330
- # dynamic_label_size=True,
331
- # max_font_size=36,
332
- # min_font_size=4,
333
  )
334
  if plot_type == "DataMapPlot"
335
  else base_model.visualize_documents(
@@ -386,6 +392,10 @@ def generate_topics(dataset, config, split, column, nested_column, plot_type):
386
  topic_names_array,
387
  hover_text=all_docs,
388
  title=dataset,
 
 
 
 
389
  enable_search=True,
390
  # TODO: Export data to .arrow and also serve it
391
  inline_data=True,
 
46
  EXPORTS_REPOSITORY is not None
47
  ), "You need to set EXPORTS_REPOSITORY in your environment variables"
48
 
49
+ MAX_ROWS = int(os.getenv("MAX_ROWS", "8_000"))
50
  CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "2_000"))
51
  DATASET_VIEWE_API_URL = "https://datasets-server.huggingface.co/"
52
  DATASETS_TOPICS_ORGANIZATION = os.getenv(
 
311
  all_topics, _ = base_model.transform(all_docs)
312
  all_topics = np.array(all_topics)
313
 
314
+ sub_title = (
315
+ f"Data map for the entire dataset ({limit} rows) using the column '{column}'"
316
+ if full_processing
317
+ else f"Data map for a sample of the dataset (first {limit} rows) using the column '{column}'"
318
+ )
319
+
320
  topic_plot = (
321
  base_model.visualize_document_datamap(
322
  docs=all_docs,
323
  reduced_embeddings=reduced_embeddings_array,
324
  title=dataset,
325
+ sub_title=sub_title,
326
  width=800,
327
  height=700,
328
  arrowprops={
 
331
  "linewidth": 0,
332
  "fc": "#33333377",
333
  },
334
+ dynamic_label_size=True,
335
+ label_wrap_width=12,
336
+ label_over_points=True,
337
+ max_font_size=36,
338
+ min_font_size=4,
 
339
  )
340
  if plot_type == "DataMapPlot"
341
  else base_model.visualize_documents(
 
392
  topic_names_array,
393
  hover_text=all_docs,
394
  title=dataset,
395
+ sub_title=sub_title.replace(
396
+ "dataset",
397
+ f"<a href='https://huggingface.co/datasets/{dataset}/viewer/{config}/{split}' target='_blank'>dataset</a>",
398
+ ),
399
  enable_search=True,
400
  # TODO: Export data to .arrow and also serve it
401
  inline_data=True,