gsaivinay committed
Commit bbaf2d3 · 1 Parent(s): 3ddb851

Update app.py

Files changed (1)
  1. app.py +68 -10
app.py CHANGED
@@ -99,7 +99,6 @@ def get_leaderboard_df():
 
 
 def get_evaluation_queue_df():
-    # todo @saylortwift: replace the repo by the one you created for the eval queue
     if eval_queue:
         print("Pulling changes for the evaluation queue.")
         eval_queue.git_pull()
@@ -141,7 +140,7 @@ def get_evaluation_queue_df():
         data["model"] = make_clickable_model(data["model"])
         all_evals.append(data)
 
-    pending_list = [e for e in all_evals if e["status"] == "PENDING"]
+    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
     running_list = [e for e in all_evals if e["status"] == "RUNNING"]
    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED")]
     df_pending = pd.DataFrame.from_records(pending_list, columns=EVAL_COLS)
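
The one functional change in this hunk widens the pending bucket: entries whose status is RERUN now land in the pending table alongside PENDING ones instead of being dropped from all three views. A minimal sketch of the partitioning with made-up records (the sample data and the two-column EVAL_COLS are illustrative assumptions, not the app's real values):

import pandas as pd

EVAL_COLS = ["model", "status"]  # assumption: a small subset of the app's real columns

all_evals = [
    {"model": "org/model-a", "status": "PENDING"},
    {"model": "org/model-b", "status": "RERUN"},       # now grouped with pending
    {"model": "org/model-c", "status": "RUNNING"},
    {"model": "org/model-d", "status": "FINISHED_OK"},
]

pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
running_list = [e for e in all_evals if e["status"] == "RUNNING"]
finished_list = [e for e in all_evals if e["status"].startswith("FINISHED")]

df_pending = pd.DataFrame.from_records(pending_list, columns=EVAL_COLS)
print(df_pending)  # both the PENDING and the RERUN row appear in the pending table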
@@ -179,6 +178,7 @@ def add_new_eval(
     precision: str,
     private: bool,
     weight_type: str,
+    model_type: str,
 ):
     precision = precision.split(" ")[0]
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
@@ -209,6 +209,7 @@ def add_new_eval(
         "weight_type": weight_type,
         "status": "PENDING",
         "submitted_time": current_time,
+        "model_type": model_type,
     }
 
     user_name = ""
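
Together these two hunks thread the new model_type value from the submission form into the queued eval request. A sketch of the resulting payload follows; only the four keys visible in the hunk above are certain from this diff, and the remaining keys and values are assumptions inferred from the add_new_eval signature:

from datetime import datetime, timezone

current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

eval_entry = {
    "model": "org/model-a",         # assumption: from the model name textbox
    "precision": "float16",         # assumption: precision.split(" ")[0] keeps only the first token
    "private": False,               # assumption
    "weight_type": "Original",
    "status": "PENDING",
    "submitted_time": current_time,
    "model_type": "pretrained",     # new in this commit
}
print(eval_entry)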
@@ -296,7 +297,7 @@ with demo:
     )
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark (lite)", elem_id="llm-benchmark-tab-table", id=0):
+        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             leaderboard_table_lite = gr.components.Dataframe(
                 value=leaderboard_df[COLS_LITE],
                 headers=COLS_LITE,
@@ -318,7 +319,7 @@ with demo:
                 leaderboard_table_lite,
             )
 
-        with gr.TabItem("📊 Extended view", elem_id="llm-benchmark-tab-table", id=1):
+        with gr.TabItem("🔍 Extended model view", elem_id="llm-benchmark-tab-table", id=1):
             leaderboard_table = gr.components.Dataframe(
                 value=leaderboard_df,
                 headers=COLS,
@@ -340,16 +341,16 @@ with demo:
                 [hidden_leaderboard_table_for_search, search_bar],
                 leaderboard_table,
             )
-        with gr.TabItem("About", elem_id="llm-benchmark-tab-table", id=2):
+        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-        with gr.TabItem("✉️✨ Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
+        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():
                 with gr.Row():
                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
 
                 with gr.Column():
-                    with gr.Accordion(f"✅ Finished Evaluations: {len(finished_eval_queue_df)}", open=False):
+                    with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
                         with gr.Row():
                             finished_eval_table = gr.components.Dataframe(
                                 value=finished_eval_queue_df,
@@ -357,7 +358,7 @@ with demo:
                                 datatype=EVAL_TYPES,
                                 max_rows=5,
                             )
-                    with gr.Accordion(f"🔄 Running Evaluation Queue: {len(running_eval_queue_df)}", open=False):
+                    with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
                         with gr.Row():
                             running_eval_table = gr.components.Dataframe(
                                 value=running_eval_queue_df,
@@ -366,7 +367,7 @@ with demo:
                                 max_rows=5,
                             )
 
-                    with gr.Accordion(f"⏳ Pending Evaluation Queue: {len(pending_eval_queue_df)}", open=False):
+                    with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
                         with gr.Row():
                             pending_eval_table = gr.components.Dataframe(
                                 value=pending_eval_queue_df,
@@ -374,6 +375,63 @@ with demo:
                                 datatype=EVAL_TYPES,
                                 max_rows=5,
                             )
+            with gr.Row():
+                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+
+            with gr.Row():
+                with gr.Column():
+                    model_name_textbox = gr.Textbox(label="Model name")
+                    revision_name_textbox = gr.Textbox(
+                        label="revision", placeholder="main"
+                    )
+                    private = gr.Checkbox(
+                        False, label="Private", visible=not IS_PUBLIC
+                    )
+                    model_type = gr.Dropdown(
+                        choices=["pretrained", "fine-tuned", "with RL"],
+                        label="Model type",
+                        multiselect=False,
+                        value="pretrained",
+                        max_choices=1,
+                        interactive=True,
+                    )
+
+                with gr.Column():
+                    precision = gr.Dropdown(
+                        choices=["float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)"],
+                        label="Precision",
+                        multiselect=False,
+                        value="float16",
+                        max_choices=1,
+                        interactive=True,
+                    )
+                    weight_type = gr.Dropdown(
+                        choices=["Original", "Delta", "Adapter"],
+                        label="Weights type",
+                        multiselect=False,
+                        value="Original",
+                        max_choices=1,
+                        interactive=True,
+                    )
+                    base_model_name_textbox = gr.Textbox(
+                        label="Base model (for delta or adapter weights)"
+                    )
+
+            submit_button = gr.Button("Submit Eval")
+            submission_result = gr.Markdown()
+            submit_button.click(
+                add_new_eval,
+                [
+                    model_name_textbox,
+                    base_model_name_textbox,
+                    revision_name_textbox,
+                    precision,
+                    private,
+                    weight_type,
+                    model_type
+                ],
+                submission_result,
+            )
 
     with gr.Row():
         refresh_button = gr.Button("Refresh")
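
The new form wires seven components into add_new_eval positionally: gr.Button.click(fn, inputs, outputs) passes component values to fn in list order, so the inputs list above must match the function signature (model, base_model, revision, precision, private, weight_type, model_type). A stripped-down sketch of the same pattern with a hypothetical two-field handler, not the app's real form:

import gradio as gr

def add_new_eval_demo(model: str, model_type: str) -> str:
    # stand-in for the real add_new_eval; just echoes its inputs
    return f"Queued {model} ({model_type}) for evaluation."

with gr.Blocks() as demo_sketch:
    model_box = gr.Textbox(label="Model name")
    type_box = gr.Dropdown(
        choices=["pretrained", "fine-tuned", "with RL"], value="pretrained", label="Model type"
    )
    result = gr.Markdown()
    submit = gr.Button("Submit Eval")
    # inputs are positional: model_box -> model, type_box -> model_type
    submit.click(add_new_eval_demo, [model_box, type_box], result)

# demo_sketch.launch()  # uncomment to try locally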
@@ -407,4 +465,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=3600)
 scheduler.start()
-demo.queue(concurrency_count=40).launch()
+demo.queue(concurrency_count=40).launch()
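
For context, these closing lines pair Gradio's request queue (up to 40 concurrent workers via queue(concurrency_count=...), a Gradio 3.x parameter) with an hourly self-restart via APScheduler. A minimal sketch of that restart pattern; restart_space here is a stand-in, and the app's real one presumably restarts the Space through the Hugging Face Hub API:

from apscheduler.schedulers.background import BackgroundScheduler

def restart_space():
    # stand-in for the app's real restart_space
    print("Restarting the Space...")

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)  # fire once an hour
scheduler.start()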
 