kaikaidai committed
Commit f2d7524 · verified · Parent: d31079e

Renamed accordion to "Edit Judge Prompt" & added messaging around "Turbo" models

Files changed (1): app.py (+8 -1)
app.py CHANGED
@@ -386,7 +386,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
     gr.Markdown("<br>")
 
     # Update Evaluator Prompt Accordion
-    with gr.Accordion("📝 Judge Prompt", open=False):
+    with gr.Accordion("📝 Edit Judge Prompt", open=False):
         eval_prompt_editable = gr.TextArea(
             value=DEFAULT_EVAL_PROMPT_EDITABLE,
             label="Evaluation Criteria",
@@ -413,6 +413,13 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
         datatype=["str", "number", "str", "number", "str", "str", "str"],
     )
 
+    gr.Markdown("""<br>
+    <br>
+    Judge Arena uses Together AI for inference of open-source models. FP8 models are named as -- "Turbo" where the performance of the FP16 reference models is closely matched:
+
+    [*"Together Turbo achieves this performance while maintaining full accuracy compared to Meta's reference implementation across all models. Llama-3.1-405B-Instruct-Turbo matches the accuracy of Meta reference models."*](https://www.together.ai/blog/together-inference-engine-2)
+    """)
+
     # Add change handler for checkbox
     show_preliminary.change(
         fn=refresh_leaderboard,
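
For readers unfamiliar with the Gradio APIs touched here, below is a minimal, self-contained sketch of the same pattern: an accordion titled "Edit Judge Prompt" wrapping an editable criteria box, a Markdown note placed after the leaderboard table, and a checkbox `change` handler that refreshes the table. This is not the app's actual code: the placeholder prompt text, the leaderboard column headers, the checkbox label, and the `refresh_leaderboard` body are assumptions standing in for the parts of app.py not shown in this diff.

```python
import gradio as gr

# Hypothetical stand-in for the real default prompt in app.py (not shown in the diff).
DEFAULT_EVAL_PROMPT_EDITABLE = "Rate the assistant's response for helpfulness and accuracy."


def refresh_leaderboard(show_preliminary):
    # Placeholder: the real app recomputes the leaderboard rows here,
    # optionally including preliminary (low-vote) models.
    return [["example-model", 1000, "+/-50", 120, "Example Org", "MIT", ""]]


with gr.Blocks(theme="default") as demo:
    # Collapsible editor for the judge prompt (renamed by this commit).
    with gr.Accordion("📝 Edit Judge Prompt", open=False):
        eval_prompt_editable = gr.TextArea(
            value=DEFAULT_EVAL_PROMPT_EDITABLE,
            label="Evaluation Criteria",
        )

    # Leaderboard controls and table; headers are illustrative,
    # only the datatypes match the diff.
    show_preliminary = gr.Checkbox(label="Show preliminary results", value=True)
    leaderboard_table = gr.Dataframe(
        headers=["Model", "ELO", "95% CI", "Votes", "Organization", "License", "Notes"],
        datatype=["str", "number", "str", "number", "str", "str", "str"],
    )

    # Note added by this commit about Together AI's FP8 "Turbo" models.
    gr.Markdown(
        "Judge Arena uses Together AI for inference of open-source models; "
        'FP8 models are named "Turbo" and closely match their FP16 reference models.'
    )

    # Re-render the leaderboard whenever the checkbox is toggled.
    show_preliminary.change(
        fn=refresh_leaderboard,
        inputs=[show_preliminary],
        outputs=[leaderboard_table],
    )

if __name__ == "__main__":
    demo.launch()
```

Keeping the prompt editor inside a closed `gr.Accordion` leaves the default view compact while still letting users expand and edit the judge criteria.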