Clémentine
added evaluation + leaderboard generation + reorg of the viz
67741f2
raw
history blame
984 Bytes
import gradio as gr
from utils import run_pipeline, update_examples
from env import TASK
# Leaderboard page: a results table, an example-index selector, and three
# tabs that each hold an HTML pane of samples.
with gr.Blocks(
    css="button { margin: 0 10px; padding: 5px 15px; }",
    theme=gr.themes.Soft(),
    title="YourBench Leaderboard",
) as demo:
    # DISPLAY TABLE AND ANALYSIS
    title = gr.Markdown(f"YourBench auto-Leaderboard for {TASK}")
    leaderboard = gr.DataFrame(interactive=False, label="Results")

    # Numeric selector whose value is handed to both callbacks below.
    samples_ix = gr.Number(
        value=0,
        step=1,
        label="Example Index",
        info="Navigate through different examples",
    )

    # One HTML pane per tab; tabs appear in the order they are created here.
    with gr.Tab("Hardest samples"):
        hard_samples = gr.HTML()
    with gr.Tab("Easiest samples"):
        easy_samples = gr.HTML()
    with gr.Tab("All samples"):
        all_samples = gr.HTML()

    # Both callbacks write the sample panes in this same order
    # (easy, hard, all), which differs from the on-screen tab order.
    sample_panes = [easy_samples, hard_samples, all_samples]

    # Changing the index re-renders the three sample panes.
    samples_ix.change(
        fn=update_examples,
        inputs=samples_ix,
        outputs=sample_panes,
    )

    # On page load, run the pipeline with the current index and populate the
    # results table together with the three sample panes.
    demo.load(
        fn=run_pipeline,
        inputs=[samples_ix],
        outputs=[leaderboard, *sample_panes],
    )

# Start the Gradio server as soon as this module is executed.
demo.launch()