# leaderboard/app.py
import os
from email.utils import parseaddr

import gradio as gr
import pandas as pd
from datasets import load_dataset
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

# InfoStrings
from content import *
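
# Private Hub token used to read/write the result datasets and to restart the Space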
BALM_TOKEN = os.environ.get("BALM_TOKEN", None)
owner = "clefourrier"  # change to balm once possible
api = HfApi()
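
# Pull the current results for each level from the Hub (dev split only)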
eval_results = {}
for level in range(1, 4):
    eval_results[level] = load_dataset(f"{owner}/BALM_ResultsLevel{level}", use_auth_token=BALM_TOKEN, split="dev")

# The contact email column is never shown in the public tables
eval_dataframe_1 = pd.DataFrame(eval_results[1].remove_columns("mail"))
eval_dataframe_2 = pd.DataFrame(eval_results[2].remove_columns("mail"))
eval_dataframe_3 = pd.DataFrame(eval_results[3].remove_columns("mail"))

def restart_space():
    api.restart_space(repo_id=f"{owner}/BALM_Leaderboard", token=BALM_TOKEN)

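
# Display columns for the leaderboard tables and their Gradio datatypes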
COLS = ["Model", "Organisation", "Reported accuracy ⬆️"]
TYPES = ["str", "str", "number"]


def add_new_eval(
    level_of_dev: str,
    model: str,
    score: float,
    organisation: str,
    mail: str,
):
    level = int(level_of_dev.split(" ")[-1])

    # Very basic email parsing
    _, parsed_mail = parseaddr(mail)
    if "@" not in parsed_mail:
        valid_mail = "Please provide a valid email address."
        return f"<p style='color: orange; font-size: 20px; text-align: center;'>{valid_mail}</p>"

    print("Adding new eval")

    # Warn if this model/organisation combination was already submitted for this level
    models = set(m.lower() for m in eval_results[level]["model"])
    organisations = set(o.lower() for o in eval_results[level]["organisation"])
    if model.lower() in models and organisation.lower() in organisations:
        duplicate_request_message = "This model has already been submitted."
        return f"<p style='color: orange; font-size: 20px; text-align: center;'>{duplicate_request_message}</p>"

    # Actual submission
    eval_entry = {
        "model": model,
        "score": score,
        "organisation": organisation,
        "mail": mail,
    }
    # Dataset.add_item returns a new dataset instead of mutating in place,
    # so store the result back before pushing this level's dataset to the Hub
    eval_results[level] = eval_results[level].add_item(eval_entry)
    eval_results[level].push_to_hub(f"{owner}/BALM_ResultsLevel{level}", token=BALM_TOKEN, split="dev")

    success_message = f"Model {model} submitted by {organisation}."
    return f"<p style='color: green; font-size: 20px; text-align: center;'>{success_message}</p>"


def refresh():
    # Re-fetch the datasets so the tables reflect submissions made since page load
    eval_results = {}
    for level in range(1, 4):
        eval_results[level] = load_dataset(f"{owner}/BALM_ResultsLevel{level}", use_auth_token=BALM_TOKEN, split="dev")
    eval_dataframe_1 = pd.DataFrame(eval_results[1].remove_columns("mail"))
    eval_dataframe_2 = pd.DataFrame(eval_results[2].remove_columns("mail"))
    eval_dataframe_3 = pd.DataFrame(eval_results[3].remove_columns("mail"))
    return eval_dataframe_1, eval_dataframe_2, eval_dataframe_3


custom_css = """
#changelog-text {
    font-size: 16px !important;
}

#changelog-text h2 {
    font-size: 18px !important;
}

.markdown-text {
    font-size: 16px !important;
}

#citation-button span {
    font-size: 16px !important;
}

#citation-button textarea {
    font-size: 16px !important;
}

#citation-button > label > button {
    margin: 6px;
    transform: scale(1.3);
}
"""
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Column():
            with gr.Accordion("📙 Citation", open=False):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    elem_id="citation-button",
                ).style(show_copy_button=True)
        with gr.Column():
            with gr.Accordion("✨ CHANGELOG", open=False):
                changelog = gr.Markdown(CHANGELOG_TEXT, elem_id="changelog-text")

    with gr.Tab("Results: Level 1"):
        with gr.Tab("Results on Dev Set"):
            leaderboard_table_1 = gr.components.Dataframe(
                value=eval_dataframe_1, headers=COLS, datatype=TYPES, max_rows=20
            )
        with gr.Tab("Results on Test Set"):
            gr.Textbox(value="The test set is currently private! Come back once performance on the dev set has increased!")

    with gr.Tab("Results: Level 2"):
        with gr.Tab("Results on Dev Set"):
            leaderboard_table_2 = gr.components.Dataframe(
                value=eval_dataframe_2, headers=COLS, datatype=TYPES, max_rows=20
            )
        with gr.Tab("Results on Test Set"):
            gr.Textbox(value="The test set is currently private! Come back once performance on the dev set has increased!")

    with gr.Tab("Results: Level 3"):
        with gr.Tab("Results on Dev Set"):
            leaderboard_table_3 = gr.components.Dataframe(
                value=eval_dataframe_3, headers=COLS, datatype=TYPES, max_rows=20
            )
        with gr.Tab("Results on Test Set"):
            gr.Textbox(value="The test set is currently private! Come back once performance on the dev set has increased!")

    refresh_button = gr.Button("Refresh")
    refresh_button.click(
        refresh,
        inputs=[],
        outputs=[
            leaderboard_table_1,
            leaderboard_table_2,
            leaderboard_table_3,
        ],
    )

    with gr.Accordion("Submit a new model for evaluation"):
        # with gr.Row():
        with gr.Column():
            level_of_dev = gr.Radio(["Level 1", "Level 2", "Level 3"], value="Level 1", label="Dev set")
            model_name_textbox = gr.Textbox(label="Model name")
            score = gr.Textbox(label="Score")
            organisation = gr.Textbox(label="Organisation")
            mail = gr.Textbox(label="Contact email")

        submit_button = gr.Button("Submit Eval")
        submission_result = gr.Markdown()
        submit_button.click(
            add_new_eval,
            [
                level_of_dev,
                model_name_textbox,
                score,
                organisation,
                mail,
            ],
            submission_result,
        )
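
# Restart the Space every hour so it reloads the result datasets from the Hub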
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()
demo.launch()