import os

import gradio as gr
import pandas as pd
import regex as re
from gradio.themes.utils.sizes import text_md
from gradio_modal import Modal
from huggingface_hub import dump_environment_info

from content import (
    HEADER_MARKDOWN,
    LEADERBOARD_TAB_TITLE_MARKDOWN,
    SUBMISSION_TAB_TITLE_MARKDOWN,
    MODAL_SUBMIT_MARKDOWN,
    SUBMISSION_DETAILS_MARKDOWN,
    RANKING_AFTER_SUBMISSION_MARKDOWN,
    MORE_DETAILS_MARKDOWN,
    ABOUT_MARKDOWN,
)
from server import (
    HF_FAKE_TOURNAMENT,
    LeaderboardServer,
    xmlAndMarkdownEscape,
    xmlQuoteAttr,
    api,
)

HF_SPACE_TOKEN = os.environ["HF_SPACE_TOKEN"]
HF_SPACE_ID = os.environ["HF_SPACE_ID"]

# For testing purpose
HF_DISABLE_SUBMIT = bool(int(os.environ.get("HF_DISABLE_SUBMIT", "0")))

dump_environment_info()

leaderboard_server = LeaderboardServer()

# Ordered, de-duplicated names of the submission form fields.  The order is
# significant: `process_submission` zips it with the positional values that
# Gradio passes in, which come from `submission_inputs.values()`.
SUBMISSION_INPUTS = dict.fromkeys((
    "team_name",
    "model_name",
    "model_type",
    "parameters",
    "input_length",
    "precision",
    "description",
    "link_to_model",
    "submission_file",
)).keys()


def _reset_submission_form_updates():
    """Return the six updates that put the submission form in its initial state.

    The first update re-enables and relabels the pre-submit button; the other
    five hide a component.  Every caller wires these to the pre-submit button
    plus the submit prompt, both confirmation buttons, the pre-submit info and
    the pre-submit table (in varying order, which does not matter because the
    last five updates are identical).
    """
    return (
        gr.update(value='Pre-submit model', visible=True, interactive=True),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )


def on_submit_pressed():
    """Relabel and disable the pre-submit button while a submission is processed."""
    return gr.update(value='Processing submission…', interactive=False)


def validate_submission_inputs(**inputs):
    """Validate the submission form values.

    Args:
        **inputs: Form values keyed by the names in `SUBMISSION_INPUTS`.

    Raises:
        ValueError: If a mandatory field is empty, the uploaded file does not
            exist, the model link is not an http(s) URL, `parameters` is not
            positive, or `input_length` is not a positive whole number.
    """
    # Only the description may be left empty.
    if any(
        value in (None, "")
        for key, value in inputs.items()
        if key != "description"
    ):
        raise ValueError('Please fill in all fields (only the description field is optional)')

    if not os.path.exists(inputs["submission_file"]):
        raise ValueError('File does not exist')

    if not inputs["link_to_model"].startswith(("http://", "https://")):
        raise ValueError('Link does not starts with "http://" or "https://"')

    if not inputs["parameters"] > 0:
        raise ValueError('Attribute `Parameters (B)` should be greater than zero')

    input_length = inputs["input_length"]
    if not (input_length > 0 and input_length == int(input_length)):
        raise ValueError('Attribute `Input length (# tokens)` should be greater than zero and integer type')


def process_submission(*inputs):
    """Normalize, validate and pre-submit a model, then reveal its ranking.

    Args:
        *inputs: Form values in the order given by `SUBMISSION_INPUTS`.

    Returns:
        A 6-tuple of Gradio updates for, in order: the pre-submit button, the
        submit prompt, the "submit" button, the "reverse" button, the
        pre-submit info and the pre-submit table.  On any error the form is
        reset and a warning toast is shown instead.
    """
    try:
        inputs = dict(zip(SUBMISSION_INPUTS, inputs))

        # Normalize text fields.  Non-string values (e.g. `None` for a field
        # that was never filled) are left untouched so that validation can
        # report them with its own message rather than failing here.
        for key in ("team_name", "model_name"):
            if isinstance(inputs.get(key), str):
                inputs[key] = re.sub(r"""\s+""", " ", inputs[key]).strip()
        for key in ("description", "link_to_model"):
            if isinstance(inputs.get(key), str):
                inputs[key] = inputs[key].strip()

        validate_submission_inputs(**inputs)

        # Everything except the uploaded file is metadata; iterate the ordered
        # key view so the resulting dict has a deterministic key order.
        metadata = {
            key: inputs[key]
            for key in SUBMISSION_INPUTS
            if key != "submission_file"
        }

        gr.Info('Submission valid, going to queue for the tournament…')

        pre_submit = leaderboard_server.prepare_model_for_submission(inputs["submission_file"], metadata)
        if HF_FAKE_TOURNAMENT:
            pre_submit = None
    except ValueError as err:
        # Validation problems: ordinary, auto-dismissing warning.
        gr.Warning(str(err))
        return _reset_submission_form_updates()
    except Exception as err:
        # UI boundary: report anything unexpected in a warning that stays
        # open (duration=None) instead of breaking the event chain.
        gr.Warning(str(err), duration=None)
        return _reset_submission_form_updates()

    gr.Info('Tournament finished!', duration=5)
    gr.Info('You can see the results of your model below.', duration=15)

    if HF_DISABLE_SUBMIT:
        submit_prompt = gr.update(visible=False)
        submission_btn_yes = gr.update(visible=False)
    else:
        submit_prompt = gr.update(visible=True)
        submission_btn_yes = gr.update(interactive=True, visible=True)

    return (
        gr.update(visible=False),
        submit_prompt,
        submission_btn_yes,
        gr.update(interactive=True, visible=True),
        gr.update(visible=True),
        gr.update(
            value=leaderboard_server.get_leaderboard(pre_submit),
            visible=True,
            datatype="markdown",
            elem_classes="leaderboard-table",
        ),
    )


def get_submission_ids_and_titles():
    """Return `(model_title, submission_id)` pairs sorted case-insensitively by title."""
    with leaderboard_server.var_lock.ro:
        submission_ids_and_titles = [
            (
                leaderboard_server.submission_id_to_model_title[submission_id],
                submission_id,
            )
            for submission_id in leaderboard_server.submission_ids
        ]

    submission_ids_and_titles.sort(key=lambda x: x[0].lower())
    return submission_ids_and_titles


def submit_results():
    """Persist the pre-submitted model and refresh every view that lists models.

    Returns:
        A 12-tuple of updates: the six form-reset updates, the overall
        leaderboard table and CSV, an update hiding the confirmation modal,
        the model-details dropdown choices, the category dropdown value
        (reset to overall) and the tournament-results dropdown choices.
    """
    leaderboard_server.save_pre_submit()
    gr.Info('Submission successful!')

    # NOTE(review): `get_submission_ids_and_titles` takes `var_lock.ro` again
    # while it is already held here — confirm the lock tolerates nested reads.
    with leaderboard_server.var_lock.ro:
        leaderboard = gr.update(
            value=leaderboard_server.get_leaderboard(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )
        leaderboard_csv = gr.update(
            value=leaderboard_server.get_leaderboard_csv(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )
        submission_ids_and_titles = get_submission_ids_and_titles()

    return (
        *_reset_submission_form_updates(),
        leaderboard,
        leaderboard_csv,
        gr.update(visible=False),
        gr.update(choices=submission_ids_and_titles),
        gr.update(value=leaderboard_server.TASKS_CATEGORY_OVERALL),
        gr.update(choices=submission_ids_and_titles),
    )


def erase_pre_submit():
    """Discard the pending pre-submission and reset the submission form.

    Returns:
        Exactly six updates, one per component wired to
        `submission_btn_no.click`.
    """
    with leaderboard_server.pre_submit_lock:
        if leaderboard_server.pre_submit:
            leaderboard_server.pre_submit = None  # NOTE: Is it safe? How to confirm that `submission_id` is equal?

    return _reset_submission_form_updates()


def fetch_model_detail(submission_id):
    """Return updates showing the description and link of the selected model."""
    metadata = leaderboard_server.get_model_detail(submission_id)
    return (
        gr.update(value=metadata['description'], visible=True),
        gr.update(value=metadata['link_to_model'], visible=True),
    )


def fetch_model_tournament_results_table(submission_id, category):
    """Return an update with the model's tournament table, hidden if nothing is selected."""
    if submission_id is None or category is None:
        return gr.update(
            visible=False,
        )
    return gr.update(
        value=leaderboard_server.get_model_tournament_table(submission_id, category),
        visible=True,
    )


def fetch_model_tournament_results_table_csv(submission_id, category):
    """Return an update with the model's tournament CSV, hidden if nothing is selected."""
    if submission_id is None or category is None:
        return gr.update(
            visible=False,
        )
    return gr.update(
        value=leaderboard_server.get_model_tournament_table_csv(submission_id, category),
        visible=True,
    )


def create_task_abbreviation_legend_table(category):
    """Build the escaped `[abbreviation, name, url]` legend rows for a category."""
    abbreviation2name = leaderboard_server.CATEGORY_TO_TASK_ABBREVIATION_TO_DETAILS[category]
    return [
        [
            xmlAndMarkdownEscape(abbr),
            xmlAndMarkdownEscape(name),
            f'{xmlAndMarkdownEscape(url)}',
        ]
        for abbr, name, url in abbreviation2name.values()
    ]


def change_leaderboard_category(category, selected_submission_id):
    """Refresh the leaderboard tab after the category dropdown changed.

    For the overall category the legend and the per-model tournament widgets
    are hidden; for any other category they are shown and filled in.

    Returns:
        A 7-tuple of updates: leaderboard table, leaderboard CSV, legend,
        tournament title, tournament dropdown, tournament table and
        tournament CSV.
    """
    if category == leaderboard_server.TASKS_CATEGORY_OVERALL:
        task_abbreviation_legend = gr.update(
            visible=False,
        )
        tournament_results_title = gr.update(
            visible=False,
        )
        tournament_results_dropdown = gr.update(
            visible=False,
        )
        model_tournament_results_table = gr.update(
            visible=False,
        )
        model_tournament_results_table_csv = gr.update(
            visible=False,
        )
    else:
        task_abbreviation_legend = gr.update(
            value=create_task_abbreviation_legend_table(category),
            visible=True,
        )
        tournament_results_title = gr.update(
            visible=True,
        )
        tournament_results_dropdown = gr.update(
            visible=True,
        )
        model_tournament_results_table = fetch_model_tournament_results_table(selected_submission_id, category)
        model_tournament_results_table_csv = fetch_model_tournament_results_table_csv(selected_submission_id, category)

    leaderboard = gr.update(
        value=leaderboard_server.get_leaderboard(category=category),
        visible=True,
    )
    leaderboard_csv = gr.update(
        value=leaderboard_server.get_leaderboard_csv(category=category),
        visible=True,
    )

    return (
        leaderboard,
        leaderboard_csv,
        task_abbreviation_legend,
        tournament_results_title,
        tournament_results_dropdown,
        model_tournament_results_table,
        model_tournament_results_table_csv,
    )


def show_modal():
    """Show the submit-confirmation modal."""
    gr.Info('You are going to submit your model.', duration=5)  # It is used to scroll up
    return gr.update(visible=True)


def hide_modal():
    """Hide the submit-confirmation modal."""
    return gr.update(visible=False)


def disable_submit_buttons():
    """Return updates disabling both modal buttons."""
    return (
        gr.update(interactive=False),
        gr.update(interactive=False),
    )


def enable_submit_buttons():
    """Return updates enabling both modal buttons."""
    return (
        gr.update(interactive=True),
        gr.update(interactive=True),
    )


def results_dataset_integrity_check():
    """Ask the leaderboard server to check the results dataset with `solve=True`."""
    leaderboard_server.results_dataset_integrity_check(solve=True)


def update_results_dataset_integrity_solving_progress():
    """Return updates for the progress text and the restart button.

    While the server still reports corrupted tournament results, the progress
    percentage is shown and the restart button stays hidden; afterwards the
    restart button is shown and enabled.
    """
    progress = leaderboard_server.tournament_results_integrity_solving_progress * 100

    if leaderboard_server.tournament_results_corrupted:
        results_dataset_integrity_solving_progress = gr.update(
            value=f"Tournament results is preparing, please wait… {progress:.0f}% done"
        )
        results_dataset_integrity_restart_btn = gr.update(
            interactive=False,
            visible=False,
        )
    else:
        results_dataset_integrity_solving_progress = gr.update(
            value="Tournament results is prepared, please restart this space with the button showed below."
        )
        results_dataset_integrity_restart_btn = gr.update(
            interactive=True,
            visible=True,
        )

    return (
        results_dataset_integrity_solving_progress,
        results_dataset_integrity_restart_btn,
    )


def on_tournament_results_corrupted():
    """Return updates for the integrity-check button and the progress text.

    The button is offered only when results are corrupted and no solving run
    is in progress; otherwise the progress text is shown instead.
    """
    if leaderboard_server.tournament_results_corrupted and not leaderboard_server.tournament_results_integrity_solving:
        results_dataset_integrity_check_btn = gr.update(
            interactive=True,
            visible=True,
        )
        results_dataset_integrity_solving_progress = gr.update(
            visible=False,
        )
    else:
        results_dataset_integrity_check_btn = gr.update(
            interactive=False,
            visible=False,
        )
        results_dataset_integrity_solving_progress = gr.update(
            visible=True,
        )

    return (
        results_dataset_integrity_check_btn,
        results_dataset_integrity_solving_progress,
    )


def restart_space():
    """Restart this Space through `api`, using the configured id and token."""
    api.restart_space(repo_id=HF_SPACE_ID, token=HF_SPACE_TOKEN)


def on_application_load():
    """Return the five updates that refresh the leaderboard and dropdowns on page load."""
    # NOTE(review): `get_submission_ids_and_titles` takes `var_lock.ro` again
    # while it is already held here — confirm the lock tolerates nested reads.
    with leaderboard_server.var_lock.ro:
        leaderboard = gr.update(
            value=leaderboard_server.get_leaderboard(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )
        leaderboard_csv = gr.update(
            value=leaderboard_server.get_leaderboard_csv(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
            visible=True,
        )
        submission_ids_and_titles = get_submission_ids_and_titles()

    return (
        leaderboard,
        leaderboard_csv,
        gr.update(choices=submission_ids_and_titles),
        gr.update(value=leaderboard_server.TASKS_CATEGORY_OVERALL),
        gr.update(choices=submission_ids_and_titles),
    )


# NOTE(review): line breaks inside the two literals below were restored from a
# whitespace-collapsed source; the content is otherwise unchanged.
custom_css = """
footer {visibility: hidden}

tr {
    background-color: var(--table-even-background-fill);
    font-family: "IBM Plex Mono";
}

tr.row_odd {
    background-color: var(--table-odd-background-fill);
}

.leaderboard-table td:first-child p, .leaderboard-table-model-details td:first-child p {
    margin: 0px;
}

.leaderboard-table th:nth-child(5), .leaderboard-table td:nth-child(5) {
    border-right-width: 2px;
    border-right-color: var(--border-color-primary);
}

.leaderboard-table td:nth-child(5) p {
    font-weight: bolder;
}

.leaderboard-table tr[model_title]:hover::after, .leaderboard-table-model-details tr[model_title]:hover::after {
    content: attr(model_title);
    position: fixed;
    background-color: rgba(50, 50, 50, 0.9);
    color: white;
    padding: 5px;
    border-radius: 5px;
    white-space: nowrap;
    z-index: 10;
    pointer-events: none;
    left: var(--table-rect-left, 0px);
    top: var(--row-rect-bottom, 0px);
    transform: translateY(0px);
}
"""

# NOTE(review): only whitespace is visible here; the CSS above reads
# `--table-rect-left` / `--row-rect-bottom`, which nothing in this file sets —
# confirm whether head markup is missing.
custom_js = """
"""


def gradio_app():
    """Build and return the Gradio `Blocks` application.

    When the server reports corrupted tournament results, only a small
    maintenance UI (integrity check, progress, restart) is built.  Otherwise
    the full UI with the Leaderboard, Model details, Submission and About
    tabs is built and all event handlers are wired.

    NOTE(review): the nesting of the `with` blocks below (Row/Column
    membership and lock scope) was reconstructed from statement order —
    confirm against the intended layout.
    """
    with gr.Blocks(theme=gr.themes.Soft(text_size=text_md), css=custom_css, head=custom_js) as main:
        gr.Markdown(HEADER_MARKDOWN)

        if leaderboard_server.tournament_results_corrupted:
            gr.Markdown("## Results dataset integrity solving")

            results_dataset_integrity_check_btn = gr.Button(
                value='Check integrity of results dataset',
                interactive=True,
                visible=False,
            )

            results_dataset_integrity_solving_progress = gr.Markdown(
                value=None,
                visible=False,
            )

            results_dataset_integrity_restart_btn = gr.Button(
                value='Restart this space',
                interactive=False,
                visible=False,
            )

            results_dataset_integrity_solving_progress_timer = gr.Timer(
                value=60,  # seconds
            )

            results_dataset_integrity_solving_progress_timer.tick(
                fn=update_results_dataset_integrity_solving_progress,
                outputs=[
                    results_dataset_integrity_solving_progress,
                    results_dataset_integrity_restart_btn,
                ]
            )

            # Swap the button for the progress text first, then start the check.
            results_dataset_integrity_check_btn.click(
                fn=lambda: (
                    gr.update(interactive=False, visible=False),
                    gr.update(visible=True),
                ),
                outputs=[
                    results_dataset_integrity_check_btn,
                    results_dataset_integrity_solving_progress,
                ]
            ).then(
                fn=results_dataset_integrity_check
            )

            results_dataset_integrity_restart_btn.click(
                fn=lambda: gr.update(interactive=False),
                outputs=results_dataset_integrity_restart_btn
            ).then(
                fn=restart_space
            )

            main.load(
                fn=on_tournament_results_corrupted,
                outputs=[
                    results_dataset_integrity_check_btn,
                    results_dataset_integrity_solving_progress,
                ]
            ).then(
                fn=update_results_dataset_integrity_solving_progress,
                outputs=[
                    results_dataset_integrity_solving_progress,
                    results_dataset_integrity_restart_btn,
                ]
            )

            return main

        with gr.Tabs():
            with leaderboard_server.var_lock.ro:
                submission_ids_and_titles = get_submission_ids_and_titles()

            with gr.TabItem('Leaderboard'):
                with gr.Column():
                    gr.Markdown(LEADERBOARD_TAB_TITLE_MARKDOWN)

                    with gr.Row():
                        leaderboard_category_of_tasks = gr.Dropdown(
                            choices=[leaderboard_server.TASKS_CATEGORY_OVERALL] + sorted(leaderboard_server.TASKS_CATEGORIES),
                            value=leaderboard_server.TASKS_CATEGORY_OVERALL,
                            label="Category of benchmarks",
                            interactive=True,
                        )

                    with gr.Row():
                        leaderboard_table = gr.DataFrame(
                            leaderboard_server.get_leaderboard(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
                            interactive=False,
                            label=None,
                            visible=True,
                            datatype="markdown",
                            elem_classes="leaderboard-table",
                        )

                    with gr.Row():
                        leaderboard_table_csv = gr.DownloadButton(
                            label="Download leaderboard in CSV format",
                            value=leaderboard_server.get_leaderboard_csv(category=leaderboard_server.TASKS_CATEGORY_OVERALL),
                        )

                    with gr.Row():
                        leaderboard_table_legend = gr.DataFrame(
                            value=None,
                            headers=[
                                "Abbr.",  # "task abbreviation"
                                "Name",
                                "URL",
                            ],
                            column_widths=["150px"],
                            datatype="markdown",
                            label="Descriptions of the tasks",
                            visible=False,
                            interactive=False,
                            elem_classes="leaderboard-table-legend",
                        )

                    with gr.Row():
                        tournament_results_title = gr.Markdown(
                            value="## Tournament results for selected model",
                            visible=False,
                        )

                    with gr.Row():
                        tournament_results_dropdown = gr.Dropdown(
                            value=None,
                            choices=submission_ids_and_titles,
                            label="Select model",
                            visible=False,
                            interactive=True,
                        )

                    with gr.Row():
                        model_tournament_results_table = gr.DataFrame(
                            value=None,
                            datatype="markdown",
                            label="The model won against…",
                            visible=False,
                            interactive=False,
                            elem_classes="leaderboard-table-model-details",
                        )

                    with gr.Row():
                        model_tournament_results_table_csv = gr.DownloadButton(
                            label="Download model tournament results in CSV format",
                            visible=False,
                        )

                    leaderboard_category_of_tasks.change(
                        fn=change_leaderboard_category,
                        inputs=[
                            leaderboard_category_of_tasks,
                            tournament_results_dropdown,
                        ],
                        outputs=[
                            leaderboard_table,
                            leaderboard_table_csv,
                            leaderboard_table_legend,
                            tournament_results_title,
                            tournament_results_dropdown,
                            model_tournament_results_table,
                            model_tournament_results_table_csv,
                        ],
                    )

                    tournament_results_dropdown.change(
                        fn=fetch_model_tournament_results_table,
                        inputs=[
                            tournament_results_dropdown,
                            leaderboard_category_of_tasks,
                        ],
                        outputs=model_tournament_results_table,
                    ).then(
                        fn=fetch_model_tournament_results_table_csv,
                        inputs=[
                            tournament_results_dropdown,
                            leaderboard_category_of_tasks,
                        ],
                        outputs=model_tournament_results_table_csv,
                    )

            with gr.TabItem('Model details'):
                gr.Markdown(MORE_DETAILS_MARKDOWN)

                with gr.Row():
                    model_details_model_dropdown = gr.Dropdown(
                        choices=submission_ids_and_titles,
                        label="Select model",
                        interactive=True,
                    )

                with gr.Row():
                    model_details_description = gr.Text(value='', label='Model description', visible=False, interactive=False)
                    model_details_url = gr.Text(value='', label='Model url', visible=False, interactive=False)

                with gr.Row():
                    model_details_tournament_results_title = gr.Markdown(
                        value="## Tournament results for selected model",
                        visible=False,
                    )

                with gr.Row():
                    model_details_category_of_tasks = gr.Dropdown(
                        choices=sorted(leaderboard_server.TASKS_CATEGORIES),
                        value=None,
                        label="Category of benchmarks",
                        visible=False,
                        interactive=True,
                    )

                with gr.Row():
                    model_details_model_tournament_results_table = gr.DataFrame(
                        value=None,
                        datatype="markdown",
                        label="The model won against…",
                        visible=False,
                        interactive=False,
                        elem_classes="leaderboard-table-model-details",
                    )

                with gr.Row():
                    model_details_model_tournament_results_table_csv = gr.DownloadButton(
                        label="Download model tournament results in CSV format",
                        visible=False,
                    )

                # NOTE(review): `fetch_model_detail` has no guard for an empty
                # selection, unlike the later steps of this chain — confirm
                # that the dropdown can never report `None` here.
                model_details_model_dropdown.change(
                    fn=fetch_model_detail,
                    inputs=[model_details_model_dropdown],
                    outputs=[model_details_description, model_details_url],
                ).then(
                    fn=lambda submission_id: gr.update(visible=True) if submission_id else gr.update(visible=False),
                    inputs=model_details_model_dropdown,
                    outputs=model_details_tournament_results_title
                ).then(
                    fn=lambda submission_id: gr.update(visible=True) if submission_id else gr.update(visible=False),
                    inputs=model_details_model_dropdown,
                    outputs=model_details_category_of_tasks
                ).then(
                    fn=fetch_model_tournament_results_table,
                    inputs=[
                        model_details_model_dropdown,
                        model_details_category_of_tasks,
                    ],
                    outputs=model_details_model_tournament_results_table
                ).then(
                    fn=fetch_model_tournament_results_table_csv,
                    inputs=[
                        model_details_model_dropdown,
                        model_details_category_of_tasks,
                    ],
                    outputs=model_details_model_tournament_results_table_csv
                )

                model_details_category_of_tasks.change(
                    fn=fetch_model_tournament_results_table,
                    inputs=[
                        model_details_model_dropdown,
                        model_details_category_of_tasks,
                    ],
                    outputs=model_details_model_tournament_results_table,
                ).then(
                    fn=fetch_model_tournament_results_table_csv,
                    inputs=[
                        model_details_model_dropdown,
                        model_details_category_of_tasks,
                    ],
                    outputs=model_details_model_tournament_results_table_csv,
                )

            with gr.TabItem('Submission'):
                with gr.Column():
                    gr.Markdown(SUBMISSION_TAB_TITLE_MARKDOWN)

                    # Same key order as SUBMISSION_INPUTS, so `.values()` below
                    # lines up with the zip in `process_submission`.
                    submission_inputs = dict.fromkeys(SUBMISSION_INPUTS)

                    with gr.Row():
                        submission_inputs["team_name"] = gr.Textbox(label='Team name', type='text')
                        submission_inputs["model_name"] = gr.Textbox(label='Model name', type='text')
                        submission_inputs["model_type"] = gr.Dropdown(
                            label="Model type",
                            choices=("chat", "pretrained", "ensemble"),
                        )
                        submission_inputs["parameters"] = gr.Number(
                            label='Parameters (B)',
                            value=0.01,
                            step=0.01,
                        )

                    with gr.Row():
                        submission_inputs["input_length"] = gr.Number(
                            label='Input length (# tokens)',
                            value=0,
                            step=1,
                        )
                        submission_inputs["precision"] = gr.Dropdown(
                            label="Precision",
                            choices=("float32", "bfloat32", "float16", "bfloat16", "8bit", "4bit"),
                        )
                        submission_inputs["description"] = gr.Textbox(label='Description', type='text')
                        submission_inputs["link_to_model"] = gr.Textbox(label='Link to model', type='text')

                    submission_inputs["submission_file"] = gr.File(label='Upload your results', type='filepath')

                    pre_submission_btn = gr.Button(value='Pre-submit model', interactive=True)

                    submit_prompt = gr.Markdown(
                        SUBMISSION_DETAILS_MARKDOWN,
                        visible=False
                    )

                    pre_submit_info = gr.Markdown(
                        RANKING_AFTER_SUBMISSION_MARKDOWN,
                        visible=False
                    )

                    pre_submit_table = gr.DataFrame(pd.DataFrame(), interactive=False, label=None, visible=False)

                    submission_btn_yes = gr.Button(value='Submit model', interactive=False, visible=False)
                    submission_btn_no = gr.Button(value='Reverse process', interactive=False, visible=False)

                    with Modal(visible=False, allow_user_close=False) as modal_submit:
                        gr.Markdown(MODAL_SUBMIT_MARKDOWN)
                        modal_submit_yes = gr.Button("Yes", interactive=True)
                        modal_submit_no = gr.Button("No", interactive=True)

                    pre_submission_btn.click(
                        fn=on_submit_pressed,
                        outputs=[pre_submission_btn],
                    ).then(
                        fn=process_submission,
                        inputs=list(submission_inputs.values()),
                        outputs=[
                            pre_submission_btn,
                            submit_prompt,
                            submission_btn_yes,
                            submission_btn_no,
                            pre_submit_info,
                            pre_submit_table,
                        ],
                        concurrency_limit=None,
                    )

                    submission_btn_yes.click(
                        fn=show_modal,
                        outputs=[modal_submit]
                    )

                    modal_submit_yes.click(
                        fn=disable_submit_buttons,
                        outputs=[
                            modal_submit_yes,
                            modal_submit_no,
                        ]
                    ).then(
                        fn=submit_results,
                        outputs=[
                            pre_submission_btn,
                            submission_btn_yes,
                            submission_btn_no,
                            submit_prompt,
                            pre_submit_info,
                            pre_submit_table,
                            leaderboard_table,
                            leaderboard_table_csv,
                            modal_submit,
                            model_details_model_dropdown,
                            leaderboard_category_of_tasks,
                            tournament_results_dropdown,
                        ],
                    ).then(
                        fn=enable_submit_buttons,
                        outputs=[
                            modal_submit_yes,
                            modal_submit_no,
                        ]
                    )

                    modal_submit_no.click(
                        fn=disable_submit_buttons,
                        outputs=[
                            modal_submit_yes,
                            modal_submit_no,
                        ]
                    ).then(
                        fn=hide_modal,
                        outputs=[modal_submit]
                    ).then(
                        fn=enable_submit_buttons,
                        outputs=[
                            modal_submit_yes,
                            modal_submit_no,
                        ]
                    )

                    # `erase_pre_submit` returns one update per component below.
                    submission_btn_no.click(
                        fn=erase_pre_submit,
                        outputs=[
                            pre_submission_btn,
                            submission_btn_yes,
                            submission_btn_no,
                            submit_prompt,
                            pre_submit_info,
                            pre_submit_table,
                        ],
                    )

            with gr.TabItem('About'):
                gr.Markdown(ABOUT_MARKDOWN)

        main.load(
            on_application_load,
            inputs=None,
            outputs=[
                leaderboard_table,
                leaderboard_table_csv,
                model_details_model_dropdown,
                leaderboard_category_of_tasks,
                tournament_results_dropdown,
            ]
        )

    return main


app = gradio_app()
app.launch()