"""Gradio app serving the SeaExam / SeaBench leaderboard.

At import time this script downloads the results dataset from the Hugging Face
Hub, loads the leaderboard tables from a CSV file, builds the tabbed UI, starts
a background job that restarts the Space every 30 minutes, and launches the app.
"""
import os

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
from huggingface_hub import snapshot_download, login

from src.display.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    CONTACT_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
    SUB_TITLE,
)
from src.display.css_html_js import custom_css
from src.envs import API
from src.leaderboard.load_results import load_data

# clone / pull the lmeh eval data
TOKEN = os.environ.get("TOKEN", None)
if TOKEN:
    # login(token=None) falls back to an interactive prompt, which cannot be
    # answered in a headless process; only log in when a token is configured.
    login(token=TOKEN)

RESULTS_REPO = "SeaLLMs/SeaExam-results"
CACHE_PATH = os.getenv("HF_HOME", ".")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
print(EVAL_RESULTS_PATH)

snapshot_download(
    repo_id=RESULTS_REPO,
    local_dir=EVAL_RESULTS_PATH,
    repo_type="dataset",
    token=TOKEN,
)


def restart_space():
    """Ask the Hub API to restart the leaderboard Space."""
    API.restart_space(repo_id="SeaLLMs/SeaExam_leaderboard", token=TOKEN)


all_columns = [
    'R', 'Model', 'type', 'open?', 'avg-pub', 'avg-prv ⬇️',
    'id-pub', 'th-pub', 'vi-pub', 'id-prv', 'th-prv', 'vi-prv', '#P(B)',
]

# Columns shown in the SeaExam / SeaBench tabs; TYPES holds one datatype per
# entry of show_columns, in the same order.
show_columns = [
    'R', 'Model', 'type', 'open?', '#P(B)', 'avg-pub', 'avg-prv ⬇️',
    'id-pub', 'th-pub', 'vi-pub', 'id-prv', 'th-prv', 'vi-prv',
]
TYPES = [
    'number', 'markdown', 'str', 'str', 'number', 'number', 'number',
    'number', 'number', 'number', 'number', 'number', 'number',
]

# Columns shown in the Overall tab; TYPES_overall is aligned with them.
show_columns_overall = [
    'R', 'Model', 'type', 'open?', '#P(B)',
    'SeaExam-pub', 'SeaExam-prv ⬇️', 'SeaBench-pub', 'SeaBench-prv',
]
TYPES_overall = [
    'number', 'markdown', 'str', 'str', 'number',
    'number', 'number', 'number', 'number',
]

# Load the data from the csv file
csv_path = os.path.join(EVAL_RESULTS_PATH, 'SeaExam_results_20241210.csv')
# Raw table; not referenced again in this file (NOTE(review): kept in case
# something else relies on the module-level name - confirm before removing).
df = pd.read_csv(csv_path, skiprows=1, header=0)
df_seaexam, df_seabench, df_overall = load_data(csv_path)


def _render_leaderboard(frame, columns, datatypes, params_filter):
    """Create one Leaderboard component inside the active Blocks context.

    Args:
        frame: DataFrame holding the leaderboard rows; it is indexed with
            ``columns`` before display.
        columns: Column names to display and to select by default.
        datatypes: Gradio datatype for each entry of ``columns``.
        params_filter: ColumnFilter to use for the ``#P(B)`` column.

    Returns:
        The created Leaderboard component.
    """
    return Leaderboard(
        value=frame[columns],
        select_columns=SelectColumns(
            default_selection=columns,
            cant_deselect=["R", "Model"],
            label="Select Columns to Display:",
        ),
        search_columns=["Model"],
        filter_columns=["type", "open?", params_filter],
        datatype=datatypes,
    )


demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    # gr.HTML(SUB_TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Tab("🏅 Overall"):
            _render_leaderboard(
                df_overall,
                show_columns_overall,
                TYPES_overall,
                ColumnFilter("#P(B)", default=[7, 9], label="Parameters(B)"),
            )

        with gr.Tab("SeaExam"):
            _render_leaderboard(
                df_seaexam,
                show_columns,
                TYPES,
                ColumnFilter("#P(B)", default=[7, 9]),
            )

        with gr.Tab("SeaBench"):
            _render_leaderboard(
                df_seabench,
                show_columns,
                TYPES,
                ColumnFilter("#P(B)", default=[7, 9]),
            )

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    # The citation accordion is currently disabled:
    # with gr.Row():
    #     with gr.Accordion("📙 Citation", open=False):
    #         citation_button = gr.Textbox(
    #             value=CITATION_BUTTON_TEXT,
    #             label=CITATION_BUTTON_LABEL,
    #             lines=20,
    #             elem_id="citation-button",
    #             show_copy_button=True,
    #         )

    gr.Markdown(CONTACT_TEXT, elem_classes="markdown-text")

# Start the periodic restart job BEFORE launching: launch() blocks the main
# thread when run as a script, so anything placed after it would not run while
# the app is serving. The app is launched exactly once.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

demo.queue(default_concurrency_limit=40).launch(share=True)