"""Gradio leaderboard app.

Reads per-model result files from the ``results`` directory and the dataset
configuration from ``datasets_config.json``, then renders:

* an overall table averaged per context length,
* an overall table per task,
* one tab per task with the per-length scores of every model,
* a description tab loaded from ``docs/description.md``.

Every result file is a JSON object that maps a dataset key to a dict of
``{length: score}`` (including a ``"dataset_total_score"`` entry) and also
carries a top-level ``"total_score"`` number.  Scores are multiplied by 100
and rounded to one decimal before being displayed.
"""

import json
import os
from collections import defaultdict
from typing import Any, Dict, Iterable, Iterator, List, Tuple

import gradio as gr
import numpy as np
import pandas as pd

# Directory that holds one ``<model_name>.json`` file per evaluated model.
RESULTS_DIR = "results"

LENGTHS = ["dataset_total_score", "4k", "8k", "16k", "32k", "64k", "128k"]

# Columns that always lead the two overall tables, in this order.
_LEADING_COLUMNS = ("Model", "Total Score")

# Placeholder shown when a model has no score for a given cell.
_MISSING = "-"


def _load_json(path: str) -> Any:
    """Parse the JSON file at *path*, closing the file handle afterwards."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


datasets_params = _load_json("datasets_config.json")
TASKS = datasets_params.keys()


def _iter_results(files: Iterable[str]) -> Iterator[Tuple[str, Dict[str, Any]]]:
    """Yield ``(model_name, parsed_json)`` for every result file in *files*.

    Entries whose name does not end with ``"json"`` are skipped.  The model
    name is the file's base name with everything from ``".json"`` on removed.
    """
    for file in files:
        if not file.endswith("json"):
            continue
        model_name = file.split("/")[-1].split(".json")[0]
        yield model_name, _load_json(os.path.join(RESULTS_DIR, file))


def _mean_percent(values: Any) -> float:
    """Return the mean of *values* scaled by 100 and rounded to one decimal.

    ``np.mean`` also accepts a bare scalar, so this is used for the scalar
    total score as well as for the per-length score lists.
    """
    return round(np.mean(values) * 100, 1)


def _finalize_overall_table(rows: List[Dict[str, Any]]) -> pd.DataFrame:
    """Build an overall table from *rows*, best ``Total Score`` first.

    ``Model`` and ``Total Score`` are moved to the front by name; the other
    columns keep the order in which they were first seen.  An empty *rows*
    list gives an empty table that has only the two leading columns.
    """
    leading = list(_LEADING_COLUMNS)
    if not rows:
        return pd.DataFrame(columns=leading)
    table = pd.DataFrame(rows).sort_values(["Total Score"], ascending=False)
    remaining = [col for col in table.columns if col not in _LEADING_COLUMNS]
    return table[leading + remaining]


def make_default_md() -> str:
    """Return the markdown shown above the leaderboard tabs."""
    leaderboard_md = "LeaderBoard"
    return leaderboard_md


def make_model_desc_md() -> str:
    """Return the contents of ``docs/description.md``."""
    with open("docs/description.md", "r", encoding="utf-8") as f:
        description = f.read()
    return description


def make_overall_table_by_tasks(files: Iterable[str]) -> pd.DataFrame:
    """Build the per-task overview table.

    Args:
        files: File names relative to the results directory.

    Returns:
        One row per model with the columns ``Model``, ``Total Score`` and one
        column per dataset (named by the dataset's ``"name"`` entry in
        ``datasets_params``) holding its ``"dataset_total_score"``.  Rows are
        sorted by ``Total Score`` in descending order.

    Raises:
        KeyError: If a result file contains a dataset key that is absent from
            ``datasets_params``, or if no result file has a ``"total_score"``.
    """
    rows = []
    for model_name, data in _iter_results(files):
        row = {"Model": model_name}
        for dataset, scores in data.items():
            if dataset == "total_score":
                row["Total Score"] = round(scores * 100, 1)
            else:
                column = datasets_params[dataset]["name"]
                row[column] = round(scores["dataset_total_score"] * 100, 1)
        rows.append(row)
    return _finalize_overall_table(rows)


def make_overall_table_by_lengths(files: Iterable[str]) -> pd.DataFrame:
    """Build the per-length overview table.

    For every model, the scores of all datasets are averaged per length key
    (the ``"dataset_total_score"`` entries are ignored).

    Args:
        files: File names relative to the results directory.

    Returns:
        One row per model with the columns ``Model``, ``Total Score`` and one
        column per length key found in the result files, sorted by
        ``Total Score`` in descending order.

    Raises:
        KeyError: If no result file has a ``"total_score"`` entry.
    """
    rows = []
    for model_name, data in _iter_results(files):
        row = {"Model": model_name}
        per_length = defaultdict(list)
        for dataset, scores in data.items():
            if dataset == "total_score":
                row["Total Score"] = _mean_percent(scores)
                continue
            for length, score in scores.items():
                if length != "dataset_total_score":
                    per_length[length].append(score)
        for length, values in per_length.items():
            row[length] = _mean_percent(values)
        rows.append(row)
    return _finalize_overall_table(rows)


def _length_column(length: str) -> str:
    """Map an entry of ``LENGTHS`` to the column title used in task tables."""
    return "Dataset Total Score" if length == "dataset_total_score" else length


def load_model(files: Iterable[str], tab_name: str) -> pd.DataFrame:
    """Build the table of per-length scores of every model for one task.

    Args:
        files: File names relative to the results directory.
        tab_name: Dataset key to look up in every result file.

    Returns:
        One row per model with the columns ``Model``, ``Dataset Total Score``
        and the remaining entries of ``LENGTHS``.  A score that is missing for
        a model is shown as ``"-"``.  Rows are sorted by
        ``Dataset Total Score`` in descending order; rows without that score
        are placed last.
    """
    columns = ["Model"] + [_length_column(length) for length in LENGTHS]
    rows = []
    for model_name, result in _iter_results(files):
        # A model that was not evaluated on this task gets placeholders only.
        scores = result.get(tab_name, {})
        row = {"Model": model_name}
        for length in LENGTHS:
            if length in scores:
                value = round(scores[length] * 100, 1)
            else:
                value = _MISSING
            row[_length_column(length)] = value
        rows.append(row)

    table = pd.DataFrame(rows, columns=columns)
    # The sort column may mix floats with the "-" placeholder; comparing on
    # the numeric view turns placeholders into NaN, which pandas sorts last.
    return table.sort_values(
        ["Dataset Total Score"],
        ascending=False,
        key=lambda col: pd.to_numeric(col, errors="coerce"),
    )


def _render_table(df: pd.DataFrame, wrap: bool = True) -> None:
    """Add a ``gr.Dataframe`` for *df* to the currently open Gradio context.

    Headers and datatypes are derived from *df*, so the component always
    matches the table's real columns: the first column is rendered as
    markdown, all others as strings.
    """
    gr.Dataframe(
        headers=list(df.columns),
        datatype=["markdown"] + ["str"] * (len(df.columns) - 1),
        value=df,
        elem_id="arena_leaderboard_dataframe",
        height=700,
        wrap=wrap,
    )


def build_leaderboard_tab(files: Iterable[str]) -> list:
    """Create the leaderboard markdown header and all tabs.

    Must be called inside an open ``gr.Blocks`` context.

    Args:
        files: File names relative to the results directory.

    Returns:
        A one-element list holding the header ``gr.Markdown`` component.
    """
    # ``files`` is walked once per table, so a one-shot iterator is not enough.
    files = list(files)

    default_md = make_default_md()
    md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown")

    with gr.Tabs():
        with gr.Tab("Results by Lengths", id=0):
            _render_table(make_overall_table_by_lengths(files), wrap=True)

        with gr.Tab("Results by Tasks", id=1):
            _render_table(make_overall_table_by_tasks(files), wrap=False)

        # Task tabs take the ids 2 .. len(TASKS) + 1.
        for tab_id, tab_name in enumerate(TASKS):
            df = load_model(files, tab_name)
            with gr.Tab(datasets_params[tab_name]["name"], id=tab_id + 2):
                _render_table(df, wrap=True)

        # First free id after the task tabs; also valid when TASKS is empty.
        with gr.Tab("Description", id=len(TASKS) + 2):
            desc_md = make_model_desc_md()
            gr.Markdown(desc_md, elem_id="leaderboard_markdown")

    return [md_1]


def build_demo(files: Iterable[str]) -> gr.Blocks:
    """Assemble the complete Gradio app for the given result *files*."""
    text_size = gr.themes.sizes.text_lg

    with gr.Blocks(
        title="LIBRA leaderboard",
        theme=gr.themes.Base(text_size=text_size),
    ) as demo:
        build_leaderboard_tab(files)
    return demo


if __name__ == "__main__":
    files = os.listdir(RESULTS_DIR)
    demo = build_demo(files)
    demo.launch(share=False)