import os
import json
from collections import defaultdict

import gradio as gr
import numpy as np
import pandas as pd

# Aggregate column first, then one column per evaluated context length.
LENGTHS = ["dataset_total_score", "4k", "8k", "16k", "32k", "64k", "128k"]

with open("datasets_config.json", "r") as f:
    datasets_params = json.load(f)
TASKS = datasets_params.keys()
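
# For reference, the two JSON layouts this app relies on, as inferred from how
# they are read below (task keys and numbers are illustrative, not real data):
#
#   datasets_config.json -- maps each task key to its display parameters:
#       {"<task_key>": {"name": "<Display Name>"}, ...}
#
#   results/<model>.json -- scores in [0, 1] per task and context length, plus
#   per-task ("dataset_total_score") and overall ("total_score") aggregates:
#       {
#           "total_score": 0.42,
#           "<task_key>": {"dataset_total_score": 0.40, "4k": 0.55, ..., "128k": 0.31},
#           ...
#       }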

def make_default_md():
    leaderboard_md = "Leaderboard"
    return leaderboard_md

def make_model_desc_md():
    with open("docs/description.md", "r") as f:
        description = f.read()
    return description

def make_overall_table_by_tasks(files):
    """Build the overview table with one column per task plus the total score."""
    results = defaultdict(list)
    result_dct = {}
    # First pass: collect every model's per-task aggregate scores as percentages.
    for file in files:
        if not file.endswith("json"):
            continue
        path = "results/" + file
        with open(path) as f:
            data = json.load(f)
        model_name = file.split("/")[-1].split(".json")[0]
        result_dct[model_name] = {}
        for dataset in data.keys():
            if dataset == "total_score":
                result_dct[model_name][dataset] = round(data[dataset] * 100, 1)
                continue
            result_dct[model_name][dataset] = round(data[dataset]["dataset_total_score"] * 100, 1)
    # Second pass: flatten the per-model dicts into parallel column lists.
    for file in files:
        if not file.endswith("json"):
            continue
        model_name = file.split("/")[-1].split(".json")[0]
        results["Model"].append(model_name)
        for key in result_dct[model_name].keys():
            if key == "total_score":
                results["Total Score"].append(result_dct[model_name][key])
            else:
                results[datasets_params[key]["name"]].append(result_dct[model_name][key])
    table = pd.DataFrame(results).sort_values(["Total Score"], ascending=False)
    # Move "Total Score" right after "Model"; select by name rather than by
    # fragile positional index so the code survives a change in task count.
    cols = table.columns.tolist()
    cols = ["Model", "Total Score"] + [c for c in cols if c not in ("Model", "Total Score")]
    return table[cols]

def make_overall_table_by_lengths(files):
    """Build the overview table with one column per context length, averaged over tasks."""
    results = defaultdict(list)
    result_dct = {}
    # First pass: gather each model's raw scores, grouped by context length.
    for file in files:
        if not file.endswith("json"):
            continue
        path = "results/" + file
        with open(path) as f:
            data = json.load(f)
        model_name = file.split("/")[-1].split(".json")[0]
        result_dct[model_name] = {}
        for dataset in data.keys():
            if dataset == "total_score":
                result_dct[model_name][dataset] = data[dataset]
                continue
            for length in data[dataset].keys():
                if length == "dataset_total_score":
                    continue
                if length not in result_dct[model_name]:
                    result_dct[model_name][length] = []
                result_dct[model_name][length].append(data[dataset][length])
    # Average each length's scores across tasks and convert to percentages
    # (for the scalar "total_score" entry, np.mean is an identity).
    for model_name in result_dct.keys():
        for length in result_dct[model_name].keys():
            result_dct[model_name][length] = round(np.mean(result_dct[model_name][length]) * 100, 1)
    # Second pass: flatten into parallel column lists.
    for file in files:
        if not file.endswith("json"):
            continue
        model_name = file.split("/")[-1].split(".json")[0]
        results["Model"].append(model_name)
        for key in result_dct[model_name].keys():
            if key == "total_score":
                results["Total Score"].append(result_dct[model_name][key])
            else:
                results[key].append(result_dct[model_name][key])
    table = pd.DataFrame(results).sort_values(["Total Score"], ascending=False)
    # Move "Total Score" right after "Model", selecting by name, not position.
    cols = table.columns.tolist()
    cols = ["Model", "Total Score"] + [c for c in cols if c not in ("Model", "Total Score")]
    return table[cols]

def load_model(files, tab_name):
    """Build a per-task table: one row per model, one column per context length."""
    results = defaultdict(list)
    for file in files:
        if not file.endswith("json"):
            continue
        model_name = file.split("/")[-1].split(".json")[0]
        results["Model"].append(model_name)
        with open("results/" + file, "r") as f:
            result = json.load(f)
        for length in LENGTHS:
            if length in result[tab_name].keys():
                if length == "dataset_total_score":
                    results["Dataset Total Score"].append(round(result[tab_name][length] * 100, 1))
                    continue
                results[length].append(round(result[tab_name][length] * 100, 1))
            else:
                # Lengths a model was not evaluated on show as "-" placeholders.
                results[length].append("-")
    return pd.DataFrame(results).sort_values(["Dataset Total Score"], ascending=False)
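
# For illustration, load_model(...) yields a frame shaped like this for one
# task (every value below is made up):
#
#   Model    Dataset Total Score    4k    8k  ...  128k
#   model_a                 52.3  61.0  55.4  ...  40.2
#   model_b                 48.7  58.2  51.1  ...     -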

def build_leaderboard_tab(files):
    default_md = make_default_md()
    md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown")
    with gr.Tabs() as tabs:
        # Overview: scores averaged over tasks, one column per context length.
        with gr.Tab("Results by Lengths", id=0):
            df = make_overall_table_by_lengths(files)
            gr.Dataframe(
                headers=df.columns.tolist(),
                datatype=["markdown"] + ["str"] * (len(df.columns) - 1),
                value=df,
                elem_id="arena_leaderboard_dataframe",
                height=700,
                wrap=True,
            )
        # Overview: one aggregate score column per task.
        with gr.Tab("Results by Tasks", id=1):
            df = make_overall_table_by_tasks(files)
            gr.Dataframe(
                headers=df.columns.tolist(),
                datatype=["markdown"] + ["str"] * (len(df.columns) - 1),
                value=df,
                elem_id="arena_leaderboard_dataframe",
                height=700,
                wrap=False,
            )
        # One tab per task with its per-length breakdown.
        for tab_id, tab_name in enumerate(TASKS):
            df = load_model(files, tab_name)
            with gr.Tab(datasets_params[tab_name]["name"], id=tab_id + 2):
                gr.Dataframe(
                    headers=df.columns.tolist(),
                    datatype=["markdown"] + ["str"] * (len(df.columns) - 1),
                    value=df,
                    elem_id="arena_leaderboard_dataframe",
                    height=700,
                    wrap=True,
                )
        # tab_id still holds the last task's index after the loop above.
        with gr.Tab("Description", id=tab_id + 3):
            desc_md = make_model_desc_md()
            gr.Markdown(desc_md, elem_id="leaderboard_markdown")
    return [md_1]

def build_demo(files):
    text_size = gr.themes.sizes.text_lg
    with gr.Blocks(
        title="LIBRA leaderboard",
        theme=gr.themes.Base(text_size=text_size),
    ) as demo:
        build_leaderboard_tab(files)
    return demo


if __name__ == "__main__":
    files = os.listdir("results")
    demo = build_demo(files)
    demo.launch(share=False)
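
# Run with `python app.py` (file name assumed). Gradio serves the app locally,
# on port 7860 by default; passing share=True to demo.launch() would create a
# temporary public link instead.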