# babilong / app.py
# mbur's picture
# Update app.py
# 40dad09 verified
# raw
# history blame
# 5.57 kB
"""A Gradio app that renders a static leaderboard. It is used as a Hugging Face Space."""
import ast
import argparse
import glob
import pickle
import gradio as gr
import numpy as np
import pandas as pd
import os
from collections import defaultdict
from matplotlib.colors import LinearSegmentedColormap
def make_default_md():
    """Return the Markdown header shown at the top of the leaderboard page.

    The header consists of the page title, a "Dataset on HF" badge and a
    row of links (GitHub, Paper, Dataset).

    Returns:
        str: Markdown text; it begins and ends with a newline.
    """
    # Plain (non-f) string: there is nothing to interpolate. The title emoji
    # are written as real Unicode characters; they were previously stored as
    # mis-decoded UTF-8 bytes and rendered as gibberish.
    leaderboard_md = """
# 🔎📚🪑📚❓ BABILong Leaderboard 🏆
[![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-lg.svg)](https://huggingface.co/datasets/booydar/babilong)
| [GitHub](https://github.com/booydar/recurrent-memory-transformer/) | [Paper](https://arxiv.org/abs/2406.10149) | [Dataset](https://github.com/booydar/babilong/) |
"""
    return leaderboard_md
def make_arena_leaderboard_md(total_models):
    """Return the one-line Markdown summary placed above a leaderboard table.

    Args:
        total_models: Number of models to report; interpolated as-is.

    Returns:
        str: Markdown with the model count in bold followed by a fixed
        "Last updated" date.
    """
    return f"""Total #models: **{total_models}**. Last updated: May 09, 2024."""
def make_model_desc_md(f_len):
    """Build the Markdown shown on the "Description" tab.

    The text starts with the summary line from ``make_arena_leaderboard_md``
    and is followed by one ``### <name>`` section per file found directly
    inside the ``info`` directory, where ``<name>`` is the file name without
    its trailing ``.md`` and the section body is the file's content.

    Args:
        f_len: Model count forwarded to ``make_arena_leaderboard_md``.

    Returns:
        str: The assembled Markdown. Only the summary line is returned when
        ``info`` does not exist or contains no files.
    """
    # os.walk yields nothing for a missing directory; supply an empty listing
    # instead of letting next() raise StopIteration.
    _, _, file_names = next(os.walk('info'), ('info', [], []))

    sections = [make_arena_leaderboard_md(f_len)]
    # Sorted so the section order does not depend on filesystem ordering.
    for file_name in sorted(file_names):
        # Strip only a trailing ".md"; splitting on '.md' would also cut at
        # a ".md" that appears in the middle of the name.
        if file_name.endswith('.md'):
            model_name = file_name[:-len('.md')]
        else:
            model_name = file_name
        with open(os.path.join('info', file_name), 'r', encoding='utf-8') as f:
            description = f.read()
        sections.append(f"\n\n### {model_name}\n{description}")
    return ''.join(sections)
def model_hyperlink(model_name, link):
    """Return an HTML anchor that opens ``link`` in a new tab.

    Args:
        model_name: Text displayed for the link (inserted verbatim).
        link: Target URL (inserted verbatim into ``href``).

    Returns:
        str: An ``<a>`` element with a dotted underline style.
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'
def load_model(folders, tab_name, msg_lengths):
    """Collect per-model scores for one task into a ranked table.

    For every model folder and every key of ``msg_lengths`` the score is read
    from ``{folder}/{tab_name}/{key}.csv`` as the sum of the ``result``
    column divided by the number of rows, expressed as a truncated integer
    percentage. A missing file, a zero-byte file or a file without data rows
    is shown as ``-`` and counts as 0 when ranking.

    Models are ranked (1 = best) by the mean score over the first five
    entries of ``msg_lengths``; models with equal means keep the order in
    which they appear in ``folders``.

    Args:
        folders: Model result directories. The last ``/``-separated
            component of each is used as the model's display name.
        tab_name: Name of the task sub-directory inside each folder.
        msg_lengths: Mapping from CSV file stem to the column label used in
            the returned table.

    Returns:
        pandas.DataFrame: Columns ``Rank``, ``Model`` and one column per
        label in ``msg_lengths``, sorted by ``Rank``. The table has these
        columns and no rows when ``folders`` is empty.
    """

    class NA:
        """Placeholder for a missing score: displayed as '-', worth 0.0."""

        def __repr__(self) -> str:
            return '-'

        def __float__(self):
            return 0.0

    # Only the first five lengths take part in the ranking average.
    ranked_labels = [msg_lengths[task] for task in list(msg_lengths)[:5]]

    # Create every column up front so that an empty `folders` still produces
    # a table with the expected columns (sort_values needs 'Rank' to exist).
    results = {'Rank': [], 'Model': []}
    for label in msg_lengths.values():
        results[label] = []

    mean_score = []
    for i, folder in enumerate(folders):
        results['Rank'].append(i)  # placeholder, replaced once means are known
        results['Model'].append(folder.split('/')[-1])
        for task, label in msg_lengths.items():
            csv_path = f'{folder}/{tab_name}/{task}.csv'
            score = NA()
            if os.path.isfile(csv_path):
                try:
                    df = pd.read_csv(csv_path)
                except pd.errors.EmptyDataError:
                    # Zero-byte file: nothing to score.
                    df = None
                # A header-only file has no rows; dividing by len(df) would
                # fail, so such a file is treated like a missing one.
                if df is not None and len(df) > 0:
                    score = int(df['result'].sum() / len(df) * 100)
            results[label].append(score)
        # Negated so that an ascending argsort puts the best model first.
        mean_score.append(-np.mean([float(results[label][i]) for label in ranked_labels]))

    # Stable sort keeps tied models in input order, making ranks reproducible.
    for rank, i in enumerate(np.argsort(mean_score, kind='stable')):
        results['Rank'][i] = rank + 1
    return pd.DataFrame(results).sort_values(['Rank'])
def build_leaderboard_tab(folders):
    """Create the leaderboard UI: header, one tab per task and a description tab.

    Must be called inside an active ``gr.Blocks`` context. For each of the
    tasks ``qa1`` .. ``qa5`` a tab is created holding a summary line and a
    score table loaded with ``load_model`` and colored red-yellow-green over
    the range 0..100. A final "Description" tab shows the text produced by
    ``make_model_desc_md``.

    Args:
        folders: Model result directories, forwarded to ``load_model``; their
            count is shown as the total number of models.

    Returns:
        list: A single-element list holding the header Markdown component.
    """
    md_1 = gr.Markdown(make_default_md(), elem_id="leaderboard_markdown")

    # CSV file stem -> column label shown in the table.
    msg_lengths = {
        '0': '0k',
        '4000': '4k',
        '8000': '8k',
        '16000': '16k',
        '32000': '32k',
        '64000': '64k',
        '128000': '128k',
        '500000': '500k',
        '1000000': '1M',
        '10000000': '10M',
    }
    length_labels = list(msg_lengths.values())
    tab_names = ['qa1', 'qa2', 'qa3', 'qa4', 'qa5']

    # Identical for every tab, so build them once.
    cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256)
    summary_md = make_arena_leaderboard_md(len(folders))

    with gr.Tabs():
        for tab_id, tab_name in enumerate(tab_names):
            scores = load_model(folders, tab_name, msg_lengths)
            styled = scores.style.background_gradient(
                cmap=cmap, vmin=0, vmax=100, subset=length_labels
            )
            # arena table
            with gr.Tab(tab_name, id=tab_id):
                gr.Markdown(summary_md, elem_id="leaderboard_markdown")
                gr.Dataframe(
                    headers=["Rank", "Model"] + length_labels,
                    # One datatype per column; derived from msg_lengths so it
                    # always matches the number of headers.
                    datatype=["str", "markdown"] + ["str"] * len(length_labels),
                    value=styled,
                    elem_id="arena_leaderboard_dataframe",
                    height=700,
                    column_widths=[20, 150] + [20] * len(length_labels),
                    wrap=True,
                )
        # The id continues after the task tabs without relying on the loop
        # variable still being bound.
        with gr.Tab("Description", id=len(tab_names)):
            desc_md = make_model_desc_md(len(folders))
            gr.Markdown(desc_md, elem_id="leaderboard_markdown")
    return [md_1]
# Custom stylesheet passed to gr.Blocks(css=...) in build_demo.
# - #leaderboard_markdown matches the elem_id given to the Markdown
#   components created in build_leaderboard_tab.
# - #notice_markdown and #leaderboard_dataframe are not used as elem_id by
#   any component in this file (the table uses "arena_leaderboard_dataframe").
# - The `footer` rule hides the page footer.
block_css = """
#notice_markdown {
font-size: 104%
}
#notice_markdown th {
display: none;
}
#notice_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_markdown {
font-size: 104%
}
#leaderboard_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_dataframe td {
line-height: 0.1em;
}
footer {
display:none !important
}
.image-container {
display: flex;
align-items: center;
padding: 1px;
}
.image-container img {
margin: 0 30px;
height: 20px;
max-height: 100%;
width: auto;
max-width: 20%;
}
"""
def build_demo(folders):
    """Assemble the Gradio app for the leaderboard.

    Args:
        folders: Model result directories, forwarded to
            ``build_leaderboard_tab``.

    Returns:
        The ``gr.Blocks`` instance titled "Babilong leaderboard", using the
        Base theme with large text and the ``block_css`` stylesheet.
    """
    theme = gr.themes.Base(text_size=gr.themes.sizes.text_lg)
    with gr.Blocks(title="Babilong leaderboard", theme=theme, css=block_css) as demo:
        build_leaderboard_tab(folders)
    return demo
if __name__ == "__main__":
    # One sub-directory of results/ per model. os.listdir returns entries in
    # arbitrary order, so sort for a reproducible model order; skip plain
    # files so they are not listed as models without any scores.
    folders = [
        f'results/{entry}'
        for entry in sorted(os.listdir('results'))
        if os.path.isdir(os.path.join('results', entry))
    ]
    demo = build_demo(folders)
    demo.launch(share=False)