Spaces:
Running
Running
import json | |
import re | |
import gradio as gr | |
import numpy | |
import pandas as pd | |
from src.display.css_html_js import custom_css | |
from src.about import ( | |
INTRODUCTION_TEXT, | |
TITLE, | |
AUTHORS, | |
) | |
from src.display.formatting import make_clickable_model | |
# Divisor used to convert a count of parseable questions into a percentage.
NUMBER_OF_QUESTIONS = 171.0

# Matches the "<N>.0 questions were parseable" prefix of an error message and
# captures <N>.
_PARSEABLE_ERROR_RE = re.compile(r'(\d+)\.0 questions were parseable')


def parse_parseable(x):
    """Return the parseable-question count for one result row.

    Args:
        x: Row (any mapping-like object) providing the keys
            "Num Questions Parseable" and "Error".

    Returns:
        The "Num Questions Parseable" value unchanged, unless it is the
        marker 'FAILED'. In that case the count captured from the start of
        the "Error" text is returned as a string, or NaN when the text is
        missing or does not start with the expected phrase.
    """
    if x["Num Questions Parseable"] != 'FAILED':
        return x["Num Questions Parseable"]
    error_text = x["Error"]
    m = _PARSEABLE_ERROR_RE.match(error_text) if isinstance(error_text, str) else None
    # Without this guard an unexpected error message raised AttributeError
    # (``None.group``) and took the whole page down; report "unknown" instead.
    return m.group(1) if m else numpy.nan


def fraction_to_percentage(numerator: float, denominator: float) -> float:
    """Return ``numerator / denominator`` expressed as a percentage."""
    return (numerator / denominator) * 100


def get_params(model_name):
    """Look up ``model_name`` in the module-level ``metadata`` mapping.

    Returns the stored value, or NaN when the model is not listed; unknown
    model names are printed so they show up in the application log.
    """
    if model_name not in metadata:
        print(model_name)
        return numpy.nan
    return metadata[model_name]


demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # The CSV is split by hand rather than with pd.read_csv: each row is cut
    # at the first 13 commas only, so any further commas stay inside the last
    # field of the row.
    with open("benchmark_results.csv", "r") as f:
        header = [h.strip() for h in f.readline().strip().split(",")]
        rows = [line.strip().split(",", 13) for line in f]

    # Use a context manager so the metadata file handle is closed promptly.
    with open('metadata.json') as f:
        metadata = json.load(f)
    # Additionally register every entry under the part of its key that
    # precedes the first comma.
    for k, v in list(metadata.items()):
        metadata[k.split(",")[0]] = v

    # create dataframe from list and header
    leaderboard_df = pd.DataFrame(rows, columns=header)
    print(header)

    # keep only the rows of the eq-bench_v2_pl / eq-bench_pl benchmark versions
    wanted_versions = ["eq-bench_v2_pl", "eq-bench_pl"]
    leaderboard_df = leaderboard_df[leaderboard_df["Benchmark Version"].isin(wanted_versions)]
    # leave only the needed columns; .copy() gives an independent frame so the
    # column assignments below do not trigger SettingWithCopyWarning
    leaderboard_df = leaderboard_df[["Model Path", "Benchmark Score", "Num Questions Parseable", "Error"]].copy()

    # recover the parseable count of failed runs from their error message,
    # then express every count as a percentage of NUMBER_OF_QUESTIONS
    leaderboard_df["Num Questions Parseable"] = leaderboard_df[["Num Questions Parseable", "Error"]].apply(
        parse_parseable, axis=1)
    leaderboard_df["Num Questions Parseable"] = leaderboard_df["Num Questions Parseable"].apply(
        lambda x: fraction_to_percentage(float(x), NUMBER_OF_QUESTIONS))

    leaderboard_df["Params"] = leaderboard_df["Model Path"].apply(get_params)

    # move column order (again copied, because more columns are assigned below)
    leaderboard_df = leaderboard_df[
        ["Model Path", "Params", "Benchmark Score", "Num Questions Parseable", 'Error']].copy()

    leaderboard_df["Model Path"] = leaderboard_df["Model Path"].apply(make_clickable_model)

    # failed runs have no score: turn the 'FAILED' marker into NaN, then scale
    # the score by the fraction of parseable questions (percentage / 100)
    parseable_pct = leaderboard_df["Num Questions Parseable"].astype(float)
    raw_score = leaderboard_df["Benchmark Score"].replace('FAILED', numpy.nan).astype(float)
    leaderboard_df["Benchmark Score"] = raw_score * (parseable_pct / 100)
    leaderboard_df["Num Questions Parseable"] = parseable_pct

    # clamp negative scores to 0
    leaderboard_df.loc[leaderboard_df["Benchmark Score"] < 0, "Benchmark Score"] = 0

    # best score first; ties broken by the parseable percentage
    leaderboard_df = leaderboard_df.sort_values(by=["Benchmark Score", "Num Questions Parseable"],
                                                ascending=[False, False])

    # rename columns for display
    leaderboard_df = leaderboard_df.rename(columns={
        "Model Path": "Model",
        "Num Questions Parseable": "Percentage Questions Parseable",
    })

    # colour gradient over the table, with the reversed colormap for "Params"
    leaderboard_df_styled = leaderboard_df.style.background_gradient(cmap="RdYlGn")
    leaderboard_df_styled = leaderboard_df_styled.background_gradient(cmap="RdYlGn_r", subset=['Params'])

    # display precision per column
    rounding = {
        "Benchmark Score": "{:.2f}",
        "Percentage Questions Parseable": "{:.2f}",
        "Params": "{:.0f}",
    }
    leaderboard_df_styled = leaderboard_df_styled.format(rounding)

    leaderboard_table = gr.components.Dataframe(
        value=leaderboard_df_styled,
        # one datatype per displayed column: Model, Params, Benchmark Score,
        # Percentage Questions Parseable, Error
        datatype=['markdown', 'number', 'number', 'number', 'str'],
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
    )

    gr.Markdown(AUTHORS, elem_classes="markdown-text")

demo.queue(default_concurrency_limit=40).launch()