import json
import os
import re
import uuid
import random
from pathlib import Path
import pandas as pd
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
from datasets import load_dataset
from huggingface_hub import CommitScheduler, hf_hub_download
from huggingface_hub.utils import RepositoryNotFoundError
from yaml import safe_load as yaml_load
from src.check_validity import validate_model
from src.task_mappings import professional_mapping, semantic_categories
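# validate_model and the task/category mappings are provided by local project
# modules under src/ (not third-party packages).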
# -----------------------------------------------------------------------------
# Page configuration and global CSS styles for a modern look and improved UX
# -----------------------------------------------------------------------------
st.set_page_config(
    page_title="IberBench",
    layout="wide",
    initial_sidebar_state="expanded",
    page_icon="🏆",
)
st.markdown(
    """
    <style>
    /* General page styling */
    body {
        background-color: #f7f7f7;
        font-family: 'Segoe UI', sans-serif;
    }
    /* Sidebar styling */
    .css-1d391kg {
        background-color: #ffffff;
        border-right: 2px solid #eaeaea;
    }
    /* Header styling */
    .main-header {
        text-align: center;
        padding: 2rem 0;
        background: linear-gradient(90deg, #007BFF, #00BFFF);
        color: white;
        border-radius: 10px;
    }
    /* Tab styling */
    .stTabs > .css-1qimj2v {
        background: #fff;
    }
    /* Form styling */
    .stButton>button {
        background-color: #007BFF;
        color: white;
        border: none;
        border-radius: 5px;
    }
    </style>
    """,
    unsafe_allow_html=True,
)
# -----------------------------------------------------------------------------
# Global variables and helper functions
# -----------------------------------------------------------------------------
request_file = Path("user_request/") / f"data_{uuid.uuid4()}.json"
request_folder = request_file.parent
LANGUAGES_SETTINGS = Path("etc/languages_settings.yml")
dataset_columns = [
    "workshop",
    "shared_task",
    "year",
    "task_type",
    "language",
    "url",
    "language_variety",
    "problem_type",
    "num_labels",
    "labels",
]
model_columns = ["model_name", "model_type", "num_parameters"]
scheduler = CommitScheduler(
    repo_id="iberbench/user-requests",
    repo_type="dataset",
    private=True,
    folder_path=request_folder,
    token=st.secrets["HF_TOKEN"],
    path_in_repo="data",
    every=10,
)
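# The scheduler uploads the contents of `request_folder` to the private
# iberbench/user-requests dataset every 10 minutes (CommitScheduler's `every`
# is expressed in minutes). log_submission() appends one JSON object per line
# (JSON Lines) and holds the scheduler lock so a write cannot overlap an upload.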
def log_submission(input_dict: dict) -> None:
    with scheduler.lock:
        with request_file.open("a") as f:
            f.write(json.dumps(input_dict))
            f.write("\n")
def get_lang_columns(columns: list, lang: str):
    # For a "Mixed" entry, return the columns that end with the language name
    # itself (i.e., without a variety suffix at the end); otherwise return any
    # column containing the normalized language name.
    if "Mixed" in lang:
        lang = lang.lower().split(" ")[0]
        return [col for col in columns if col.endswith(lang)]
    else:
        lang_norm = lang.lower().replace(" ", "_")
        return [col for col in columns if lang_norm in col]
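# Illustrative only (column names and language entries are hypothetical): for
# lang="Galician" this keeps task columns such as "some_task_galician"; for a
# "Mixed" entry such as "Spanish Mixed" it keeps columns ending in "spanish"
# and skips variety-suffixed ones like "some_task_spanish_mexico".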
def load_data(lang) -> pd.DataFrame:
    try:
        data = load_dataset(
            "iberbench/lm-eval-results", token=st.secrets["HF_TOKEN"]
        )["train"].to_pandas()
        task_columns = [col for col in data.columns if col not in model_columns]
        task_lang_columns = get_lang_columns(task_columns, lang)
        # Convert task scores to percentages.
        data[task_columns] = data[task_columns] * 100
        data = data[model_columns + task_lang_columns]
        # data["Active"] = False
        return data
    except FileNotFoundError:
        st.error("iberbench/lm-eval-results was not found in the hub 😞")
        return pd.DataFrame()
def load_dataset_card(task) -> list:
    name_repo = "iberbench/" + task
    try:
        info_path = hf_hub_download(
            repo_id=name_repo,
            filename="task_metadata.json",
            repo_type="dataset",
        )
        with open(info_path, "r") as f:
            info = json.load(f)
        values_ = []
        for i in dataset_columns:
            if i in info:
                values_.append(info[i])
            else:
                values_.append([] if i == "labels" else "-")
        return values_
    except RepositoryNotFoundError:
        st.error(task + ": dataset was not found in the hub 🚫")
        return ["-"] * len(dataset_columns)
def active_data(lang) -> pd.DataFrame:
    return st.session_state[f"leaderboard_data_{lang}"][
        st.session_state[f"leaderboard_data_{lang}"]["Active"] == True
    ].copy()
def get_index(lang, row) -> pd.Series:
    return active_data(lang).iloc[row].name
def commit(lang) -> None:
    for row in st.session_state[f"edited_data_{lang}"]["edited_rows"]:
        row_index = get_index(lang, row)
        for key, value in st.session_state[f"edited_data_{lang}"][
            "edited_rows"
        ][row].items():
            st.session_state[f"leaderboard_data_{lang}"].at[
                row_index, key
            ] = value
# -----------------------------------------------------------------------------
# Visualization helper functions
# -----------------------------------------------------------------------------
def create_table_results(df_mean: pd.DataFrame):
    rank_value = []
    for i in df_mean["Mean"].rank(method="dense", ascending=False).astype(int):
        if i == 1:
            rank_value.append(f"{i} 🥇")
        elif i == 2:
            rank_value.append(f"{i} 🥈")
        elif i == 3:
            rank_value.append(f"{i} 🥉")
        else:
            rank_value.append(str(i))
    df_mean.insert(0, "Rank", rank_value)
    df_final = df_mean.sort_values("Mean", ascending=False)
    st.dataframe(
        df_final,
        hide_index=True,
        use_container_width=True,
        column_config={
            "model_name": st.column_config.TextColumn("Model 🧠"),
            "model_type": st.column_config.TextColumn("Type 🔍"),
            "num_parameters": st.column_config.NumberColumn("Model Size 🔢"),
        },
    )
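# create_table_all_results() extends the aggregated table with one extra column
# per language (the per-language "Mean" pivoted out of
# create_data_results_per_language()) before ranking and rendering it.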
def create_table_all_results(aggregated_df: pd.DataFrame):
    combined_df = create_data_results_per_language()
    if combined_df is not None and not combined_df.empty:
        df_lang = combined_df.pivot(
            index="model_name", columns="language", values="Mean"
        )
        aggregated_df[df_lang.columns] = df_lang[df_lang.columns].values
    rank_value = []
    for i in (
        aggregated_df["Mean"].rank(method="dense", ascending=False).astype(int)
    ):
        if i == 1:
            rank_value.append(f"{i} 🥇")
        elif i == 2:
            rank_value.append(f"{i} 🥈")
        elif i == 3:
            rank_value.append(f"{i} 🥉")
        else:
            rank_value.append(str(i))
    aggregated_df.insert(0, "Rank", rank_value)
    df_final = aggregated_df.sort_values("Mean", ascending=False)
    st.dataframe(
        df_final,
        hide_index=True,
        use_container_width=True,
        column_config={
            "model_name": st.column_config.TextColumn("Model 🧠"),
            "model_type": st.column_config.TextColumn("Type 🔍"),
            "num_parameters": st.column_config.NumberColumn("Model Size 🔢"),
        },
    )
def create_scatter_chart(df: pd.DataFrame, id_: str):
    fig = px.scatter(
        df,
        x="num_parameters",
        y="Mean",
        color="model_name",
        size="num_parameters",
        hover_data=["model_type"],
        labels={"num_parameters": "Num parameters"},
    )
    fig.update_layout(template="plotly_white")
    st.plotly_chart(
        fig, use_container_width=True, key=id_ + str(random.random())
    )
def create_radar_chart(df: pd.DataFrame, id_: str):
    df = df.sort_values(by="Mean", ascending=False)
    radar_df = pd.DataFrame(
        {"r": df["Mean"][:10], "theta": df["model_name"][:10]}
    )
    fig = px.line_polar(
        radar_df,
        r="r",
        theta="theta",
        line_close=True,
        markers=True,
    )
    fig.update_traces(fill="toself")
    st.plotly_chart(
        fig, use_container_width=True, key=id_ + str(random.random())
    )
def create_pie_chart(df: pd.DataFrame, id_: str):
    df_pie = df["model_type"].value_counts().reset_index()
    df_pie.columns = ["model_type", "count"]
    fig = px.pie(
        df_pie,
        values="count",
        names="model_type",
        labels={"model_type": "Model type"},
    )
    st.plotly_chart(
        fig, use_container_width=True, key=id_ + str(random.random())
    )
def create_box_plot(df: pd.DataFrame, id_: str):
    fig = px.box(
        df,
        x="model_type",
        y="Mean",
        points="all",
        labels={"model_type": "Model type"},
    )
    st.plotly_chart(
        fig, use_container_width=True, key=id_ + str(random.random())
    )
def get_summary_df(lang: str, task_types: list) -> pd.DataFrame:
    df = st.session_state[f"leaderboard_data_{lang}"][model_columns].copy()
    if not st.session_state[f"leaderboard_data_{lang}"].empty:
        for t in task_types:
            task_list = semantic_categories[t]
            cols = [
                col
                for col in st.session_state[f"leaderboard_data_{lang}"].columns
                if "iberbench/" + col in task_list
            ]
            if cols:
                tmp = st.session_state[f"leaderboard_data_{lang}"][cols]
                df[t] = tmp.mean(axis=1).round(2)
        if df.shape[1] > 4:
            df.insert(3, "Mean", df.iloc[:, 3:-1].mean(axis=1).round(2))
        else:
            df.insert(3, "Mean", df.iloc[:, 3].round(2))
    return df
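# Sketch of the frame get_summary_df() produces (values, model name, and
# category names are illustrative): model_name, model_type, num_parameters,
# Mean, then one column per semantic category found for the language, e.g.
#   some-org/some-model | 💬 Chat | 7000000000 | 61.30 | 58.20 | ...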
def create_results_visualization_lang(lang: str):
    # ---------------------------
    # In-language plots section
    # ---------------------------
    task_types = select_task_per_language(lang)
    summary_df = get_summary_df(lang, task_types)
    tasks_df = st.session_state[f"leaderboard_data_{lang}"].copy()
    # Display the results table for the selected language
    create_table_results(summary_df)
    st.markdown("### Language plots 📊")
    in_lang_tabs = st.tabs(
        [
            "Top 10 performance 🔥",
            "Performance vs. size 📈",
            "Performance per type 💡",
            "Fundamental vs industry ⚖️",
            "Performance per task category 📊",
        ]
    )
    with in_lang_tabs[0]:
        create_radar_chart(summary_df, lang + "in_radar")
    with in_lang_tabs[1]:
        create_scatter_chart(summary_df, lang + "in_scatter")
    with in_lang_tabs[2]:
        create_box_plot(summary_df, lang + "in_box")
    with in_lang_tabs[3]:
        create_box_plot_per_task_category(tasks_df, lang + "in_box_task_cat")
    with in_lang_tabs[4]:
        create_box_plot_per_semantic_category(tasks_df, lang + "in_box_sem_cat")
# -----------------------------------------------------------------------------
# Functions for other visualization sections
# -----------------------------------------------------------------------------
def select_task_per_language(lang: str):
    types = []
    for k, v in semantic_categories.items():
        for vv in v:
            task_name = vv.split("iberbench/")[1]
            if task_name in list(
                st.session_state[f"leaderboard_data_{lang}"].columns
            ):
                if k not in types:
                    types.append(k)
    return types
def create_dataset_info_per_language(lang: str):
    all_values = []
    if not st.session_state[f"leaderboard_data_{lang}"].empty:
        cols = [
            col
            for col in st.session_state[f"leaderboard_data_{lang}"].columns
            if col not in model_columns
        ]
        if len(cols) > 1:
            for task in cols[:-1]:
                values = load_dataset_card(task)
                all_values.append(values)
        else:
            values = load_dataset_card(cols[0])
            all_values.append(values)
        df = pd.DataFrame(all_values, columns=dataset_columns)
        st.dataframe(
            df,
            column_config={
                "workshop": st.column_config.TextColumn(
                    "Workshop 🏫", help="Workshop the shared task belongs to"
                ),
                "shared_task": st.column_config.TextColumn(
                    "Shared Task 📋", help="Shared Task name"
                ),
                "year": st.column_config.TextColumn(
                    "Year 📅", help="Year of the shared task"
                ),
                "task_type": st.column_config.TextColumn(
                    "Task Type 📝", help="Shared Task type"
                ),
                "language": st.column_config.TextColumn(
                    "Language 🌍", help="Shared Task language"
                ),
                "url": st.column_config.ListColumn(
                    "Task URL 🔗", help="Shared Task URL"
                ),
                "language_variety": st.column_config.TextColumn(
                    "Language Variety 🗣️", help="Shared Task language variety"
                ),
                "problem_type": st.column_config.TextColumn(
                    "Problem Type ❓", help="Shared Task problem type"
                ),
                "num_labels": st.column_config.NumberColumn(
                    "Number of Labels 🔢", help="Shared Task number of labels"
                ),
                "labels": st.column_config.ListColumn(
                    "Labels 🏷️", help="Shared Task labels"
                ),
            },
            hide_index=True,
        )
    else:
        st.write("No data found to display on leaderboard 😞.")
def create_box_plot_per_task_category(df: pd.DataFrame, id_: str):
    # Compute the average performance for each professional category
    # (using professional_mapping).
    melt_vars = []
    for category, tasks in professional_mapping.items():
        relevant_cols = [
            col for col in df.columns if "iberbench/" + col in tasks
        ]
        if relevant_cols:
            df[category] = df[relevant_cols].mean(axis=1).round(2)
            melt_vars.append(category)
    melt_vars = list(set(melt_vars))
    id_vars = model_columns.copy()
    if "language" in df.columns:
        id_vars.append("language")
    df_melt = df.melt(
        id_vars=id_vars,
        value_vars=melt_vars,
        var_name="Task Category",
        value_name="Performance",
    )
    fig = px.box(
        df_melt,
        x="Task Category",
        y="Performance",
        points="all",
        labels={"Performance": "Performance (%)"},
    )
    st.plotly_chart(
        fig, use_container_width=True, key=id_ + str(random.random())
    )
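# create_box_plot_per_semantic_category() below mirrors the function above, but
# it aggregates over semantic_categories instead of professional_mapping.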
def create_box_plot_per_semantic_category(df: pd.DataFrame, id_: str):
    # Compute the average performance for each semantic category defined in
    # semantic_categories.
    melt_vars = []
    for category, tasks in semantic_categories.items():
        relevant_cols = [
            col for col in df.columns if "iberbench/" + col in tasks
        ]
        if relevant_cols:
            df[category] = df[relevant_cols].mean(axis=1).round(2)
            melt_vars.append(category)
    melt_vars = list(set(melt_vars))
    id_vars = model_columns.copy()
    if "language" in df.columns:
        id_vars.append("language")
    df_melt = df.melt(
        id_vars=id_vars,
        value_vars=melt_vars,
        var_name="Task Category",
        value_name="Performance",
    )
    fig = px.box(
        df_melt,
        x="Task Category",
        y="Performance",
        points="all",
        labels={"Performance": "Performance (%)"},
    )
    st.plotly_chart(
        fig, use_container_width=True, key=id_ + str(random.random())
    )
def create_histogram(df: pd.DataFrame, id_: str):
    fig = px.histogram(
        df,
        x="num_parameters",
        nbins=20,
        labels={"num_parameters": "Num parameters", "count": "Count"},
    )
    fig.update_layout(template="plotly_white")
    st.plotly_chart(
        fig, use_container_width=True, key=id_ + str(random.random())
    )
def create_data_results_per_language() -> pd.DataFrame:
    # Create a combined dataframe from all leaderboard data in session_state.
    combined_df = pd.DataFrame()
    for key in st.session_state.keys():
        if key.startswith("leaderboard_data_"):
            temp_df = st.session_state[key].copy()
            # If the "language" column is missing, use the key to assign a language name.
            if "language" not in temp_df.columns:
                lang = key.split("leaderboard_data_")[1]
                temp_df["language"] = lang
            combined_df = pd.concat([combined_df, temp_df], ignore_index=True)
    if combined_df.empty:
        st.warning("No data available for any language ⚠️.")
        return
    # Check if the "Mean" column exists. If not, compute it.
    if "Mean" not in combined_df.columns:
        # Exclude model metadata, language, and any non-numeric columns from
        # the performance calculation.
        performance_cols = [
            col
            for col in combined_df.columns
            if col not in model_columns + ["language", "Active"]
            and pd.api.types.is_numeric_dtype(combined_df[col])
        ]
        if performance_cols:
            combined_df["Mean"] = (
                combined_df[performance_cols].mean(axis=1).round(2)
            )
        else:
            st.warning(
                "No numeric task performance columns available to compute 'Mean' ⚠️."
            )
            return
    return combined_df
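# Note: create_data_results_per_language() returns None when there is nothing
# to combine, so create_table_all_results() and create_box_plot_per_language()
# check for None before using its result.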
def create_box_plot_per_language(id_: str):
    # Create a boxplot with performance (Mean) per language.
    combined_df = create_data_results_per_language()
    if combined_df is None or combined_df.empty:
        return
    fig = px.box(
        combined_df,
        x="language",
        y="Mean",
        points="all",
        labels={"language": "Language", "Mean": "Performance (%)"},
    )
    st.plotly_chart(
        fig, use_container_width=True, key=id_ + str(random.random())
    )
def get_all_languages_summary_df() -> pd.DataFrame:
    """Combine leaderboard summary data from all languages using get_summary_df."""
    combined_df = pd.DataFrame()
    for key in st.session_state:
        if key.startswith("leaderboard_data_"):
            lang = key.split("leaderboard_data_")[1]
            task_types = select_task_per_language(lang)
            summary_df = get_summary_df(lang, task_types)
            summary_df["language"] = lang
            combined_df = pd.concat(
                [combined_df, summary_df], ignore_index=True
            )
    return combined_df
def get_all_languages_aggregated_summary_df() -> pd.DataFrame:
    """
    Aggregate the combined summary data by model_name to compute mean performance
    across languages. Use this aggregated data for the radar, scatter, pie, box,
    and histogram plots.
    """
    df = get_all_languages_summary_df()
    agg_df = df.groupby("model_name", as_index=False).agg(
        {
            "model_type": "first",  # choose an aggregation that makes sense
            "num_parameters": "mean",  # average model size across languages
            "Mean": "mean",  # average performance
        }
    )
    agg_df["Mean"] = agg_df["Mean"].round(2)
    return agg_df
def get_all_languages_raw_df() -> pd.DataFrame:
    """
    Combine the raw leaderboard data from all languages.
    This is used for plots (e.g., Fundamental vs Professional) that rely on the
    original task columns.
    """
    combined_df = pd.DataFrame()
    for key in st.session_state:
        if key.startswith("leaderboard_data_"):
            lang = key.split("leaderboard_data_")[1]
            temp_df = st.session_state[key].copy()
            temp_df["language"] = lang
            combined_df = pd.concat([combined_df, temp_df], ignore_index=True)
    return combined_df
# -----------------------------------------------------------------------------
# Sidebar for Navigation and Global Settings
# -----------------------------------------------------------------------------
st.sidebar.markdown(
    "<h2 style='text-align: center;'>IberBench 🏆</h2>", unsafe_allow_html=True
)
menu = st.sidebar.radio(
    "", ["Leaderboard 🏆", "Submit Model 🚀", "Datasets 📚", "About ℹ️"]
)
st.sidebar.markdown("---")
st.sidebar.markdown(
    """
    <p style="font-size:0.9rem; text-align:center;">
    A leaderboard of LLMs on languages from the Iberian Peninsula and Ibero-America
    </p>
    """,
    unsafe_allow_html=True,
)
def load_languages_set():
    with open(LANGUAGES_SETTINGS, "r") as f:
        return yaml_load(f)
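# Expected shape of etc/languages_settings.yml (keys and entries below are
# illustrative; only the "category" field is read by the code that follows):
#
#   Spanish:
#     category: Iberian Peninsula languages
#   Mexican Spanish:
#     category: Spanish Variations languages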
lang_set = load_languages_set()
for lang in lang_set.keys():
    data = load_data(lang)
    if f"leaderboard_data_{lang}" not in st.session_state:
        st.session_state[f"leaderboard_data_{lang}"] = data
# -----------------------------------------------------------------------------
# Main Content based on Navigation
# -----------------------------------------------------------------------------
if menu == "Leaderboard 🏆":
    st.markdown(
        "<div class='main-header'><h1>Leaderboard 🏆</h1></div>",
        unsafe_allow_html=True,
    )
    lang_iber = [
        k
        for k, v in lang_set.items()
        if v["category"] == "Iberian Peninsula languages"
    ]
    st.markdown("### General ranking 🌍")
    # ---------------------------
    # All-language plots section
    # ---------------------------
    # Use aggregated data for plots where each model must appear once with averaged values.
    aggregated_df = get_all_languages_aggregated_summary_df()
    create_table_all_results(aggregated_df)
    st.markdown("### General plots 📊")
    # Use raw data for the Fundamental vs Professional and Task Category plots.
    raw_all_df = get_all_languages_raw_df()
    all_lang_tabs = st.tabs(
        [
            "Top 10 performance 🔥",
            "Performance vs. size 📈",
            "Type distribution 🎨",
            "Performance per type 💡",
            "Distribution of sizes 📏",
            "Fundamental vs industry ⚖️",
            "Performance per task category 📊",
            "Performance per language 🌍",
        ]
    )
    with all_lang_tabs[0]:
        create_radar_chart(aggregated_df, "all_radar")
    with all_lang_tabs[1]:
        create_scatter_chart(aggregated_df, "all_scatter")
    with all_lang_tabs[2]:
        create_pie_chart(aggregated_df, "all_pie")
    with all_lang_tabs[3]:
        create_box_plot(aggregated_df, "all_box")
    with all_lang_tabs[4]:
        create_histogram(aggregated_df, "all_hist")
    with all_lang_tabs[5]:
        # Use the raw combined data so that the professional task columns are available.
        create_box_plot_per_task_category(raw_all_df, "all_box_task_cat")
    with all_lang_tabs[6]:
        create_box_plot_per_semantic_category(raw_all_df, "all_box_sem_cat")
    with all_lang_tabs[7]:
        create_box_plot_per_language("all_box_language")
    # Results per language
    st.markdown("---")
    st.markdown("### Language ranking 🏅")
    lang_choice = st.selectbox(
        "Select a language 🌍:", list(lang_iber), key="lang_leaderboard"
    )
    if lang_choice == "Spanish":
        variations = [
            k
            for k, v in lang_set.items()
            if v["category"] in ["Spanish Variations languages"]
        ]
        tabs_var = st.tabs(variations)
        for var, tab in zip(variations, tabs_var):
            with tab:
                create_results_visualization_lang(var)
    else:
        create_results_visualization_lang(lang_choice)
elif menu == "Submit Model ๐": | |
st.markdown( | |
"<div class='main-header'><h1>Submit Your Model ๐</h1></div>", | |
unsafe_allow_html=True, | |
) | |
st.markdown("## How to submit a model ๐ค") | |
# CSS | |
st.markdown( | |
""" | |
<style> | |
.card-container { | |
max-width: 300px; | |
margin: auto; | |
text-align: left; | |
font-size: 1rem; | |
padding: 0.5rem; | |
box-sizing: border-box; | |
} | |
.id-container { | |
display: flex; | |
align-items: center; | |
margin-bottom: 1rem; | |
} | |
.id-circle { | |
width: 32px; | |
height: 32px; | |
border-radius: 50%; | |
display: flex; | |
align-items: center; | |
justify-content: center; | |
border: 1px solid #007BFF; | |
color: #007BFF; | |
font-size: 0.875rem; | |
font-weight: 600; | |
background-color: transparent; | |
margin-right: 8px; | |
} | |
.guide-content { | |
word-wrap: break-word; | |
} | |
.guide-title { | |
font-weight: bold; | |
font-size: 1rem; | |
margin-left: 8px; | |
} | |
</style> | |
""", | |
unsafe_allow_html=True, | |
) | |
def render_card(content): | |
html = f""" | |
<div class="card-container"> | |
<div class="guide-content"> | |
{content} | |
</div> | |
</div> | |
""" | |
return html | |
# Load your HTML content from files | |
guide_info_list = [] | |
html_path = "assets/html" | |
filenames = sorted(os.listdir(html_path)) | |
for filename in filenames: | |
file_path = os.path.join(html_path, filename) | |
with open(file_path, "r", encoding="utf-8") as file: | |
raw_html = file.read() | |
guide_info_list.append(raw_html) | |
# Create the grid | |
num_columns = 3 | |
num_rows = 2 | |
for row in range(num_rows): | |
cols = st.columns(num_columns) | |
for col in range(num_columns): | |
index = row * num_columns + col | |
if index < len(guide_info_list): | |
with cols[col]: | |
st.markdown( | |
render_card(guide_info_list[index]), | |
unsafe_allow_html=True, | |
) | |
st.markdown("## Submission form ๐") | |
with st.form("submit_model_form", clear_on_submit=True): | |
model_name = st.text_input( | |
"Model Name (format: user_name/model_name) ๐งฉ", | |
help="Your model should be public on the Hub and follow the username/model-id format (e.g. mistralai/Mistral-7B-v0.1).", | |
) | |
description = st.text_area( | |
"Description โ๏ธ", | |
help="Add a description of the proposed model for the evaluation to help prioritize its evaluation.", | |
) | |
user_contact = st.text_input( | |
"Your Contact Email ๐ง", | |
help="User e-mail to contact when there are updates.", | |
) | |
precision_option = st.selectbox( | |
"Choose precision format ๐ข:", | |
help="Size limits vary by precision. Choose carefully as incorrect precision can cause evaluation errors.", | |
options=["float16", "bfloat16", "8bit", "4bit", "GPTQ"], | |
index=0, | |
) | |
weight_type_option = st.selectbox( | |
"Select weight type โ๏ธ:", | |
help="Original: Complete model weights. Delta: Differences from base model. Adapter: Lightweight fine-tuning layers.", | |
options=["Original", "Adapter", "Delta"], | |
index=0, | |
) | |
base_model_name = st.text_input( | |
"Base model (if applicable) ๐๏ธ", | |
help="Required for delta weights or adapters. This helps calculate total parameter count.", | |
value="", | |
) | |
model_type = st.selectbox( | |
"Choose model type ๐:", | |
help="๐ข Pretrained: Base models, ๐ถ Fine-tuned: Domain-specific, ๐ฌ Chat: Conversational, ๐ค Merge: Combined weights.", | |
options=["๐ข Pretrained", "๐ถ Fine-tuned", "๐ฌ Chat", "๐ค Merge"], | |
) | |
submit_button = st.form_submit_button("Submit Request ๐") | |
if submit_button: | |
use_chat_template = True if model_type == "๐ฌ Chat" else False | |
validation_error = validate_model( | |
model_name, | |
precision_option, | |
base_model_name, | |
weight_type_option, | |
use_chat_template, | |
) | |
if validation_error is not None: | |
st.error(validation_error) | |
elif not re.match(r"[^@]+@[^@]+\.[^@]+", user_contact): | |
st.error("Invalid email address โ ๏ธ.") | |
else: | |
input_dict = { | |
"model_name": model_name, | |
"description": description, | |
"user_contact": user_contact, | |
"precision_option": precision_option, | |
"weight_type_option": weight_type_option, | |
"base_model_name": base_model_name, | |
"model_type": model_type, | |
} | |
try: | |
log_submission(input_dict) | |
st.success("Your request has been sent successfully ๐.") | |
except Exception as e: | |
st.error( | |
f"Failed to send your request: {e}. Please try again later." | |
) | |
elif menu == "Datasets ๐": | |
st.markdown( | |
"<div class='main-header'><h1>Dataset Information ๐</h1></div>", | |
unsafe_allow_html=True, | |
) | |
st.markdown("### Check the datasets ๐") | |
lang_iber = [ | |
k | |
for k, v in lang_set.items() | |
if v["category"] == "Iberian Peninsula languages" | |
] | |
lang_choice = st.selectbox( | |
"Select a language ๐:", list(lang_iber), key="lang_dataset" | |
) | |
if lang_choice in ["Spanish"]: | |
variations = [ | |
k | |
for k, v in lang_set.items() | |
if v["category"] in ["Spanish Variations languages"] | |
] | |
tabs_var = st.tabs(variations) | |
for var, tab in zip(variations, tabs_var): | |
with tab: | |
create_dataset_info_per_language(var) | |
else: | |
create_dataset_info_per_language(lang_choice) | |
st.markdown("### Task mappings ๐") | |
st.markdown( | |
"For the sake of completeness, here we show the mappings we use in the leaderboard to aggregate tasks." | |
) | |
tab1, tab2 = st.tabs( | |
["Semantic categories ๐๏ธ", "Fundamental vs. Industry โ๏ธ"] | |
) | |
with tab1: | |
st.json( | |
{ | |
category: [task.removeprefix("iberbench/") for task in tasks] | |
for category, tasks in semantic_categories.items() | |
} | |
) | |
with tab2: | |
st.json( | |
{ | |
category: [task.removeprefix("iberbench/") for task in tasks] | |
for category, tasks in professional_mapping.items() | |
} | |
) | |
elif menu == "About โน๏ธ": | |
st.markdown( | |
"<div class='main-header'><h1>About โน๏ธ</h1></div>", | |
unsafe_allow_html=True, | |
) | |
with open("./assets/md/about.md", "r") as fr: | |
st.markdown(fr.read(), unsafe_allow_html=True) | |