import json
from dataclasses import dataclass, field, fields
from functools import cached_property, partial
from pathlib import Path
from typing import Literal

from content import *
import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
# This dataframe must contain the following columns:
# - model: the name of the model
# - language: the evaluation language
# - dataset: the dataset used to evaluate the model
# - score: the model's score on that (language, dataset) pair
# - model_type: the type of the model (e.g. "Chat Model", "Base Model")
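# An illustrative row (hypothetical values, not taken from the actual CSV):
#   model="Foo-7B-Chat", language="en", dataset="MMLU", score=67.3, model_type="Chat Model"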
df = pd.read_csv("data/raw_scores.csv")
choices_language = list(df["language"].unique())
choices_dataset = list(df["dataset"].unique())
choices_model_type = list(df["model_type"].unique())
# Utility functions for data processing
reduce_functions = {
"Mean": lambda x: np.mean(x),
"Median": lambda x: np.median(x),
"Max": lambda x: np.max(x),
"Min": lambda x: np.min(x),
}
map_functions = {
"Raw": lambda x: x,
"Rank": partial(pd.Series.rank, ascending=False, method="dense"),
"Normalize": lambda x: (x - np.min(x)) / (np.max(x) - np.min(x)),
}
score_ascending = {
"Raw": False,
"Rank": True,
"Normalize": False,
}
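# Illustrative pipeline (hypothetical scores): with map_function="Rank", the per-(language, dataset)
# scores [80.0, 75.0, 90.0] become dense ranks [2.0, 3.0, 1.0]; reduce_function="Mean" then averages
# the mapped scores, so for "Rank" a lower overall value is better (hence score_ascending["Rank"] is True).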
def prepare_dataframe(
df: pd.DataFrame,
filters: dict[str, list[str]],
group_by: Literal["language", "dataset"],
map_function: str,
reduce_function: str,
) -> pd.DataFrame:
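    """Filter, map, reduce, and pivot the raw scores into a styled leaderboard table.

    Args:
        df: the raw score dataframe described above.
        filters: for each filterable column ("language", "dataset", "model_type"),
            the subset of values to keep.
        group_by: the dimension whose values become the pivoted score columns.
        map_function: key into `map_functions`, applied within each (language, dataset) group.
        reduce_function: key into `reduce_functions`, used to collapse the other dimension
            and to compute the "Overall Score" column.

    Returns:
        A pandas Styler ready for display in a gr.DataFrame, or an empty DataFrame
        if no rows survive the filters.
    """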
# Filters contains a value subset for each column
language = filters["language"]
dataset = filters["dataset"]
    # Columns other than `language`, `dataset`, and `score` (i.e. `model` and `model_type`)
    # identify each row; the selected `group_by` values become the pivoted score columns.
    other_columns = list(set(df.columns) - {"language", "dataset", "score"})
    group_by_columns = filters[group_by]
    # Step 1: Filter the dataframe, keeping only rows that match every selected filter value
    for k, v in filters.items():
        df = df[df[k].isin(v)]
    # If nothing survives the filters, warn and return an empty dataframe
    if len(df) == 0:
        gr.Warning(
            "No scores remain after applying the filters. Please check the selected options."
        )
        return pd.DataFrame(columns=other_columns)
    # Sanity check: every model must have a score for each selected (language, dataset) pair
score_count = (
df.drop_duplicates(subset=["model", "language", "dataset"])
.groupby(["model"])["score"]
.count()
)
invalid_models = score_count[
score_count < len(language) * len(dataset)
].index.tolist()
df = df[~df["model"].isin(invalid_models)]
    # Warn about each model that was hidden because of missing scores
    for model in invalid_models:
        gr.Warning(
            f"<strong>{model}</strong> is missing some scores and has been hidden. Please report this to the maintainers."
        )
# Step 2: Process Scores
# Step 2.0: Map the scores along each (language, dataset) pair
df["score"] = df.groupby(["language", "dataset"])["score"].transform(
map_functions[map_function]
)
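    # e.g. with map_function="Normalize", scores [80.0, 75.0, 90.0] within one
    # (language, dataset) group become [0.33, 0.0, 1.0] (hypothetical values)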
    # Step 2.1: Reduce the scores along the dimension other than `group_by` (collapsing either `language` or `dataset`)
df = (
df.groupby(other_columns + [group_by])
.agg({"score": reduce_functions[reduce_function]})
.reset_index()
)
# Step 2.2: Reduce the scores along `group_by` to get the overall score of each model
reduced_col = df.groupby(other_columns).agg(
{"score": reduce_functions[reduce_function]}
)["score"]
# Step 2.3: Pivot the dataframe, then concat the overall score
    df = df.pivot(index=other_columns, columns=group_by, values=["score"]).droplevel(
        0, axis=1
    )
df["Overall Score"] = reduced_col
    # Step 3: Styling for display
    # - Sort the dataframe by the overall score
    # - Order the columns for better readability
    # - Highlight the best value in each score column (min for "Rank", max otherwise)
    # - Format scores to 2 decimal places
other_columns.remove("model")
df = (
df.reset_index()[
["model"] + other_columns + ["Overall Score"] + group_by_columns
]
.sort_values(by="Overall Score", ascending=score_ascending[map_function])
.style.format(precision=2)
)
if score_ascending[map_function]:
df = df.highlight_min(
axis=0, color="#18864B", subset=["Overall Score"] + group_by_columns
)
else:
df = df.highlight_max(
axis=0, color="#18864B", subset=["Overall Score"] + group_by_columns
)
return df
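# Standalone usage sketch (outside the Gradio UI), e.g. for quick debugging in a REPL:
#   styled = prepare_dataframe(
#       df=df,
#       filters={"language": choices_language, "dataset": choices_dataset,
#                "model_type": choices_model_type},
#       group_by="dataset",
#       map_function="Rank",
#       reduce_function="Mean",
#   )
#   styled.data  # the plain DataFrame behind the returned Styler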
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown(MARKDOWN_HEADER)
with gr.Column():
# UI definition
checkbox_language = gr.CheckboxGroup(
choices=choices_language,
value=choices_language,
label="Language(s)",
interactive=True,
)
checkbox_dataset = gr.CheckboxGroup(
choices=choices_dataset,
value=choices_dataset,
label="Dataset(s)",
interactive=True,
)
checkbox_model_type = gr.CheckboxGroup(
choices=choices_model_type,
value=choices_model_type,
label="Model Type(s)",
interactive=True,
)
        dropdown_map_function = gr.Dropdown(
            choices=list(map_functions.keys()),
            value="Raw",
            label="Map Function",
            interactive=True,
            info=MARKDOWN_MAP_FUNCTION,
        )
        dropdown_reduce_function = gr.Dropdown(
            choices=list(reduce_functions.keys()),
            value="Mean",
            label="Reduce Function",
            interactive=True,
            info=MARKDOWN_REDUCE_FUNCTION,
        )
        radio_group_by = gr.Radio(
            choices=["language", "dataset"],
            value="language",
            label="Group by",
            interactive=True,
        )
        dataframe = gr.DataFrame(
            prepare_dataframe(
                df=df,
                filters={
                    "language": choices_language,
                    "dataset": choices_dataset,
                    "model_type": choices_model_type,
                },
                group_by="language",
                map_function="Raw",
                reduce_function="Mean",
            )
        )
# Event listeners
gr.on(
triggers=[
checkbox_model_type.change,
checkbox_language.change,
checkbox_dataset.change,
            radio_group_by.change,
dropdown_reduce_function.change,
dropdown_map_function.change,
],
fn=lambda model_type, language, dataset, group_by, map_function, reduce_function: prepare_dataframe(
df=df,
filters={
"language": language,
"dataset": dataset,
"model_type": model_type,
},
group_by=group_by,
map_function=map_function,
reduce_function=reduce_function,
),
inputs=[
checkbox_model_type,
checkbox_language,
checkbox_dataset,
            radio_group_by,
dropdown_map_function,
dropdown_reduce_function,
],
outputs=[dataframe],
)
if __name__ == "__main__":
demo.launch(debug=True, server_port=7899)