from typing import Literal
from functools import partial
from content import *
import gradio as gr
import numpy as np
import pandas as pd
import pandas as pd

# Raw score table backing the leaderboard. The CSV must provide these columns:
# - model: the name of the model
# - language: the language of the model
# - dataset: the dataset used to evaluate the model
# - score: the score of the model on the (language, dataset) pair
# - model_type: the type of the model (e.g. "Chat Model", "Base Model")
df = pd.read_csv("data/raw_scores.csv")

# Distinct values of each filterable column, in order of first appearance;
# they populate the checkbox groups of the UI.
choices_language, choices_dataset, choices_model_type = (
    list(df[column].unique()) for column in ("language", "dataset", "model_type")
)

# Utility functions for data processing
def _min_max_normalize(scores):
    """Rescale *scores* linearly so the minimum maps to 0 and the maximum to 1.

    When all scores are equal (including a single score) the range is zero;
    dividing by it would produce NaN for every entry, so such a group is
    mapped to 0.0 instead.
    """
    lowest = np.min(scores)
    spread = np.max(scores) - lowest
    if spread == 0:
        # Divide by 1 rather than 0: the numerator is all zeros, so the
        # result is a float 0.0 for every entry instead of NaN.
        spread = 1
    return (scores - lowest) / spread


# Reducers collapse a group of scores into a single number.
# NOTE(review): kept as lambdas rather than bare np.* callables; pandas may
# treat bare NumPy reducers specially in `agg` - confirm before simplifying.
reduce_functions = {
    "Mean": lambda x: np.mean(x),
    "Median": lambda x: np.median(x),
    "Max": lambda x: np.max(x),
    "Min": lambda x: np.min(x),
}

# Mappers transform the scores of one group element-wise (same length out).
map_functions = {
    "Raw": lambda x: x,
    # Dense ranking with the highest score ranked 1.
    "Rank": partial(pd.Series.rank, ascending=False, method="dense"),
    "Normalize": _min_max_normalize,
}

# Sort direction that puts the best model first for each mapper: ranks are
# better when smaller, raw and normalized scores when larger.
score_ascending = {
    "Raw": False,
    "Rank": True,
    "Normalize": False,
}


def prepare_dataframe(
    df: pd.DataFrame,
    filters: dict[str, list[str]],
    group_by: Literal["language", "dataset"],
    map_function: str,
    reduce_function: str,
) -> pd.DataFrame:
    """Filter, aggregate and style the score table for display.

    Args:
        df: Score table with the columns ``model``, ``language``, ``dataset``
            and ``score``; any further column is carried along as a
            descriptive column of the model. The input is not modified.
        filters: Maps a column name to the values to keep in that column.
            Must contain the keys ``"language"`` and ``"dataset"``.
        group_by: Which of ``language`` / ``dataset`` gets one output column
            per selected value; scores are reduced along the other one.
        map_function: Key of ``map_functions``, applied to the scores of each
            (language, dataset) pair.
        reduce_function: Key of ``reduce_functions``, used both for the
            per-column scores and for the ``Overall Score`` column.

    Returns:
        A pandas ``Styler`` with one row per model, sorted best-first by
        ``Overall Score`` and with the best value of each score column
        highlighted. When no row survives the filters, or every remaining
        model is hidden for lacking scores, a plain empty ``DataFrame`` is
        returned instead.
    """
    # Filters contains a value subset for each column
    language = filters["language"]
    dataset = filters["dataset"]

    # Columns are divided into two groups: other_columns and group_by_columns.
    # The order of df.columns is kept (a set difference would make the order
    # of the descriptive columns depend on hash order).
    other_columns = [
        column
        for column in df.columns
        if column not in ("language", "dataset", "score")
    ]
    group_by_columns = list(filters[group_by])

    # Step 1: Filter the dataframe based on the selected values of each column
    for column, allowed_values in filters.items():
        df = df[df[column].isin(allowed_values)]
    # If dataframe is empty, return an empty dataframe
    if df.empty:
        gr.Warning(
            "No scores remain after the filter application. Please verify the checkboxes."
        )
        return pd.DataFrame(columns=other_columns)

    # Sanity check: a model must have a score for every selected
    # (language, dataset) pair. Duplicated rows are ignored for the count only.
    score_count = (
        df.drop_duplicates(subset=["model", "language", "dataset"])
        .groupby(["model"])["score"]
        .count()
    )
    invalid_models = score_count[
        score_count < len(language) * len(dataset)
    ].index.tolist()
    df = df[~df["model"].isin(invalid_models)]
    # Send a warning message if there are any invalid models
    for model in invalid_models:
        gr.Warning(
            f"<strong>{model}</strong> is lacking some scores thus hidden. Please report to the maintainers."
        )
    # Every remaining model may have been hidden above; without this guard
    # the pivot below would run on an empty frame and the column selection
    # would look up group_by columns that do not exist.
    if df.empty:
        return pd.DataFrame(columns=other_columns)

    # Step 2: Process Scores
    # Step 2.0: Map the scores along each (language, dataset) pair.
    # `assign` builds a new frame instead of writing into the filtered slice.
    df = df.assign(
        score=df.groupby(["language", "dataset"])["score"].transform(
            map_functions[map_function]
        )
    )
    # Step 2.1: Reduce the scores along the column other than `group_by`
    reducer = reduce_functions[reduce_function]
    df = (
        df.groupby(other_columns + [group_by])
        .agg({"score": reducer})
        .reset_index()
    )
    # Step 2.2: Reduce the scores along `group_by` to get the overall score of each model
    overall_score = df.groupby(other_columns).agg({"score": reducer})["score"]
    # Step 2.3: Pivot the dataframe, then concat the overall score
    df = df.pivot(index=other_columns, columns=group_by, values=["score"]).droplevel(
        0, 1
    )
    df["Overall Score"] = overall_score

    # Step 3: Styling for display
    # - Put `model` first, then the descriptive columns, then the scores
    # - Sort the dataframe by the reduced score, best model first
    # - Format the score to 2 decimal places if it is a float
    # - Highlight the best value in each score column
    detail_columns = [column for column in other_columns if column != "model"]
    score_columns = ["Overall Score"] + group_by_columns
    ascending = score_ascending[map_function]
    styler = (
        df.reset_index()[["model"] + detail_columns + score_columns]
        .sort_values(by="Overall Score", ascending=ascending)
        .style.format(precision=2)
    )
    # Ascending order means smaller is better, so the minimum is the best value.
    highlight_best = styler.highlight_min if ascending else styler.highlight_max
    return highlight_best(axis=0, color="#18864B", subset=score_columns)


def _refresh_table(
    model_type, language, dataset, group_by, map_function, reduce_function
):
    """Rebuild the score table from the current values of the controls.

    The positional order matches the `inputs` list of the event listener below.
    """
    return prepare_dataframe(
        df=df,
        filters={
            "language": language,
            "dataset": dataset,
            "model_type": model_type,
        },
        group_by=group_by,
        map_function=map_function,
        reduce_function=reduce_function,
    )


with gr.Blocks(theme=gr.themes.Base()) as demo:
    # UI definition
    with gr.Row():
        with gr.Column():
            gr.Markdown(MARKDOWN_HEADER)

            # Filters: every value is selected by default.
            checkbox_language = gr.CheckboxGroup(
                choices=choices_language,
                value=choices_language,
                label="Language(s)",
                interactive=True,
            )
            checkbox_dataset = gr.CheckboxGroup(
                choices=choices_dataset,
                value=choices_dataset,
                label="Dataset(s)",
                interactive=True,
            )
            checkbox_model_type = gr.CheckboxGroup(
                choices=choices_model_type,
                value=choices_model_type,
                label="Model Type(s)",
                interactive=True,
            )

            # Score processing options. The choices are passed as lists
            # rather than dict key views.
            dropdown_map_function = gr.Dropdown(
                choices=list(map_functions),
                value="Raw",
                label="Map Function",
                interactive=True,
                info=MARKDOWN_MAP_FUNCTION,
            )
            dropdown_reduce_function = gr.Dropdown(
                choices=list(reduce_functions),
                value="Mean",
                label="Reduce Function",
                interactive=True,
                info=MARKDOWN_REDUCE_FUNCTION,
            )
            ratio_group_by = gr.Radio(
                choices=["language", "dataset"],
                value="language",
                label="Group by",
                interactive=True,
            )

            # Initial table, computed with the same defaults the controls
            # above start with.
            dataframe = gr.DataFrame(
                prepare_dataframe(
                    df=df,
                    filters={
                        "language": choices_language,
                        "dataset": choices_dataset,
                    },
                    group_by="language",
                    map_function="Raw",
                    reduce_function="Mean",
                ),
                interactive=False,
            )
            gr.Code(
                language="markdown",
                label="Citation",
                value=CITATION,
            )

    # Event listeners: a change of any control recomputes the table.
    gr.on(
        triggers=[
            checkbox_model_type.change,
            checkbox_language.change,
            checkbox_dataset.change,
            ratio_group_by.change,
            dropdown_reduce_function.change,
            dropdown_map_function.change,
        ],
        fn=_refresh_table,
        inputs=[
            checkbox_model_type,
            checkbox_language,
            checkbox_dataset,
            ratio_group_by,
            dropdown_map_function,
            dropdown_reduce_function,
        ],
        outputs=[dataframe],
    )

if __name__ == "__main__":
    demo.launch()