import hashlib

import pandas as pd
import gradio as gr
from gradio.themes.utils import sizes
from gradio_leaderboard import Leaderboard

from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
from constants import (
    ASSAY_RENAME,  # noqa: F401 (used via @ASSAY_RENAME inside .query below)
    EXAMPLE_FILE_DICT,
    LEADERBOARD_DISPLAY_COLUMNS,
    ABOUT_TAB_NAME,
    FAQ_TAB_NAME,
    TERMS_URL,
    LEADERBOARD_COLUMNS_RENAME,
    LEADERBOARD_COLUMNS_RENAME_LIST,
    SUBMIT_TAB_NAME,
)
from submit import make_submission
from utils import fetch_hf_results, show_output_box, get_time


def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
    """
    Format the dataframe for display on the leaderboard.

    The dataframe comes from utils.fetch_hf_results().
    """
    df = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
    if assay is not None:
        df = df[df["assay"] == assay]
    df = df[LEADERBOARD_DISPLAY_COLUMNS]
    df = df.sort_values(by="spearman", ascending=False)

    # After sorting, explain why the heldout test set has no scores yet.
    # Note: this could also be a text box at the bottom of the leaderboard:
    # "Results for the Heldout Test Set are only evaluated at competition close."
    # Convert the spearman column to string to avoid dtype incompatibility when assigning text.
    df["spearman"] = df["spearman"].astype(str)
    df.loc[
        (df["dataset"] == "Heldout Test Set") & (df["spearman"] == "nan"), "spearman"
    ] = "N/A, evaluated at competition close"

    # Finally, rename columns for readability
    df = df.rename(columns=LEADERBOARD_COLUMNS_RENAME)
    return df


def get_leaderboard_object(assay: str | None = None):
    filter_columns = ["dataset"]
    if assay is None:
        filter_columns.append("property")
    # TODO how to sort filter columns alphabetically? (see the sketch below)
    # Bug: search_columns can't be left empty, otherwise it raises "Column None not found in headers"
    # Note(Lood): Would be nice to make it clear that the Search Column searches on model name
    lb = Leaderboard(
        value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
        datatype=["str", "str", "str", "number", "str"],
        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(
            ["model", "property", "spearman", "dataset", "user"]
        ),
        search_columns=["Model Name"],
        filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns),
        every=15,
        render=True,
    )
    return lb
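
# Sketch for the sorting TODO above, kept commented out because it is untested.
# It assumes LEADERBOARD_COLUMNS_RENAME_LIST returns a plain list of display-name
# strings and that Leaderboard preserves the order of the list it is given:
#
#     filter_columns=sorted(LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns)),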

# Initialize global dataframe
current_dataframe = fetch_hf_results()

# Lood: Two problems currently:
# 1. The data_version state value wasn't being incremented (even though it was
#    triggering the dataframe change correctly). Addressed below by passing
#    data_version through the tick event as an input/output instead of
#    mutating State.value directly.
# 2. The global current_dataframe is shared across all sessions.

# Make the font size bigger using a gradio theme
with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
    timer = gr.Timer(3)  # Run every 3 seconds when the page is focused
    data_version = gr.State(value=0)  # Track data changes via a content hash

    def update_current_dataframe(current_version):
        global current_dataframe
        new_dataframe = fetch_hf_results()
        new_hash = hashlib.sha256(
            pd.util.hash_pandas_object(new_dataframe).values
        ).hexdigest()
        # Only bump the version (and thereby trigger leaderboard refreshes)
        # if the data has actually changed
        if new_hash != current_version:
            print(
                f"Dataframe has changed at {get_time()}. "
                f"Old hash: {str(current_version)[:8]}, new hash: {new_hash[:8]}"
            )
            current_dataframe = new_dataframe
            return new_hash
        return current_version

    timer.tick(fn=update_current_dataframe, inputs=data_version, outputs=data_version)
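
    # Possible fix for problem 2 above, kept commented out as an untested sketch:
    # hold the results in a per-session gr.State instead of the module-level
    # global, so concurrent sessions don't share (or clobber) one dataframe.
    #
    #     session_dataframe = gr.State(value=fetch_hf_results())
    #     timer.tick(fn=fetch_hf_results, outputs=session_dataframe)
    #
    # The refresh callbacks would then take session_dataframe as an input
    # instead of reading the global current_dataframe.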

    ## Header
    with gr.Row():
        with gr.Column(scale=6):  # bigger text area
            gr.Markdown(
                f"""
## Welcome to the Ginkgo Antibody Developability Benchmark!

Participants can submit their model to the leaderboards by simply uploading a CSV file (see the "{SUBMIT_TAB_NAME}" tab).
You can **predict any or all of the 5 properties**, and you can filter the main leaderboard by property.
See more details in the "{ABOUT_TAB_NAME}" tab.

Submissions close on 1 November 2025.
"""
            )
        with gr.Column(scale=2):  # smaller side column for the logo
            gr.Image(
                value="./assets/competition_logo.jpg",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                width="25vw",  # Take up the width of the column (2/8 = 1/4)
            )

    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.TabItem(ABOUT_TAB_NAME, elem_id="abdev-benchmark-tab-table"):
            gr.Markdown(ABOUT_INTRO)
            gr.Image(
                value="./assets/prediction_explainer.png",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                width="50vw",
            )
            gr.Markdown(ABOUT_TEXT)

        # Procedurally make these 5 tabs
        # for i, assay in enumerate(ASSAY_LIST):
        #     with gr.TabItem(
        #         f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}",
        #         elem_id="abdev-benchmark-tab-table",
        #     ) as tab_item:
        #         gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
        #         lb = get_leaderboard_object(assay=assay)
        #
        #         def refresh_leaderboard(assay=assay):
        #             return format_leaderboard_table(df_results=current_dataframe, assay=assay)
        #
        #         # Refresh when the data version changes
        #         data_version.change(fn=refresh_leaderboard, outputs=lb)

        # Note(Lood): Trying out just one leaderboard. We could also have a dropdown here
        # that shows a separate leaderboard per property, but that's the same as the filters.
        with gr.TabItem(
            "🏆 Leaderboard", elem_id="abdev-benchmark-tab-table"
        ) as leaderboard_tab:
            gr.Markdown(
                """
# Overall Leaderboard (filter below by property)

Each property has its own prize, and participants can submit models for any combination of properties.

**Note**: It is easy to overfit the public GDPa1 dataset, which results in very high Spearman correlations.
We suggest training with cross-validation a limited number of times, to get a better indication of the
model's performance on the eventual private test set.
"""
            )
            lb = get_leaderboard_object()

            def refresh_overall_leaderboard(version):
                print(
                    f"Refreshing overall leaderboard at {get_time()}. "
                    f"Data version: {str(version)[:8]}"
                )
                return format_leaderboard_table(df_results=current_dataframe)

            # Refresh when the data version changes
            data_version.change(
                fn=refresh_overall_leaderboard, inputs=data_version, outputs=lb
            )

            # At the bottom of the leaderboard, we can keep missing test set results as NaN and explain:
            # gr.Markdown(
            #     "_ℹ️ Results for the private test set will not be shown here and will be used for final judging at the close of the competition._"
            # )

        with gr.TabItem(SUBMIT_TAB_NAME, elem_id="boundary-benchmark-tab-table"):
            gr.Markdown(SUBMIT_INTRUCTIONS)
            submission_type_state = gr.State(value="GDPa1_cross_validation")
            download_file_state = gr.State(value=EXAMPLE_FILE_DICT["GDPa1_cross_validation"])
            with gr.Row():
                with gr.Column():
                    username_input = gr.Textbox(
                        label="Username",
                        placeholder="Enter your Hugging Face username",
                        info="This will be used to identify valid submissions, and to update your results if you submit again.",
                    )
                    # gr.LoginButton()
                    anonymous_checkbox = gr.Checkbox(
                        label="Anonymous",
                        value=False,
                        info="If checked, your username will be replaced with an anonymous hash on the leaderboard.",
                    )
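
                    # The anonymization itself presumably happens downstream in
                    # submit.make_submission; a minimal sketch of one way to derive
                    # a stable anonymous handle with the already-imported hashlib:
                    #
                    #     anon_name = "anon-" + hashlib.sha256(username.encode()).hexdigest()[:8]
                    #
                    # The same username always maps to the same handle, so repeat
                    # submissions can still be grouped without exposing the name.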
                    model_name_input = gr.Textbox(
                        label="Model Name",
                        placeholder="Enter your model name (e.g., 'MyProteinLM-v1')",
                        info="This will be displayed on the leaderboard.",
                    )
                    model_description_input = gr.Textbox(
                        label="Model Description (optional)",
                        placeholder="Brief description of your model and approach",
                        info="Describe your model, training data, or methodology.",
                        lines=3,
                    )
                    registration_code = gr.Textbox(
                        label="Registration Code",
                        placeholder="Enter your registration code",
                        info="If you did not receive a registration code, please sign up on the Competition Registration page or email antibodycompetition@ginkgobioworks.com.",
                    )

                    # Extra validation / warning: conditional warning box and checkbox
                    high_corr_warning = gr.Markdown(
                        value="", visible=False, elem_classes=["warning-box"]
                    )
                    high_corr_checkbox = gr.Checkbox(
                        label="I understand this may be overfitting",
                        value=False,
                        visible=False,
                        info="This checkbox will appear if your submission shows suspiciously high correlations (>0.9).",
                    )
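
                    # The >0.9 correlation check itself presumably runs server-side
                    # (e.g. in submit.make_submission); a minimal sketch of such a
                    # check against the public labels, with hypothetical column names:
                    #
                    #     from scipy.stats import spearmanr
                    #     rho, _ = spearmanr(preds["prediction"], labels["measurement"])
                    #     suspiciously_high = rho > 0.9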

                with gr.Column():
                    submission_type_dropdown = gr.Dropdown(
                        choices=["GDPa1", "GDPa1_cross_validation", "Heldout Test Set"],
                        value="GDPa1_cross_validation",
                        label="Submission Type",
                        info=(
                            "Choose the dataset corresponding to the track you're participating in. "
                            f"See the '{ABOUT_TAB_NAME}' tab for details."
                        ),
                    )
                    download_button = gr.DownloadButton(
                        label="📥 Download example submission CSV for GDPa1_cross_validation",
                        value=EXAMPLE_FILE_DICT["GDPa1_cross_validation"],
                        variant="secondary",
                    )
                    submission_file = gr.File(label="Submission CSV")

                    def update_submission_type_and_file(submission_type):
                        """
                        Based on the submission type selected in the dropdown:
                        - update the submission type state
                        - dynamically update the example file for download
                        """
                        # Fall back to the default example file if the submission type
                        # is somehow missing from the dict (the original fallback
                        # repeated the same lookup and could still raise KeyError)
                        download_file = EXAMPLE_FILE_DICT.get(
                            submission_type, EXAMPLE_FILE_DICT["GDPa1_cross_validation"]
                        )
                        download_label = (
                            f"📥 Download example submission CSV for {submission_type}"
                        )
                        return (
                            submission_type,
                            download_file,
                            gr.DownloadButton(
                                label=download_label,
                                value=download_file,
                                variant="secondary",
                            ),
                        )

                    # Update the submission type state and download button when the dropdown changes
                    submission_type_dropdown.change(
                        fn=update_submission_type_and_file,
                        inputs=submission_type_dropdown,
                        outputs=[submission_type_state, download_file_state, download_button],
                    )

            submit_btn = gr.Button("Evaluate")
            message = gr.Textbox(label="Status", lines=1, visible=False)

            # Help message
            gr.Markdown(
                "If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space."
            )

            submit_btn.click(
                make_submission,
                inputs=[
                    submission_file,
                    username_input,
                    submission_type_state,
                    model_name_input,
                    model_description_input,
                    anonymous_checkbox,
                    registration_code,
                ],
                outputs=[message],
            ).then(
                fn=show_output_box,
                inputs=[message],
                outputs=[message],
            )

        with gr.Tab(FAQ_TAB_NAME):
            gr.Markdown("# Frequently Asked Questions")
            # FAQS maps each question string to its answer string
            for i, (question, answer) in enumerate(FAQS.items()):
                # Would love to make the questions bold, but Accordion labels don't support markdown
                with gr.Accordion(f"{i + 1}. {question}", open=False):
                    gr.Markdown(f"*{answer}*")  # Italics for answers

    # Footnote
    gr.Markdown(
        f"""
📬 For questions or feedback, contact antibodycompetition@ginkgobioworks.com or visit the Community tab at the top of this page.

Visit the Competition Registration page to sign up for updates and to register a team, and see the [Terms]({TERMS_URL}).
""", elem_id="contact-footer", ) if __name__ == "__main__": demo.launch(ssr_mode=False, share=True)