import hashlib

import pandas as pd
import gradio as gr
from gradio.themes.utils import sizes
from gradio_leaderboard import Leaderboard

from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
from constants import (
    ASSAY_RENAME,  # noqa: F401 (used via @ASSAY_RENAME inside .query below)
    EXAMPLE_FILE_DICT,
    LEADERBOARD_DISPLAY_COLUMNS,
    ABOUT_TAB_NAME,
    FAQ_TAB_NAME,
    TERMS_URL,
    LEADERBOARD_COLUMNS_RENAME,
    LEADERBOARD_COLUMNS_RENAME_LIST,
    SUBMIT_TAB_NAME,
)
from submit import make_submission
from utils import fetch_hf_results, show_output_box, get_time


def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
    """
    Format the dataframe for display on the leaderboard.

    The dataframe comes from utils.fetch_hf_results().
    """
    df = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
    if assay is not None:
        df = df[df["assay"] == assay]
    df = df[LEADERBOARD_DISPLAY_COLUMNS]
    df = df.sort_values(by="spearman", ascending=False)

    # After sorting, explain why the heldout test set has no scores yet.
    # Note: this could also be a text box at the bottom of the leaderboard:
    # "Results for the Heldout Test Set are only evaluated at competition close."
    # Convert the spearman column to string to avoid dtype incompatibility when assigning text.
    df["spearman"] = df["spearman"].astype(str)
    df.loc[
        (df["dataset"] == "Heldout Test Set") & (df["spearman"] == "nan"), "spearman"
    ] = "N/A, evaluated at competition close"

    # Finally, rename columns for readability
    df = df.rename(columns=LEADERBOARD_COLUMNS_RENAME)
    return df


def get_leaderboard_object(assay: str | None = None):
    filter_columns = ["dataset"]
    if assay is None:
        filter_columns.append("property")
    # TODO how to sort filter columns alphabetically? (see the sketch below)
    # Bug: search_columns can't be left empty, otherwise it raises "Column None not found in headers"
    # Note(Lood): Would be nice to make it clear that the Search Column searches on model name
    lb = Leaderboard(
        value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
        datatype=["str", "str", "str", "number", "str"],
        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(
            ["model", "property", "spearman", "dataset", "user"]
        ),
        search_columns=["Model Name"],
        filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns),
        every=15,
        render=True,
    )
    return lb
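
# Sketch for the sorting TODO above, kept commented out because it is untested.
# It assumes LEADERBOARD_COLUMNS_RENAME_LIST returns a plain list of display-name
# strings and that Leaderboard preserves the order of the list it is given:
#
#     filter_columns=sorted(LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns)),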

# Initialize global dataframe
current_dataframe = fetch_hf_results()

# Lood: Two problems currently:
# 1. The data_version state value wasn't being incremented (even though it was
#    triggering the dataframe change correctly). Addressed below by passing
#    data_version through the tick event as an input/output instead of
#    mutating State.value directly.
# 2. The global current_dataframe is shared across all sessions.

# Make the font size bigger using a gradio theme
with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
    timer = gr.Timer(3)  # Run every 3 seconds when the page is focused
    data_version = gr.State(value=0)  # Track data changes via a content hash

    def update_current_dataframe(current_version):
        global current_dataframe
        new_dataframe = fetch_hf_results()
        new_hash = hashlib.sha256(
            pd.util.hash_pandas_object(new_dataframe).values
        ).hexdigest()
        # Only bump the version (and thereby trigger leaderboard refreshes)
        # if the data has actually changed
        if new_hash != current_version:
            print(
                f"Dataframe has changed at {get_time()}. "
                f"Old hash: {str(current_version)[:8]}, new hash: {new_hash[:8]}"
            )
            current_dataframe = new_dataframe
            return new_hash
        return current_version

    timer.tick(fn=update_current_dataframe, inputs=data_version, outputs=data_version)
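
    # Possible fix for problem 2 above, kept commented out as an untested sketch:
    # hold the results in a per-session gr.State instead of the module-level
    # global, so concurrent sessions don't share (or clobber) one dataframe.
    #
    #     session_dataframe = gr.State(value=fetch_hf_results())
    #     timer.tick(fn=fetch_hf_results, outputs=session_dataframe)
    #
    # The refresh callbacks would then take session_dataframe as an input
    # instead of reading the global current_dataframe.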

    ## Header
    with gr.Row():
        with gr.Column(scale=6):  # bigger text area
            gr.Markdown(
                f"""
## Welcome to the Ginkgo Antibody Developability Benchmark!

Participants can submit their model to the leaderboards by simply uploading a CSV file (see the "{SUBMIT_TAB_NAME}" tab).
You can **predict any or all of the 5 properties**, and you can filter the main leaderboard by property.
See more details in the "{ABOUT_TAB_NAME}" tab.

Submissions close on 1 November 2025.
"""
            )
        with gr.Column(scale=2):  # smaller side column for the logo
            gr.Image(
                value="./assets/competition_logo.jpg",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                width="25vw",  # Take up the width of the column (2/8 = 1/4)
            )

    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.TabItem(ABOUT_TAB_NAME, elem_id="abdev-benchmark-tab-table"):
            gr.Markdown(ABOUT_INTRO)
            gr.Image(
                value="./assets/prediction_explainer.png",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                width="50vw",
            )
            gr.Markdown(ABOUT_TEXT)

        # Procedurally make these 5 tabs
        # for i, assay in enumerate(ASSAY_LIST):
        #     with gr.TabItem(
        #         f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}",
        #         elem_id="abdev-benchmark-tab-table",
        #     ) as tab_item:
        #         gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
        #         lb = get_leaderboard_object(assay=assay)
        #
        #         def refresh_leaderboard(assay=assay):
        #             return format_leaderboard_table(df_results=current_dataframe, assay=assay)
        #
        #         # Refresh when the data version changes
        #         data_version.change(fn=refresh_leaderboard, outputs=lb)

        # Note(Lood): Trying out just one leaderboard. We could also have a dropdown here
        # that shows a separate leaderboard per property, but that's the same as the filters.
        with gr.TabItem(
            "🏆 Leaderboard", elem_id="abdev-benchmark-tab-table"
        ) as leaderboard_tab:
            gr.Markdown(
                """
# Overall Leaderboard (filter below by property)

Each property has its own prize, and participants can submit models for any combination of properties.

**Note**: It is easy to overfit the public GDPa1 dataset, which results in very high Spearman correlations.
We suggest training with cross-validation a limited number of times, to get a better indication of the
model's performance on the eventual private test set.
"""
            )
            lb = get_leaderboard_object()

            def refresh_overall_leaderboard(version):
                print(
                    f"Refreshing overall leaderboard at {get_time()}. "
                    f"Data version: {str(version)[:8]}"
                )
                return format_leaderboard_table(df_results=current_dataframe)

            # Refresh when the data version changes
            data_version.change(
                fn=refresh_overall_leaderboard, inputs=data_version, outputs=lb
            )

            # At the bottom of the leaderboard, we can keep missing test set results as NaN and explain:
            # gr.Markdown(
            #     "_ℹ️ Results for the private test set will not be shown here and will be used for final judging at the close of the competition._"
            # )

        with gr.TabItem(SUBMIT_TAB_NAME, elem_id="boundary-benchmark-tab-table"):
            gr.Markdown(SUBMIT_INTRUCTIONS)
            submission_type_state = gr.State(value="GDPa1_cross_validation")
            download_file_state = gr.State(value=EXAMPLE_FILE_DICT["GDPa1_cross_validation"])
            with gr.Row():
                with gr.Column():
                    username_input = gr.Textbox(
                        label="Username",
                        placeholder="Enter your Hugging Face username",
                        info="This will be used to identify valid submissions, and to update your results if you submit again.",
                    )
                    # gr.LoginButton()
                    anonymous_checkbox = gr.Checkbox(
                        label="Anonymous",
                        value=False,
                        info="If checked, your username will be replaced with an anonymous hash on the leaderboard.",
                    )
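
                    # The anonymization itself presumably happens downstream in
                    # submit.make_submission; a minimal sketch of one way to derive
                    # a stable anonymous handle with the already-imported hashlib:
                    #
                    #     anon_name = "anon-" + hashlib.sha256(username.encode()).hexdigest()[:8]
                    #
                    # The same username always maps to the same handle, so repeat
                    # submissions can still be grouped without exposing the name.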
                    model_name_input = gr.Textbox(
                        label="Model Name",
                        placeholder="Enter your model name (e.g., 'MyProteinLM-v1')",
                        info="This will be displayed on the leaderboard.",
                    )
                    model_description_input = gr.Textbox(
                        label="Model Description (optional)",
                        placeholder="Brief description of your model and approach",
                        info="Describe your model, training data, or methodology.",
                        lines=3,
                    )
                    registration_code = gr.Textbox(
                        label="Registration Code",
                        placeholder="Enter your registration code",
                        info="If you did not receive a registration code, please sign up on the Competition Registration page or email antibodycompetition@ginkgobioworks.com.",
                    )

                    # Extra validation / warning: conditional warning box and checkbox
                    high_corr_warning = gr.Markdown(
                        value="", visible=False, elem_classes=["warning-box"]
                    )
                    high_corr_checkbox = gr.Checkbox(
                        label="I understand this may be overfitting",
                        value=False,
                        visible=False,
                        info="This checkbox will appear if your submission shows suspiciously high correlations (>0.9).",
                    )
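
                    # The >0.9 correlation check itself presumably runs server-side
                    # (e.g. in submit.make_submission); a minimal sketch of such a
                    # check against the public labels, with hypothetical column names:
                    #
                    #     from scipy.stats import spearmanr
                    #     rho, _ = spearmanr(preds["prediction"], labels["measurement"])
                    #     suspiciously_high = rho > 0.9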

                with gr.Column():
                    submission_type_dropdown = gr.Dropdown(
                        choices=["GDPa1", "GDPa1_cross_validation", "Heldout Test Set"],
                        value="GDPa1_cross_validation",
                        label="Submission Type",
                        info=(
                            "Choose the dataset corresponding to the track you're participating in. "
                            f"See the '{ABOUT_TAB_NAME}' tab for details."
                        ),
                    )
                    download_button = gr.DownloadButton(
                        label="📥 Download example submission CSV for GDPa1_cross_validation",
                        value=EXAMPLE_FILE_DICT["GDPa1_cross_validation"],
                        variant="secondary",
                    )
                    submission_file = gr.File(label="Submission CSV")

                    def update_submission_type_and_file(submission_type):
                        """
                        Based on the submission type selected in the dropdown:
                        - update the submission type state
                        - dynamically update the example file for download
                        """
                        # Fall back to the default example file if the submission type
                        # is somehow missing from the dict (the original fallback
                        # repeated the same lookup and could still raise KeyError)
                        download_file = EXAMPLE_FILE_DICT.get(
                            submission_type, EXAMPLE_FILE_DICT["GDPa1_cross_validation"]
                        )
                        download_label = (
                            f"📥 Download example submission CSV for {submission_type}"
                        )
                        return (
                            submission_type,
                            download_file,
                            gr.DownloadButton(
                                label=download_label,
                                value=download_file,
                                variant="secondary",
                            ),
                        )

                    # Update the submission type state and download button when the dropdown changes
                    submission_type_dropdown.change(
                        fn=update_submission_type_and_file,
                        inputs=submission_type_dropdown,
                        outputs=[submission_type_state, download_file_state, download_button],
                    )

            submit_btn = gr.Button("Evaluate")
            message = gr.Textbox(label="Status", lines=1, visible=False)

            # Help message
            gr.Markdown(
                "If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space."
            )

            submit_btn.click(
                make_submission,
                inputs=[
                    submission_file,
                    username_input,
                    submission_type_state,
                    model_name_input,
                    model_description_input,
                    anonymous_checkbox,
                    registration_code,
                ],
                outputs=[message],
            ).then(
                fn=show_output_box,
                inputs=[message],
                outputs=[message],
            )

        with gr.Tab(FAQ_TAB_NAME):
            gr.Markdown("# Frequently Asked Questions")
            # FAQS maps each question string to its answer string
            for i, (question, answer) in enumerate(FAQS.items()):
                # Would love to make the questions bold, but Accordion labels don't support markdown
                with gr.Accordion(f"{i + 1}. {question}", open=False):
                    gr.Markdown(f"*{answer}*")  # Italics for answers

    # Footnote
    gr.Markdown(
        f"""
📬 For questions or feedback, contact antibodycompetition@ginkgobioworks.com or visit the Community tab at the top of this page.

Visit the Competition Registration page to sign up for updates and to register a team, and see the [Terms]({TERMS_URL}).
""", elem_id="contact-footer", ) if __name__ == "__main__": demo.launch(ssr_mode=False, share=True)