File size: 14,298 Bytes
393870b 2982a51 281711d 2dafeb1 1bcb06b 281711d 61fa714 84fdef4 2dafeb1 84fdef4 10e69e7 22f82e7 61fa714 15ae508 61fa714 84fdef4 3edbc93 c4f1261 8f9985e 2dafeb1 ba1131a 069fb2c de75bee 1bcb06b 8f9985e 10e69e7 177a597 2dafeb1 61fa714 177a597 1bcb06b 8f9985e fe04bb9 10e69e7 1bcb06b 5b5ee28 11e5e48 10e69e7 de9585b 069fb2c 2dafeb1 069fb2c 2dafeb1 61fa714 de9585b 8f9985e 1bcb06b de9585b 2982a51 de9585b 8f9985e c4f1261 5554fb7 5135eea 6c94821 2dafeb1 de9585b 6c94821 393870b 2dafeb1 6c94821 393870b 6c94821 393870b 6c94821 2dafeb1 6c94821 2dafeb1 22f82e7 2dafeb1 22f82e7 8f9985e 22f82e7 2dafeb1 22f82e7 4965e60 22f82e7 3d4c9af 0fdb208 2dafeb1 3d4c9af 2dafeb1 22f82e7 21f87d6 2dafeb1 0fdb208 2dafeb1 3edbc93 8f9985e 813ce52 61fa714 2dafeb1 61fa714 2dafeb1 61fa714 8f9985e 61fa714 2dafeb1 8f9985e 2dafeb1 069fb2c 2dafeb1 8f9985e de9585b 2dafeb1 6c94821 c4f1261 6c94821 2dafeb1 6c94821 2dafeb1 61fa714 3edbc93 61fa714 22f82e7 d6a0c44 3edbc93 809a553 10e69e7 22f82e7 ef27773 8bbbc7a ef27773 22f82e7 a7cc355 8f9985e 3edbc93 a7cc355 10e69e7 a7cc355 471531b d6a0c44 3edbc93 84fdef4 7ac33bb d6a0c44 84fdef4 61fa714 84fdef4 1a2d1c6 d6a0c44 84fdef4 813ce52 3edbc93 84fdef4 5d5df93 84fdef4 7ac33bb 84fdef4 d834d59 1a2d1c6 84fdef4 d834d59 84fdef4 3edbc93 8f9985e 3edbc93 a7cc355 34f59c0 a7cc355 1de74c6 471531b a7cc355 3edbc93 61fa714 22f82e7 2dafeb1 ba1131a 15ae508 ba1131a 8dcd98f 15ae508 ba1131a 8f9985e ba1131a 281711d d6a0c44 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 |
import hashlib
import pandas as pd
import gradio as gr
from gradio.themes.utils import sizes
from gradio_leaderboard import Leaderboard
from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
from constants import (
ASSAY_RENAME, # noqa: F401
EXAMPLE_FILE_DICT,
LEADERBOARD_DISPLAY_COLUMNS,
ABOUT_TAB_NAME,
FAQ_TAB_NAME,
TERMS_URL,
LEADERBOARD_COLUMNS_RENAME,
LEADERBOARD_COLUMNS_RENAME_LIST,
SUBMIT_TAB_NAME,
)
from submit import make_submission
from utils import fetch_hf_results, show_output_box, get_time
def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
    """
    Prepare the results dataframe (from utils.fetch_hf_results()) for display.

    Keeps only known assays, optionally restricts to a single assay, sorts by
    Spearman, annotates missing heldout-test results, and renames columns to
    their human-readable leaderboard names.
    """
    # Keep only rows whose assay is one we know how to display.
    table = df_results[df_results["assay"].isin(ASSAY_RENAME.keys())].copy()
    if assay is not None:
        table = table[table["assay"] == assay]
    table = table[LEADERBOARD_DISPLAY_COLUMNS].sort_values(by="spearman", ascending=False)
    # After sorting, just add the reason for excluding heldout test set.
    # Note: We can also just say the following as a text box at the bottom of the leaderboard: "Note: Results for the Heldout Test Set are only evaluated at competition close"
    # Cast to string first so assigning explanatory text does not hit a dtype clash.
    table["spearman"] = table["spearman"].astype(str)
    heldout_unscored = (table["dataset"] == "Heldout Test Set") & (table["spearman"] == "nan")
    table.loc[heldout_unscored, "spearman"] = "N/A, evaluated at competition close"
    # Finally, rename columns for readability.
    return table.rename(columns=LEADERBOARD_COLUMNS_RENAME)
def get_leaderboard_object(assay: str | None = None):
    """Build the Leaderboard widget for one assay, or for all assays when None."""
    # With no assay fixed, also let users filter rows by property.
    filterable = ["dataset"] if assay is not None else ["dataset", "property"]
    # TODO how to sort filter columns alphabetically?
    # Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
    # Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
    return Leaderboard(
        value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
        datatype=["str", "str", "str", "number", "str"],
        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(
            ["model", "property", "spearman", "dataset", "user"]
        ),
        search_columns=["Model Name"],
        filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filterable),
        every=15,
        render=True,
    )
# Initialize global dataframe
# NOTE(review): module-level cache of the latest fetched results; refreshed by
# the timer callback inside the Blocks context below.
current_dataframe = fetch_hf_results()
# Lood: Two problems currently:
# 1. The data_version state value isn't being incremented, it seems (even though it's triggering the dataframe change correctly)
# 2. The global current_dataframe is being shared across all sessions
# Make font size bigger using gradio theme
with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
    timer = gr.Timer(3)  # Run every 3 seconds when page is focused
    data_version = gr.State(value=0)  # Track data changes

    def update_current_dataframe(previous_hash):
        """Re-fetch results and return a content hash that changes with the data.

        Fix: the previous version read and mutated ``data_version.value``
        directly, but that is only the component's *default* — per-session
        state in Gradio must flow through event ``inputs``/``outputs``, which
        is why the version never appeared to increment.
        """
        global current_dataframe
        new_dataframe = fetch_hf_results()
        new_hash = hashlib.sha256(
            pd.util.hash_pandas_object(new_dataframe).values
        ).hexdigest()
        # Only publish a new version when the data actually changed, so the
        # downstream .change() listeners don't refresh needlessly.
        if new_hash != previous_hash:
            print(f"TMP Dataframe has changed at {get_time()}. Old hash: {str(previous_hash)[:8]}, new hash: {str(new_hash)[:8]}")
            current_dataframe = new_dataframe
            return new_hash
        return previous_hash

    # Route session state through inputs/outputs (see docstring above).
    timer.tick(fn=update_current_dataframe, inputs=data_version, outputs=data_version)
    ## Header
    # Two-column header: welcome text next to the competition logo.
    with gr.Row():
        with gr.Column(scale=6):  # bigger text area
            gr.Markdown(
                f"""
                ## Welcome to the Ginkgo Antibody Developability Benchmark!
                Participants can submit their model to the leaderboards by simply uploading a CSV file (see the "✉️ Submit" tab).
                You can **predict any or all of the 5 properties**, and you can filter the main leaderboard by property.
                See more details in the "{ABOUT_TAB_NAME}" tab.
                Submissions close on 1 November 2025.
                """
            )
        with gr.Column(scale=2):  # smaller side column for logo
            gr.Image(
                value="./assets/competition_logo.jpg",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                width="25vw",  # Take up the width of the column (2/8 = 1/4)
            )
    with gr.Tabs(elem_classes="tab-buttons"):
        # About tab: intro text, explainer image, then the long-form details.
        with gr.TabItem(ABOUT_TAB_NAME, elem_id="abdev-benchmark-tab-table"):
            gr.Markdown(ABOUT_INTRO)
            gr.Image(
                value="./assets/prediction_explainer.png",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                width="50vw",
            )
            gr.Markdown(ABOUT_TEXT)

        # Procedurally make these 5 tabs
        # for i, assay in enumerate(ASSAY_LIST):
        #     with gr.TabItem(
        #         f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}",
        #         elem_id="abdev-benchmark-tab-table",
        #     ) as tab_item:
        #         gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
        #         lb = get_leaderboard_object(assay=assay)
        #         def refresh_leaderboard(assay=assay):
        #             return format_leaderboard_table(df_results=current_dataframe, assay=assay)
        #         # Refresh when data version changes
        #         data_version.change(fn=refresh_leaderboard, outputs=lb)
        # Note(Lood): Trying out just one leaderboard. We could also have a dropdown here that shows different leaderboards for each property, but that's just the same as the filters
with gr.TabItem(
"🏆 Leaderboard", elem_id="abdev-benchmark-tab-table"
) as leaderboard_tab:
gr.Markdown(
"""
# Overall Leaderboard (filter below by property)
Each property has its own prize, and participants can submit models for any combination of properties.
**Note**: It is trivial to overfit the public GDPa1 dataset, which results in very high Spearman correlations.
We would suggest training using cross-validation a limited number of times to give a better indication of the model's performance on the eventual private test set.
"""
)
lb = get_leaderboard_object()
def refresh_overall_leaderboard():
print(f"TMP Refreshing overall leaderboard at {get_time()}. Data version: {data_version.value}")
return format_leaderboard_table(df_results=current_dataframe)
# Refresh when data version changes
data_version.change(fn=refresh_overall_leaderboard, outputs=lb)
# At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
# gr.Markdown(
# "_ℹ️ Results for the private test set will not be shown here and will be used for final judging at the close of the competition._"
# )
        with gr.TabItem(SUBMIT_TAB_NAME, elem_id="boundary-benchmark-tab-table"):
            gr.Markdown(SUBMIT_INTRUCTIONS)
            # Session state: the selected track and its matching example CSV.
            submission_type_state = gr.State(value="GDPa1_cross_validation")
            download_file_state = gr.State(value=EXAMPLE_FILE_DICT["GDPa1_cross_validation"])
            with gr.Row():
                # Left column: submitter identity and model metadata.
                with gr.Column():
                    username_input = gr.Textbox(
                        label="Username",
                        placeholder="Enter your Hugging Face username",
                        info="This will be used to identify valid submissions, and to update your results if you submit again.",
                    )
                    # gr.LoginButton()
                    anonymous_checkbox = gr.Checkbox(
                        label="Anonymous",
                        value=False,
                        info="If checked, your username will be replaced with an anonymous hash on the leaderboard.",
                    )
                    model_name_input = gr.Textbox(
                        label="Model Name",
                        placeholder="Enter your model name (e.g., 'MyProteinLM-v1')",
                        info="This will be displayed on the leaderboard.",
                    )
                    model_description_input = gr.Textbox(
                        label="Model Description (optional)",
                        placeholder="Brief description of your model and approach",
                        info="Describe your model, training data, or methodology.",
                        lines=3,
                    )
                    registration_code = gr.Textbox(
                        label="Registration Code",
                        placeholder="Enter your registration code",
                        info="If you did not receive a registration code, please sign up on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:[email protected]'>[email protected]</a>.",
                    )
                    # Extra validation / warning
                    # Add the conditional warning checkbox
                    # NOTE(review): both widgets start hidden; presumably made
                    # visible by submission-validation logic elsewhere — confirm.
                    high_corr_warning = gr.Markdown(
                        value="",
                        visible=False,
                        elem_classes=["warning-box"]
                    )
                    high_corr_checkbox = gr.Checkbox(
                        label="I understand this may be overfitting",
                        value=False,
                        visible=False,
                        info="This checkbox will appear if your submission shows suspiciously high correlations (>0.9).",
                    )
                # Right column: track selection, example download, file upload.
                with gr.Column():
                    submission_type_dropdown = gr.Dropdown(
                        choices=["GDPa1", "GDPa1_cross_validation", "Heldout Test Set"],
                        value="GDPa1_cross_validation",
                        label="Submission Type",
                        info=f"Choose the dataset corresponding to the track you're participating in. See the '{ABOUT_TAB_NAME}' tab for details.",
                    )
                    download_button = gr.DownloadButton(
                        label="📥 Download example submission CSV for GDPa1",
                        value=EXAMPLE_FILE_DICT["GDPa1_cross_validation"],
                        variant="secondary",
                    )
                    submission_file = gr.File(label="Submission CSV")
def update_submission_type_and_file(submission_type):
"""
Based on the submission type selected in the dropdown,
Update the submission type state
Dynamically update example file for download
"""
download_file = EXAMPLE_FILE_DICT.get(
submission_type, EXAMPLE_FILE_DICT[submission_type]
)
download_label = (
f"📥 Download example submission CSV for {submission_type}"
)
return (
submission_type,
download_file,
gr.DownloadButton(
label=download_label,
value=download_file,
variant="secondary",
),
)
# Update submission type state and download button when dropdown changes
submission_type_dropdown.change(
fn=update_submission_type_and_file,
inputs=submission_type_dropdown,
outputs=[submission_type_state, download_file_state, download_button],
)
submit_btn = gr.Button("Evaluate")
message = gr.Textbox(label="Status", lines=1, visible=False)
# help message
gr.Markdown(
"If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space."
)
submit_btn.click(
make_submission,
inputs=[
submission_file,
username_input,
submission_type_state,
model_name_input,
model_description_input,
anonymous_checkbox,
registration_code,
],
outputs=[message],
).then(
fn=show_output_box,
inputs=[message],
outputs=[message],
)
with gr.Tab(FAQ_TAB_NAME):
gr.Markdown("# Frequently Asked Questions")
for i, (question, answer) in enumerate(FAQS.items()):
# Would love to make questions bold but accordion doesn't support it
question = f"{i+1}. {question}"
with gr.Accordion(question, open=False):
gr.Markdown(f"*{answer}*") # Italics for answers
# Footnote
gr.Markdown(
f"""
<div style="text-align: center; font-size: 14px; color: gray; margin-top: 2em;">
📬 For questions or feedback, contact <a href="mailto:[email protected]">[email protected]</a> or visit the Community tab at the top of this page.<br>
Visit the <a href="https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition">Competition Registration page</a> to sign up for updates and to register a team, and see Terms <a href="{TERMS_URL}">here</a>.
</div>
""",
elem_id="contact-footer",
)
# Launch only when run as a script. share=True opens a public gradio.live
# tunnel; ssr_mode=False disables server-side rendering.
if __name__ == "__main__":
    demo.launch(ssr_mode=False, share=True)
|