# abdev-leaderboard / constants.py
# Last commit by loodvanniekerkginkgo (069fb2c):
#   "Added some changes to filtering / dedup submissions"
"""
Constants for the Antibody Developability Benchmark
"""
import os
from huggingface_hub import HfApi
import pandas as pd
# One row per assay: (assay identifier, short property name, description).
# The three public constants below are views over this table, so the assay
# identifiers are spelled out exactly once.
_ASSAY_TABLE = (
    ("AC-SINS_pH7.4", "Self-association", "Self association by AC-SINS at pH 7.4"),
    ("PR_CHO", "Polyreactivity", "Polyreactivity by bead-based method against CHO SMP"),
    ("HIC", "Hydrophobicity", "Hydrophobicity by HIC"),
    ("Tm2", "Thermostability", "Thermostability by nanoDSF"),
    ("Titer", "Titer", "Titer by Valita"),
)

# Assay identifiers, in table order.
ASSAY_LIST = [assay_id for assay_id, _, _ in _ASSAY_TABLE]

# Assay identifier -> short property name.
ASSAY_RENAME = {assay_id: label for assay_id, label, _ in _ASSAY_TABLE}

# Assay identifier -> one-line description of the measurement.
ASSAY_DESCRIPTION = {assay_id: blurb for assay_id, _, blurb in _ASSAY_TABLE}
# Assay identifier -> emoji associated with that assay.
#
# The previous literals were mojibake (the UTF-8 bytes of each emoji decoded
# as a single-byte code page), so they rendered as runs of accented letters.
# The intended code points are restored here as escape sequences, which keeps
# this file pure ASCII and immune to the same mis-decoding.
ASSAY_EMOJIS = {
    "AC-SINS_pH7.4": "\U0001F9F2",  # magnet
    "PR_CHO": "\U0001F3AF",  # direct hit (target)
    "HIC": "\U0001F4A7",  # droplet
    "Tm2": "\U0001F321\uFE0F",  # thermometer + emoji variation selector
    "Titer": "\U0001F9EA",  # test tube
}
# Tabs with emojis
#
# The leading emoji of each label had been corrupted into mojibake (UTF-8
# bytes decoded as a single-byte code page). The intended code points are
# restored as escape sequences so the labels no longer depend on how this
# file is decoded.
ABOUT_TAB_NAME = "\U0001F4D6 About / Rules"  # open book
FAQ_TAB_NAME = "\u2753 FAQs"  # question mark
SUBMIT_TAB_NAME = "\u2709\uFE0F Submit"  # envelope + emoji variation selector
# Registration code read from the environment; None when the variable is unset.
REGISTRATION_CODE = os.getenv("REGISTRATION_CODE")

# Link to the competition terms PDF (split across lines only for readability).
TERMS_URL = (
    "https://euphsfcyogalqiqsawbo.supabase.co/storage/v1/object/public/gdpweb/pdfs/"
    "2025%20Ginkgo%20Antibody%20Developability%20Prediction%20Competition%202025-08-28-v2.pdf"
)
# Columns that an input CSV file is required to contain.
REQUIRED_COLUMNS: list[str] = ["antibody_name", "vh_protein_sequence", "vl_protein_sequence"]

# Name of the column used for cross validation.
CV_COLUMN: str = "hierarchical_cluster_IgG_isotype_stratified_fold"

# Dataset name -> path of the example predictions file for that dataset.
EXAMPLE_FILE_DICT: dict[str, str] = {
    "GDPa1": "data/example-predictions.csv",
    "GDPa1_cross_validation": "data/example-predictions-cv.csv",
    "Heldout Test Set": "data/example-predictions-heldout.csv",
}
# Dataset name -> list of values in the "antibody_name" column of that
# dataset's example predictions file. The CSVs are read when this module is
# imported, one per entry of EXAMPLE_FILE_DICT.
ANTIBODY_NAMES_DICT = {
    dataset: pd.read_csv(csv_path)["antibody_name"].tolist()
    for dataset, csv_path in EXAMPLE_FILE_DICT.items()
}
# Hugging Face Hub client; the token and cache location come from the environment.
TOKEN = os.getenv("HF_TOKEN")  # None when HF_TOKEN is unset
CACHE_PATH = os.environ.get("HF_HOME", ".")  # current directory when HF_HOME is unset
API = HfApi(token=TOKEN)
# Hugging Face repository ids, both under the same organization.
ORGANIZATION = "ginkgo-datapoints"
SUBMISSIONS_REPO = ORGANIZATION + "/abdev-bench-submissions"
RESULTS_REPO = ORGANIZATION + "/abdev-bench-results"
# Leaderboard dataframes

# The columns expected from the results dataset.
LEADERBOARD_RESULTS_COLUMNS = ["model", "assay", "spearman", "dataset", "user", "submission_time"]

# Same columns after changing "assay" to "property" (pretty formatting).
LEADERBOARD_DISPLAY_COLUMNS = [
    "property" if column == "assay" else column
    for column in LEADERBOARD_RESULTS_COLUMNS
]
# Internal column name -> human-readable column header.
LEADERBOARD_COLUMNS_RENAME = {
    "spearman": "Spearman Correlation",
    "dataset": "Dataset",
    "user": "User",
    "submission_time": "Submission Time",
    "model": "Model Name",
    "property": "Property",
}

# NOTE(review): presumably the accounts whose submissions count as baselines;
# confirm against the code that reads this list.
BASELINE_USERNAMES = ["loodvanniekerkginkgo"]


def LEADERBOARD_COLUMNS_RENAME_LIST(columns: list[str]) -> list[str]:
    """Return *columns* with each name mapped to its human-readable header.

    Names that have no entry in ``LEADERBOARD_COLUMNS_RENAME`` are passed
    through unchanged. The result is a new list with the same length and
    order as the input; the input is not modified.

    The upper-case name is kept because callers reference it.
    """
    return [LEADERBOARD_COLUMNS_RENAME.get(name, name) for name in columns]