"""Constants for the Antibody Developability Benchmark."""

import os

from huggingface_hub import HfApi
import pandas as pd

# Assay identifiers, as they are keyed in every ASSAY_* mapping below.
ASSAY_LIST = ["AC-SINS_pH7.4", "PR_CHO", "HIC", "Tm2", "Titer"]

# Assay identifier -> short human-readable property name.
ASSAY_RENAME = {
    "AC-SINS_pH7.4": "Self-association",
    "PR_CHO": "Polyreactivity",
    "HIC": "Hydrophobicity",
    "Tm2": "Thermostability",
    "Titer": "Titer",
}

# Assay identifier -> one-line description of how the property is measured.
ASSAY_DESCRIPTION = {
    "AC-SINS_pH7.4": "Self association by AC-SINS at pH 7.4",
    "PR_CHO": "Polyreactivity by bead-based method against CHO SMP",
    "HIC": "Hydrophobicity by HIC",
    "Tm2": "Thermostability by nanoDSF",
    "Titer": "Titer by Valita",
}

# Assay identifier -> emoji.
# NOTE: these literals were previously stored as mojibake (UTF-8 bytes decoded
# with a single-byte code page); they are restored to the intended characters.
ASSAY_EMOJIS = {
    "AC-SINS_pH7.4": "🧲",
    "PR_CHO": "🎯",
    "HIC": "💧",
    "Tm2": "🌡️",
    "Titer": "🧪",
}

# Assay identifier -> True when a larger value is the better outcome.
ASSAY_HIGHER_IS_BETTER = {
    "HIC": False,
    "Tm2": True,
    "Titer": True,
    "PR_CHO": False,
    "AC-SINS_pH7.4": False,
}

# Tabs with emojis
ABOUT_TAB_NAME = "📖 About / Rules"
FAQ_TAB_NAME = "❓ FAQs"
SUBMIT_TAB_NAME = "✉️ Submit"

# Read from the environment; None when the variable is not set.
REGISTRATION_CODE = os.environ.get("REGISTRATION_CODE")
TERMS_URL = "https://euphsfcyogalqiqsawbo.supabase.co/storage/v1/object/public/gdpweb/pdfs/2025%20Ginkgo%20Antibody%20Developability%20Prediction%20Competition%202025-08-28-v2.pdf"

# Input CSV file requirements
REQUIRED_COLUMNS: list[str] = [
    "antibody_name",
    "vh_protein_sequence",
    "vl_protein_sequence",
]

# Cross validation
CV_COLUMN = "hierarchical_cluster_IgG_isotype_stratified_fold"

# Example files
EXAMPLE_FILE_DICT = {
    "GDPa1": "data/example-predictions.csv",
    "GDPa1_cross_validation": "data/example-predictions-cv.csv",
    "Heldout Test Set": "data/example-predictions-heldout.csv",
}

# GDPa1 dataset
GDPa1_path = "hf://datasets/ginkgo-datapoints/GDPa1/GDPa1_v1.2_20250814.csv"

# Huggingface API
TOKEN = os.environ.get("HF_TOKEN")  # None when HF_TOKEN is not set
CACHE_PATH = os.getenv("HF_HOME", ".")  # falls back to the current directory
API = HfApi(token=TOKEN)

# Huggingface repos
ORGANIZATION = "ginkgo-datapoints"
SUBMISSIONS_REPO = f"{ORGANIZATION}/abdev-bench-submissions"
RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results"

# Leaderboard dataframes
# The columns expected from the results dataset
LEADERBOARD_RESULTS_COLUMNS = [
    "model",
    "assay",
    "spearman",
    "dataset",
    "user",
    "submission_time",
]

# After changing assay to property (pretty formatting)
LEADERBOARD_DISPLAY_COLUMNS = [
    "model",
    "property",
    "spearman",
    "dataset",
    "user",
    "submission_time",
]

# Internal column name -> header text used when renaming leaderboard columns.
LEADERBOARD_COLUMNS_RENAME = {
    "spearman": "Spearman Correlation",
    "dataset": "Dataset",
    "user": "User",
    "submission_time": "Submission Time",
    "model": "Model Name",
    "property": "Property",
}

BASELINE_USERNAMES = ["loodvanniekerkginkgo"]


def LEADERBOARD_COLUMNS_RENAME_LIST(columns: list[str]) -> list[str]:
    """Translate column names through ``LEADERBOARD_COLUMNS_RENAME``.

    Args:
        columns: Column names to translate.

    Returns:
        A new list, in the same order as ``columns``, where each name found
        in ``LEADERBOARD_COLUMNS_RENAME`` is replaced by its mapped value and
        any other name is passed through unchanged.
    """
    return [LEADERBOARD_COLUMNS_RENAME.get(name, name) for name in columns]