Commit
·
62b6599
1
Parent(s):
34f59c0
Text changes
Browse files
- about.py +3 -3
- app.py +2 -1
- constants.py +1 -1
- submit.py +2 -1
- utils.py +1 -1
- validation.py +1 -1
about.py
CHANGED
@@ -3,7 +3,7 @@ ABOUT_TEXT = """
|
|
3 |
|
4 |
We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1).
|
5 |
|
6 |
-
**What is antibody developability?**
|
7 |
|
8 |
Antibodies have to be manufacturable, stable in high concentrations, and have low off-target effects.
|
9 |
Properties such as these can often hinder the progression of an antibody to the clinic, and are collectively referred to as 'developability'.
|
@@ -13,12 +13,12 @@ Here we show 5 of these properties and invite the community to submit and develo
|
|
13 |
|
14 |
1. Download the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1)
|
15 |
2. Make predictions for all the antibody sequences for your property of interest.
|
16 |
-
3. Submit a CSV file containing the `"antibody_name"` column and a column matching the property name you are predicting (e.g. `"antibody_name,Titer"` if you are predicting Titer).
|
17 |
There is an example submission file on the "✉️ Submit" tab.
|
18 |
|
19 |
For the cross-validation metrics (if training only on the GDPa1 dataset), use the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column to split the dataset into folds and make predictions for each of the folds.
|
20 |
Submit a CSV file in the same format but also containing the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column.
|
21 |
-
There is also an example cross-validation submission file on the "✉️ Submit" tab, and we will be releasing a full code tutorial shortly.
|
22 |
|
23 |
**How to evaluate?**
|
24 |
|
|
|
3 |
|
4 |
We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1).
|
5 |
|
6 |
+
**What is antibody developability and why is it important?**
|
7 |
|
8 |
Antibodies have to be manufacturable, stable in high concentrations, and have low off-target effects.
|
9 |
Properties such as these can often hinder the progression of an antibody to the clinic, and are collectively referred to as 'developability'.
|
|
|
13 |
|
14 |
1. Download the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1)
|
15 |
2. Make predictions for all the antibody sequences for your property of interest.
|
16 |
+
3. Submit a CSV file containing the `"antibody_name"` column and a column from GDPa1 matching the property name you are predicting (e.g. `"antibody_name,Titer"` if you are predicting Titer).
|
17 |
There is an example submission file on the "✉️ Submit" tab.
|
18 |
|
19 |
For the cross-validation metrics (if training only on the GDPa1 dataset), use the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column to split the dataset into folds and make predictions for each of the folds.
|
20 |
Submit a CSV file in the same format but also containing the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column.
|
21 |
+
There is also an example cross-validation submission file on the "✉️ Submit" tab, and we will be releasing a full cross-validation code tutorial shortly.
|
22 |
|
23 |
**How to evaluate?**
|
24 |
|
app.py
CHANGED
@@ -116,9 +116,10 @@ with gr.Blocks() as demo:
|
|
116 |
|
117 |
with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
|
118 |
gr.Markdown(
|
119 |
-
|
120 |
# Antibody Developability Submission
|
121 |
Upload a CSV to get a score!
|
|
|
122 |
|
123 |
Please use your Hugging Face account name to submit your model - we use this to track separate submissions, and if you would like to remain anonymous please set up an anonymous huggingface account.
|
124 |
Your submission will be evaluated and added to the leaderboard.
|
|
|
116 |
|
117 |
with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
|
118 |
gr.Markdown(
|
119 |
+
f"""
|
120 |
# Antibody Developability Submission
|
121 |
Upload a CSV to get a score!
|
122 |
+
List of valid property names: `{', '.join(ASSAY_LIST)}`.
|
123 |
|
124 |
Please use your Hugging Face account name to submit your model - we use this to track separate submissions, and if you would like to remain anonymous please set up an anonymous huggingface account.
|
125 |
Your submission will be evaluated and added to the leaderboard.
|
constants.py
CHANGED
@@ -16,7 +16,7 @@ ASSAY_RENAME = {
|
|
16 |
}
|
17 |
ASSAY_DESCRIPTION = {
|
18 |
"AC-SINS_pH7.4": "Self association by AC-SINS at pH 7.4",
|
19 |
-
"PR_CHO": "Polyreactivity by bead-based method against CHO SMP
|
20 |
"HIC": "Hydrophobicity by HIC",
|
21 |
"Tm2": "Thermostability by nanoDSF",
|
22 |
"Titer": "Titer by Valita",
|
|
|
16 |
}
|
17 |
ASSAY_DESCRIPTION = {
|
18 |
"AC-SINS_pH7.4": "Self association by AC-SINS at pH 7.4",
|
19 |
+
"PR_CHO": "Polyreactivity by bead-based method against CHO SMP",
|
20 |
"HIC": "Hydrophobicity by HIC",
|
21 |
"Tm2": "Thermostability by nanoDSF",
|
22 |
"Titer": "Titer by Valita",
|
submit.py
CHANGED
@@ -71,7 +71,8 @@ def make_submission(
|
|
71 |
if not model_name:
|
72 |
raise gr.Error("Please provide a model name.")
|
73 |
if not model_description:
|
74 |
-
|
|
|
75 |
if submitted_file is None:
|
76 |
raise gr.Error("Please upload a CSV file before submitting.")
|
77 |
|
|
|
71 |
if not model_name:
|
72 |
raise gr.Error("Please provide a model name.")
|
73 |
if not model_description:
|
74 |
+
model_description = ""
|
75 |
+
# raise gr.Error("Please provide a model description.")
|
76 |
if submitted_file is None:
|
77 |
raise gr.Error("Please upload a CSV file before submitting.")
|
78 |
|
utils.py
CHANGED
@@ -22,6 +22,6 @@ def fetch_hf_results():
|
|
22 |
)["train"].to_pandas()
|
23 |
assert all(col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_COLUMNS) - set(df.columns)}"
|
24 |
# Show latest submission only
|
25 |
-
df = df.sort_values("submission_time", ascending=False).drop_duplicates(subset=["model", "assay"], keep="first")
|
26 |
df["property"] = df["assay"].map(ASSAY_RENAME)
|
27 |
return df
|
|
|
22 |
)["train"].to_pandas()
|
23 |
assert all(col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_COLUMNS) - set(df.columns)}"
|
24 |
# Show latest submission only
|
25 |
+
df = df.sort_values("submission_time", ascending=False).drop_duplicates(subset=["model", "assay", "user"], keep="first")
|
26 |
df["property"] = df["assay"].map(ASSAY_RENAME)
|
27 |
return df
|
validation.py
CHANGED
@@ -49,7 +49,7 @@ def validate_username(username: str) -> bool:
|
|
49 |
)
|
50 |
elif response.status_code == 404:
|
51 |
raise gr.Error(
|
52 |
-
f"❌ Hugging Face user '{username}' does not exist. Please check the username or create an account at https://huggingface.co"
|
53 |
)
|
54 |
else:
|
55 |
raise gr.Error(
|
|
|
49 |
)
|
50 |
elif response.status_code == 404:
|
51 |
raise gr.Error(
|
52 |
+
f"❌ Hugging Face user '{username}' does not exist. Please check the username or create an account at https://huggingface.co. This is used to track unique submissions."
|
53 |
)
|
54 |
else:
|
55 |
raise gr.Error(
|