at least 1 assay column required
Browse files
- constants.py +1 -1
- test/conftest.py +2 -19
- test/test_validation.py +10 -3
- validation.py +18 -3
constants.py
CHANGED
@@ -35,7 +35,7 @@ REQUIRED_COLUMNS: list[str] = [
|
|
35 |
"antibody_name",
|
36 |
"vh_protein_sequence",
|
37 |
"vl_protein_sequence",
|
38 |
-
]
|
39 |
ANTIBODY_NAMES = pd.read_csv("data/example-predictions.csv")["antibody_name"].tolist()
|
40 |
|
41 |
# Huggingface API
|
|
|
35 |
"antibody_name",
|
36 |
"vh_protein_sequence",
|
37 |
"vl_protein_sequence",
|
38 |
+
]
|
39 |
ANTIBODY_NAMES = pd.read_csv("data/example-predictions.csv")["antibody_name"].tolist()
|
40 |
|
41 |
# Huggingface API
|
test/conftest.py
CHANGED
@@ -1,27 +1,10 @@
|
|
1 |
import pytest
|
2 |
import pandas as pd
|
3 |
-
from constants import MINIMAL_NUMBER_OF_ROWS, ASSAY_LIST, ANTIBODY_NAMES
|
4 |
|
5 |
|
6 |
@pytest.fixture
|
7 |
-
def valid_csv_data():
|
8 |
-
return {
|
9 |
-
"antibody_name": ANTIBODY_NAMES[:MINIMAL_NUMBER_OF_ROWS],
|
10 |
-
"vh_protein_sequence": [
|
11 |
-
"EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMHWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDYGDGYYFDYWGQGTLVTVSS"
|
12 |
-
]
|
13 |
-
* MINIMAL_NUMBER_OF_ROWS,
|
14 |
-
"vl_protein_sequence": [
|
15 |
-
"DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASTLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPFTFGQGTKVEIK"
|
16 |
-
]
|
17 |
-
* MINIMAL_NUMBER_OF_ROWS,
|
18 |
-
**{assay: [0.85] * MINIMAL_NUMBER_OF_ROWS for assay in ASSAY_LIST},
|
19 |
-
}
|
20 |
-
|
21 |
-
|
22 |
-
@pytest.fixture
|
23 |
-
def valid_input_dataframe(valid_csv_data):
|
24 |
-
return pd.DataFrame(valid_csv_data)
|
25 |
|
26 |
|
27 |
@pytest.fixture
|
|
|
1 |
import pytest
|
2 |
import pandas as pd
|
|
|
3 |
|
4 |
|
5 |
@pytest.fixture
|
6 |
+
def valid_input_dataframe():
|
7 |
+
return pd.read_csv("data/example-predictions.csv")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
|
10 |
@pytest.fixture
|
test/test_validation.py
CHANGED
@@ -2,7 +2,7 @@ import pytest
|
|
2 |
import pandas as pd
|
3 |
import gradio as gr
|
4 |
from validation import validate_csv_file, validate_csv_can_be_read, validate_dataframe
|
5 |
-
from constants import REQUIRED_COLUMNS, MINIMAL_NUMBER_OF_ROWS
|
6 |
|
7 |
|
8 |
class TestValidateCsvCanBeRead:
|
@@ -11,8 +11,6 @@ class TestValidateCsvCanBeRead:
|
|
11 |
def test_valid_csv_can_be_read(self, valid_csv_content):
|
12 |
df = validate_csv_can_be_read(valid_csv_content)
|
13 |
assert isinstance(df, pd.DataFrame)
|
14 |
-
assert len(df) == MINIMAL_NUMBER_OF_ROWS
|
15 |
-
assert list(df.columns) == list(REQUIRED_COLUMNS)
|
16 |
|
17 |
def test_empty_csv_raises_error(self):
|
18 |
empty_csv = ""
|
@@ -56,6 +54,15 @@ class TestValidateDataframe:
|
|
56 |
|
57 |
assert f"Missing required columns: {missing_column}" in str(exc_info.value)
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
def test_empty_dataframe_raises_error(self, valid_input_dataframe):
|
60 |
empty_df = valid_input_dataframe.head(0)
|
61 |
|
|
|
2 |
import pandas as pd
|
3 |
import gradio as gr
|
4 |
from validation import validate_csv_file, validate_csv_can_be_read, validate_dataframe
|
5 |
+
from constants import REQUIRED_COLUMNS, MINIMAL_NUMBER_OF_ROWS, ASSAY_LIST
|
6 |
|
7 |
|
8 |
class TestValidateCsvCanBeRead:
|
|
|
11 |
def test_valid_csv_can_be_read(self, valid_csv_content):
|
12 |
df = validate_csv_can_be_read(valid_csv_content)
|
13 |
assert isinstance(df, pd.DataFrame)
|
|
|
|
|
14 |
|
15 |
def test_empty_csv_raises_error(self):
|
16 |
empty_csv = ""
|
|
|
54 |
|
55 |
assert f"Missing required columns: {missing_column}" in str(exc_info.value)
|
56 |
|
57 |
+
def test_at_least_one_assay_column_raises_error(self, valid_input_dataframe):
|
58 |
+
df = valid_input_dataframe.copy()
|
59 |
+
df.drop(columns=ASSAY_LIST, inplace=True, errors="ignore")
|
60 |
+
with pytest.raises(gr.Error) as exc_info:
|
61 |
+
validate_dataframe(df)
|
62 |
+
assert "CSV should include at least one of the following assay columns" in str(
|
63 |
+
exc_info.value
|
64 |
+
)
|
65 |
+
|
66 |
def test_empty_dataframe_raises_error(self, valid_input_dataframe):
|
67 |
empty_df = valid_input_dataframe.head(0)
|
68 |
|
validation.py
CHANGED
@@ -1,7 +1,12 @@
|
|
1 |
import pandas as pd
|
2 |
import io
|
3 |
import gradio as gr
|
4 |
-
from constants import
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
|
7 |
def validate_csv_can_be_read(file_content: str) -> pd.DataFrame:
|
@@ -57,12 +62,22 @@ def validate_dataframe(df: pd.DataFrame) -> None:
|
|
57 |
if missing_columns:
|
58 |
raise gr.Error(f"❌ Missing required columns: {', '.join(missing_columns)}")
|
59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
# Data should not be empty
|
61 |
if df.empty:
|
62 |
raise gr.Error("❌ CSV file is empty")
|
63 |
|
64 |
-
# No missing values in
|
65 |
-
for col in
|
66 |
missing_count = df[col].isnull().sum()
|
67 |
if missing_count > 0:
|
68 |
raise gr.Error(f"❌ Column '{col}' contains {missing_count} missing values")
|
|
|
1 |
import pandas as pd
|
2 |
import io
|
3 |
import gradio as gr
|
4 |
+
from constants import (
|
5 |
+
REQUIRED_COLUMNS,
|
6 |
+
MINIMAL_NUMBER_OF_ROWS,
|
7 |
+
ANTIBODY_NAMES,
|
8 |
+
ASSAY_LIST,
|
9 |
+
)
|
10 |
|
11 |
|
12 |
def validate_csv_can_be_read(file_content: str) -> pd.DataFrame:
|
|
|
62 |
if missing_columns:
|
63 |
raise gr.Error(f"❌ Missing required columns: {', '.join(missing_columns)}")
|
64 |
|
65 |
+
# Should include at least 1 assay column
|
66 |
+
assay_columns = [col for col in df.columns if col in ASSAY_LIST]
|
67 |
+
if len(assay_columns) < 1:
|
68 |
+
raise gr.Error(
|
69 |
+
"❌ CSV should include at least one of the following assay columns: "
|
70 |
+
+ ", ".join(ASSAY_LIST)
|
71 |
+
)
|
72 |
+
# Submission columns are the required columns (name, sequences) plus every assay column present
|
73 |
+
submission_columns = REQUIRED_COLUMNS + assay_columns
|
74 |
+
|
75 |
# Data should not be empty
|
76 |
if df.empty:
|
77 |
raise gr.Error("❌ CSV file is empty")
|
78 |
|
79 |
+
# No missing values in submission columns
|
80 |
+
for col in submission_columns:
|
81 |
missing_count = df[col].isnull().sum()
|
82 |
if missing_count > 0:
|
83 |
raise gr.Error(f"❌ Column '{col}' contains {missing_count} missing values")
|