Spaces:
Running
Running
Ensure all ints are ints
Browse files
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.45.2
|
8 |
app_file: app.py
|
|
|
1 |
---
|
2 |
+
title: EvoDiff
|
3 |
+
emoji: 🧬
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: orange
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.45.2
|
8 |
app_file: app.py
|
app.py
CHANGED
@@ -70,12 +70,12 @@ def make_uncond_seq(seq_len, model_type, pred_structure):
|
|
70 |
if model_type == "EvoDiff-Seq-OADM 38M":
|
71 |
checkpoint = OA_DM_38M()
|
72 |
model, collater, tokenizer, scheme = checkpoint
|
73 |
-
tokeinzed_sample, generated_sequence = generate_oaardm(model, tokenizer, seq_len, batch_size=1, device='cpu')
|
74 |
|
75 |
if model_type == "EvoDiff-D3PM-Uniform 38M":
|
76 |
checkpoint = D3PM_UNIFORM_38M(return_all=True)
|
77 |
model, collater, tokenizer, scheme, timestep, Q_bar, Q = checkpoint
|
78 |
-
tokeinzed_sample, generated_sequence = generate_d3pm(model, tokenizer, Q, Q_bar, timestep, seq_len, batch_size=1, device='cpu')
|
79 |
|
80 |
if pred_structure:
|
81 |
path_to_pdb = predict_protein(generated_sequence)
|
@@ -85,11 +85,11 @@ def make_uncond_seq(seq_len, model_type, pred_structure):
|
|
85 |
else:
|
86 |
return generated_sequence, None
|
87 |
|
88 |
-
def make_cond_seq(seq_len, msa_file, model_type, pred_structure):
|
89 |
if model_type == "EvoDiff-MSA":
|
90 |
checkpoint = MSA_OA_DM_MAXSUB()
|
91 |
model, collater, tokenizer, scheme = checkpoint
|
92 |
-
tokeinzed_sample, generated_sequence = generate_query_oadm_msa_simple(msa_file.name, model, tokenizer, n_sequences
|
93 |
|
94 |
if pred_structure:
|
95 |
path_to_pdb = predict_protein(generated_sequence)
|
@@ -103,7 +103,7 @@ def make_inpainted_idrs(sequence, start_idx, end_idx, model_type, pred_structure
|
|
103 |
if model_type == "EvoDiff-Seq":
|
104 |
checkpoint = OA_DM_38M()
|
105 |
model, collater, tokenizer, scheme = checkpoint
|
106 |
-
sample, entire_sequence, generated_idr = inpaint_simple(model, sequence, start_idx, end_idx, tokenizer=tokenizer, device='cpu')
|
107 |
|
108 |
generated_idr_output = {
|
109 |
"original_sequence": sequence,
|
@@ -125,6 +125,8 @@ def make_scaffold_motifs(pdb_code, start_idx, end_idx, scaffold_length, model_ty
|
|
125 |
checkpoint = OA_DM_38M()
|
126 |
model, collater, tokenizer, scheme = checkpoint
|
127 |
data_top_dir = './'
|
|
|
|
|
128 |
generated_sequence, new_start_idx, new_end_idx = generate_scaffold(model, pdb_code, start_idx, end_idx, scaffold_length, data_top_dir, tokenizer, device='cpu')
|
129 |
|
130 |
generated_scaffold_output = {
|
@@ -162,6 +164,7 @@ csg_app = gr.Interface(
|
|
162 |
inputs=[
|
163 |
gr.Slider(10, 100, label = "Sequence Length"),
|
164 |
gr.File(file_types=["a3m"], label = "MSA File"),
|
|
|
165 |
gr.Dropdown(["EvoDiff-MSA"], value="EvoDiff-MSA", type="value", label = "Model"),
|
166 |
gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
|
167 |
],
|
@@ -178,8 +181,8 @@ idr_app = gr.Interface(
|
|
178 |
fn=make_inpainted_idrs,
|
179 |
inputs=[
|
180 |
gr.Textbox(placeholder="DQTERTVRSFEGRRTAPYLDSRNVLTIGYGHLLNRPGANKSWEGRLTSALPREFKQRLTELAASQLHETDVRLATARAQALYGSGAYFESVPVSLNDLWFDSVFNLGERKLLNWSGLRTKLESRDWGAAAKDLGRHTFGREPVSRRMAESMRMRRGIDLNHYNI", label = "Sequence"),
|
181 |
-
gr.Number(value=20, placeholder=20, label = "Start Index"),
|
182 |
-
gr.Number(value=50, placeholder=50, label = "End Index"),
|
183 |
gr.Dropdown(["EvoDiff-Seq"], value="EvoDiff-Seq", type="value", label = "Model"),
|
184 |
gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
|
185 |
],
|
@@ -197,7 +200,7 @@ scaffold_app = gr.Interface(
|
|
197 |
gr.Textbox(placeholder="1prw", label = "PDB Code"),
|
198 |
gr.Textbox(value="[15, 51]", placeholder="[15, 51]", label = "Start Index (as list)"),
|
199 |
gr.Textbox(value="[34, 70]", placeholder="[34, 70]", label = "End Index (as list)"),
|
200 |
-
gr.Number(value=75, placeholder=75, label = "Scaffold Length"),
|
201 |
gr.Dropdown(["EvoDiff-Seq", "EvoDiff-MSA"], value="EvoDiff-Seq", type="value", label = "Model"),
|
202 |
gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
|
203 |
],
|
|
|
70 |
if model_type == "EvoDiff-Seq-OADM 38M":
|
71 |
checkpoint = OA_DM_38M()
|
72 |
model, collater, tokenizer, scheme = checkpoint
|
73 |
+
tokeinzed_sample, generated_sequence = generate_oaardm(model, tokenizer, int(seq_len), batch_size=1, device='cpu')
|
74 |
|
75 |
if model_type == "EvoDiff-D3PM-Uniform 38M":
|
76 |
checkpoint = D3PM_UNIFORM_38M(return_all=True)
|
77 |
model, collater, tokenizer, scheme, timestep, Q_bar, Q = checkpoint
|
78 |
+
tokeinzed_sample, generated_sequence = generate_d3pm(model, tokenizer, Q, Q_bar, timestep, int(seq_len), batch_size=1, device='cpu')
|
79 |
|
80 |
if pred_structure:
|
81 |
path_to_pdb = predict_protein(generated_sequence)
|
|
|
85 |
else:
|
86 |
return generated_sequence, None
|
87 |
|
88 |
+
def make_cond_seq(seq_len, msa_file, n_sequences, model_type, pred_structure):
|
89 |
if model_type == "EvoDiff-MSA":
|
90 |
checkpoint = MSA_OA_DM_MAXSUB()
|
91 |
model, collater, tokenizer, scheme = checkpoint
|
92 |
+
tokeinzed_sample, generated_sequence = generate_query_oadm_msa_simple(msa_file.name, model, tokenizer, int(n_sequences), seq_length=int(seq_len), device='cpu', selection_type='random')
|
93 |
|
94 |
if pred_structure:
|
95 |
path_to_pdb = predict_protein(generated_sequence)
|
|
|
103 |
if model_type == "EvoDiff-Seq":
|
104 |
checkpoint = OA_DM_38M()
|
105 |
model, collater, tokenizer, scheme = checkpoint
|
106 |
+
sample, entire_sequence, generated_idr = inpaint_simple(model, sequence, int(start_idx), int(end_idx), tokenizer=tokenizer, device='cpu')
|
107 |
|
108 |
generated_idr_output = {
|
109 |
"original_sequence": sequence,
|
|
|
125 |
checkpoint = OA_DM_38M()
|
126 |
model, collater, tokenizer, scheme = checkpoint
|
127 |
data_top_dir = './'
|
128 |
+
start_idx = list(map(int, start_idx.strip('][').split(', ')))
|
129 |
+
end_idx = list(map(int, end_idx.strip('][').split(', ')))
|
130 |
generated_sequence, new_start_idx, new_end_idx = generate_scaffold(model, pdb_code, start_idx, end_idx, scaffold_length, data_top_dir, tokenizer, device='cpu')
|
131 |
|
132 |
generated_scaffold_output = {
|
|
|
164 |
inputs=[
|
165 |
gr.Slider(10, 100, label = "Sequence Length"),
|
166 |
gr.File(file_types=["a3m"], label = "MSA File"),
|
167 |
+
gr.Number(value=1, placeholder=1, precision=0, label = "Number of Sequences"),
|
168 |
gr.Dropdown(["EvoDiff-MSA"], value="EvoDiff-MSA", type="value", label = "Model"),
|
169 |
gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
|
170 |
],
|
|
|
181 |
fn=make_inpainted_idrs,
|
182 |
inputs=[
|
183 |
gr.Textbox(placeholder="DQTERTVRSFEGRRTAPYLDSRNVLTIGYGHLLNRPGANKSWEGRLTSALPREFKQRLTELAASQLHETDVRLATARAQALYGSGAYFESVPVSLNDLWFDSVFNLGERKLLNWSGLRTKLESRDWGAAAKDLGRHTFGREPVSRRMAESMRMRRGIDLNHYNI", label = "Sequence"),
|
184 |
+
gr.Number(value=20, placeholder=20, precision=0, label = "Start Index"),
|
185 |
+
gr.Number(value=50, placeholder=50, precision=0, label = "End Index"),
|
186 |
gr.Dropdown(["EvoDiff-Seq"], value="EvoDiff-Seq", type="value", label = "Model"),
|
187 |
gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
|
188 |
],
|
|
|
200 |
gr.Textbox(placeholder="1prw", label = "PDB Code"),
|
201 |
gr.Textbox(value="[15, 51]", placeholder="[15, 51]", label = "Start Index (as list)"),
|
202 |
gr.Textbox(value="[34, 70]", placeholder="[34, 70]", label = "End Index (as list)"),
|
203 |
+
gr.Number(value=75, placeholder=75, precision=0, label = "Scaffold Length"),
|
204 |
gr.Dropdown(["EvoDiff-Seq", "EvoDiff-MSA"], value="EvoDiff-Seq", type="value", label = "Model"),
|
205 |
gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
|
206 |
],
|