colbyford committed on
Commit
6bc4f0e
1 Parent(s): d7676d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -112
app.py CHANGED
@@ -7,66 +7,8 @@ from evodiff.generate import generate_oaardm, generate_d3pm
7
  from evodiff.generate_msa import generate_query_oadm_msa_simple
8
  from evodiff.conditional_generation import inpaint_simple, generate_scaffold
9
 
10
- import py3Dmol
11
- from colabfold.download import download_alphafold_params
12
- from colabfold.batch import run
13
-
14
- def a3m_file(file):
15
- return "tmp.a3m"
16
-
17
- def predict_protein(sequence):
18
- download_alphafold_params("alphafold2_ptm", Path("."))
19
- results = run(
20
- queries=[('evodiff_protein', sequence, None)],
21
- result_dir='evodiff_protein',
22
- use_templates=False,
23
- num_relax=0,
24
- msa_mode="mmseqs2_uniref_env",
25
- model_type="alphafold2_ptm",
26
- num_models=1,
27
- num_recycles=1,
28
- model_order=[1],
29
- is_complex=False,
30
- data_dir=Path("."),
31
- keep_existing_results=False,
32
- rank_by="auto",
33
- stop_at_score=float(100),
34
- zip_results=False,
35
- user_agent="colabfold/google-colab-main"
36
- )
37
-
38
- return f"evodiff_protein/evodiff_protein_unrelaxed_rank_001_alphafold2_ptm_model_1_seed_000.pdb"
39
-
40
- def display_pdb(path_to_pdb):
41
- '''
42
- #function to display pdb in py3dmol
43
- SOURCE: https://huggingface.co/spaces/merle/PROTEIN_GENERATOR/blob/main/app.py
44
- '''
45
- pdb = open(path_to_pdb, "r").read()
46
-
47
- view = py3Dmol.view(width=500, height=500)
48
- view.addModel(pdb, "pdb")
49
- view.setStyle({'model': -1}, {"cartoon": {'colorscheme':{'prop':'b','gradient':'roygb','min':0,'max':1}}})#'linear', 'min': 0, 'max': 1, 'colors': ["#ff9ef0","#a903fc",]}}})
50
- view.zoomTo()
51
- output = view._make_html().replace("'", '"')
52
- print(view._make_html())
53
- x = f"""<!DOCTYPE html><html></center> {output} </center></html>""" # do not use ' in this input
54
-
55
- return f"""<iframe height="500px" width="100%" name="result" allow="midi; geolocation; microphone; camera;
56
- display-capture; encrypted-media;" sandbox="allow-modals allow-forms
57
- allow-scripts allow-same-origin allow-popups
58
- allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
59
- allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
60
-
61
- '''
62
- return f"""<iframe style="width: 100%; height:700px" name="result" allow="midi; geolocation; microphone; camera;
63
- display-capture; encrypted-media;" sandbox="allow-modals allow-forms
64
- allow-scripts allow-same-origin allow-popups
65
- allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
66
- allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
67
- '''
68
-
69
- def make_uncond_seq(seq_len, model_type, pred_structure):
70
  if model_type == "EvoDiff-Seq-OADM 38M":
71
  checkpoint = OA_DM_38M()
72
  model, collater, tokenizer, scheme = checkpoint
@@ -77,29 +19,17 @@ def make_uncond_seq(seq_len, model_type, pred_structure):
77
  model, collater, tokenizer, scheme, timestep, Q_bar, Q = checkpoint
78
  tokeinzed_sample, generated_sequence = generate_d3pm(model, tokenizer, Q, Q_bar, timestep, int(seq_len), batch_size=1, device='cpu')
79
 
80
- if pred_structure:
81
- path_to_pdb = predict_protein(generated_sequence)
82
- molhtml = display_pdb(path_to_pdb)
83
-
84
- return generated_sequence, molhtml
85
- else:
86
- return generated_sequence, None
87
 
88
- def make_cond_seq(seq_len, msa_file, n_sequences, model_type, pred_structure):
89
  if model_type == "EvoDiff-MSA":
90
  checkpoint = MSA_OA_DM_MAXSUB()
91
  model, collater, tokenizer, scheme = checkpoint
92
  tokeinzed_sample, generated_sequence = generate_query_oadm_msa_simple(msa_file.name, model, tokenizer, int(n_sequences), seq_length=int(seq_len), device='cpu', selection_type='random')
93
 
94
- if pred_structure:
95
- path_to_pdb = predict_protein(generated_sequence)
96
- molhtml = display_pdb(path_to_pdb)
97
-
98
- return generated_sequence, molhtml
99
- else:
100
- return generated_sequence, None
101
 
102
- def make_inpainted_idrs(sequence, start_idx, end_idx, model_type, pred_structure):
103
  if model_type == "EvoDiff-Seq":
104
  checkpoint = OA_DM_38M()
105
  model, collater, tokenizer, scheme = checkpoint
@@ -112,15 +42,9 @@ def make_inpainted_idrs(sequence, start_idx, end_idx, model_type, pred_structure
112
  "generated_region": generated_idr
113
  }
114
 
115
- if pred_structure:
116
- path_to_pdb = predict_protein(entire_sequence)
117
- molhtml = display_pdb(path_to_pdb)
118
-
119
- return generated_idr_output, molhtml
120
- else:
121
- return generated_idr_output, None
122
 
123
- def make_scaffold_motifs(pdb_code, start_idx, end_idx, scaffold_length, model_type, pred_structure):
124
  if model_type == "EvoDiff-Seq":
125
  checkpoint = OA_DM_38M()
126
  model, collater, tokenizer, scheme = checkpoint
@@ -135,26 +59,16 @@ def make_scaffold_motifs(pdb_code, start_idx, end_idx, scaffold_length, model_ty
135
  "new_end_index": new_end_idx
136
  }
137
 
138
- return generated_scaffold_output, None
139
- # if pred_structure:
140
- # # path_to_pdb = predict_protein(generated_sequence)
141
- # path_to_pdb = f"scaffolding-pdbs/{pdb_code}.pdb"
142
- # molhtml = display_pdb(path_to_pdb)
143
-
144
- # return generated_scaffold_output, molhtml
145
- # else:
146
- # return generated_scaffold_output, None
147
 
148
  usg_app = gr.Interface(
149
  fn=make_uncond_seq,
150
  inputs=[
151
- gr.Slider(10, 100, step=1, label = "Sequence Length"),
152
- gr.Dropdown(["EvoDiff-Seq-OADM 38M", "EvoDiff-D3PM-Uniform 38M"], value="EvoDiff-Seq-OADM 38M", type="value", label = "Model"),
153
- gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
154
  ],
155
  outputs=[
156
- "text",
157
- gr.HTML()
158
  ],
159
  title = "Unconditional sequence generation",
160
  description="Generate a sequence with `EvoDiff-Seq-OADM 38M` (smaller/faster) or `EvoDiff-D3PM-Uniform 38M` (larger/slower) models."
@@ -163,15 +77,13 @@ usg_app = gr.Interface(
163
  csg_app = gr.Interface(
164
  fn=make_cond_seq,
165
  inputs=[
166
- gr.Slider(10, 100, label = "Sequence Length"),
167
  gr.File(file_types=["a3m"], label = "MSA File"),
168
- gr.Number(value=1, placeholder=1, precision=0, label = "Number of Sequences"),
169
- gr.Dropdown(["EvoDiff-MSA"], value="EvoDiff-MSA", type="value", label = "Model"),
170
- gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
171
  ],
172
  outputs=[
173
- "text",
174
- gr.HTML()
175
  ],
176
  # examples=[["https://github.com/microsoft/evodiff/raw/main/examples/example_files/bfd_uniclust_hits.a3m"]],
177
  title = "Conditional sequence generation",
@@ -184,12 +96,10 @@ idr_app = gr.Interface(
184
  gr.Textbox(placeholder="DQTERTVRSFEGRRTAPYLDSRNVLTIGYGHLLNRPGANKSWEGRLTSALPREFKQRLTELAASQLHETDVRLATARAQALYGSGAYFESVPVSLNDLWFDSVFNLGERKLLNWSGLRTKLESRDWGAAAKDLGRHTFGREPVSRRMAESMRMRRGIDLNHYNI", label = "Sequence"),
185
  gr.Number(value=20, placeholder=20, precision=0, label = "Start Index"),
186
  gr.Number(value=50, placeholder=50, precision=0, label = "End Index"),
187
- gr.Dropdown(["EvoDiff-Seq"], value="EvoDiff-Seq", type="value", label = "Model"),
188
- gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
189
  ],
190
  outputs=[
191
- "text",
192
- gr.HTML()
193
  ],
194
  title = "Inpainting IDRs",
195
  description="Inpaining a new region inside a given sequence using the `EvoDiff-Seq` model."
@@ -202,12 +112,10 @@ scaffold_app = gr.Interface(
202
  gr.Textbox(value="[15, 51]", placeholder="[15, 51]", label = "Start Index (as list)"),
203
  gr.Textbox(value="[34, 70]", placeholder="[34, 70]", label = "End Index (as list)"),
204
  gr.Number(value=75, placeholder=75, precision=0, label = "Scaffold Length"),
205
- gr.Dropdown(["EvoDiff-Seq", "EvoDiff-MSA"], value="EvoDiff-Seq", type="value", label = "Model"),
206
- gr.Checkbox(value=False, label = "Predict Structure?", visible=False)
207
  ],
208
  outputs=[
209
- "text",
210
- gr.HTML()
211
  ],
212
  title = "Scaffolding functional motifs",
213
  description="Scaffolding a new functional motif inside a given PDB structure using the `EvoDiff-Seq` model."
 
7
  from evodiff.generate_msa import generate_query_oadm_msa_simple
8
  from evodiff.conditional_generation import inpaint_simple, generate_scaffold
9
 
10
+
11
+ def make_uncond_seq(seq_len, model_type):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  if model_type == "EvoDiff-Seq-OADM 38M":
13
  checkpoint = OA_DM_38M()
14
  model, collater, tokenizer, scheme = checkpoint
 
19
  model, collater, tokenizer, scheme, timestep, Q_bar, Q = checkpoint
20
  tokeinzed_sample, generated_sequence = generate_d3pm(model, tokenizer, Q, Q_bar, timestep, int(seq_len), batch_size=1, device='cpu')
21
 
22
+ return generated_sequence
 
 
 
 
 
 
23
 
24
+ def make_cond_seq(seq_len, msa_file, n_sequences, model_type):
25
  if model_type == "EvoDiff-MSA":
26
  checkpoint = MSA_OA_DM_MAXSUB()
27
  model, collater, tokenizer, scheme = checkpoint
28
  tokeinzed_sample, generated_sequence = generate_query_oadm_msa_simple(msa_file.name, model, tokenizer, int(n_sequences), seq_length=int(seq_len), device='cpu', selection_type='random')
29
 
30
+ return generated_sequence
 
 
 
 
 
 
31
 
32
+ def make_inpainted_idrs(sequence, start_idx, end_idx, model_type):
33
  if model_type == "EvoDiff-Seq":
34
  checkpoint = OA_DM_38M()
35
  model, collater, tokenizer, scheme = checkpoint
 
42
  "generated_region": generated_idr
43
  }
44
 
45
+ return generated_idr_output
 
 
 
 
 
 
46
 
47
+ def make_scaffold_motifs(pdb_code, start_idx, end_idx, scaffold_length, model_type):
48
  if model_type == "EvoDiff-Seq":
49
  checkpoint = OA_DM_38M()
50
  model, collater, tokenizer, scheme = checkpoint
 
59
  "new_end_index": new_end_idx
60
  }
61
 
62
+ return generated_scaffold_output
 
 
 
 
 
 
 
 
63
 
64
  usg_app = gr.Interface(
65
  fn=make_uncond_seq,
66
  inputs=[
67
+ gr.Slider(10, 250, step=1, label = "Sequence Length"),
68
+ gr.Dropdown(["EvoDiff-Seq-OADM 38M", "EvoDiff-D3PM-Uniform 38M"], value="EvoDiff-Seq-OADM 38M", type="value", label = "Model")
 
69
  ],
70
  outputs=[
71
+ "text"
 
72
  ],
73
  title = "Unconditional sequence generation",
74
  description="Generate a sequence with `EvoDiff-Seq-OADM 38M` (smaller/faster) or `EvoDiff-D3PM-Uniform 38M` (larger/slower) models."
 
77
  csg_app = gr.Interface(
78
  fn=make_cond_seq,
79
  inputs=[
80
+ gr.Slider(10, 250, label = "Sequence Length"),
81
  gr.File(file_types=["a3m"], label = "MSA File"),
82
+ gr.Number(value=64, placeholder=64, precision=0, label = "Number of Sequences to Sample"),
83
+ gr.Dropdown(["EvoDiff-MSA"], value="EvoDiff-MSA", type="value", label = "Model")
 
84
  ],
85
  outputs=[
86
+ "text"
 
87
  ],
88
  # examples=[["https://github.com/microsoft/evodiff/raw/main/examples/example_files/bfd_uniclust_hits.a3m"]],
89
  title = "Conditional sequence generation",
 
96
  gr.Textbox(placeholder="DQTERTVRSFEGRRTAPYLDSRNVLTIGYGHLLNRPGANKSWEGRLTSALPREFKQRLTELAASQLHETDVRLATARAQALYGSGAYFESVPVSLNDLWFDSVFNLGERKLLNWSGLRTKLESRDWGAAAKDLGRHTFGREPVSRRMAESMRMRRGIDLNHYNI", label = "Sequence"),
97
  gr.Number(value=20, placeholder=20, precision=0, label = "Start Index"),
98
  gr.Number(value=50, placeholder=50, precision=0, label = "End Index"),
99
+ gr.Dropdown(["EvoDiff-Seq"], value="EvoDiff-Seq", type="value", label = "Model")
 
100
  ],
101
  outputs=[
102
+ "text"
 
103
  ],
104
  title = "Inpainting IDRs",
105
  description="Inpaining a new region inside a given sequence using the `EvoDiff-Seq` model."
 
112
  gr.Textbox(value="[15, 51]", placeholder="[15, 51]", label = "Start Index (as list)"),
113
  gr.Textbox(value="[34, 70]", placeholder="[34, 70]", label = "End Index (as list)"),
114
  gr.Number(value=75, placeholder=75, precision=0, label = "Scaffold Length"),
115
+ gr.Dropdown(["EvoDiff-Seq", "EvoDiff-MSA"], value="EvoDiff-Seq", type="value", label = "Model")
 
116
  ],
117
  outputs=[
118
+ "text"
 
119
  ],
120
  title = "Scaffolding functional motifs",
121
  description="Scaffolding a new functional motif inside a given PDB structure using the `EvoDiff-Seq` model."