Spaces:
Runtime error
Runtime error
xuyingli
committed on
Commit
·
b8f7a8b
1
Parent(s):
eb20a68
Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,14 @@
|
|
1 |
import streamlit as st
|
2 |
import torch
|
3 |
import esm
|
|
|
4 |
import matplotlib.pyplot as plt
|
5 |
from myscaledb import Client
|
6 |
import random
|
7 |
from collections import Counter
|
8 |
from tqdm import tqdm
|
9 |
from statistics import mean
|
10 |
-
|
11 |
import torch
|
12 |
import matplotlib.pyplot as plt
|
13 |
import numpy as np
|
@@ -17,8 +18,6 @@ from stmol import *
|
|
17 |
import py3Dmol
|
18 |
# from streamlit_3Dmol import component_3dmol
|
19 |
|
20 |
-
import esm
|
21 |
-
|
22 |
import scipy
|
23 |
from sklearn.model_selection import GridSearchCV, train_test_split
|
24 |
from sklearn.decomposition import PCA
|
@@ -102,6 +101,18 @@ def visualize_3D_Coordinates(coords):
|
|
102 |
)
|
103 |
return fig
|
104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
def esm_search(model, sequnce, batch_converter,top_k=5):
|
106 |
data = [
|
107 |
("protein1", sequnce),
|
@@ -130,6 +141,19 @@ def esm_search(model, sequnce, batch_converter,top_k=5):
|
|
130 |
|
131 |
return result_temp_seq
|
132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
def KNN_search(sequence):
|
134 |
model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
|
135 |
batch_converter = alphabet.get_batch_converter()
|
@@ -390,53 +414,22 @@ else:
|
|
390 |
st.text('search result (top 5): ')
|
391 |
# tab1, tab2, tab3, tab4, = st.tabs(["Cat", "Dog", "Owl"])
|
392 |
tab1, tab2, tab3 , tab4, tab5 = st.tabs(['1','2','3','4','5'])
|
393 |
-
# option2 = st.radio('top5 sequence', (result_temp_seq[0],result_temp_seq[1],result_temp_seq[2],result_temp_seq[3],result_temp_seq[4]))
|
394 |
|
395 |
with tab1:
|
396 |
st.write(result_temp_seq[0])
|
397 |
-
|
398 |
-
# print(random.randint(0,9))
|
399 |
-
prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
|
400 |
-
# protein=st.selectbox('select protein',prot_list)
|
401 |
-
protein = prot_str[random.randint(14,18)]
|
402 |
-
xyzview = py3Dmol.view(query='pdb:'+protein)
|
403 |
-
xyzview.setStyle({'stick':{'color':'spectrum'}})
|
404 |
-
showmol(xyzview, height = 500,width=800)
|
405 |
-
# st.write(result_temp_seq[4])
|
406 |
with tab2:
|
407 |
-
import random
|
408 |
-
# print(random.randint(0,9))
|
409 |
st.write(result_temp_seq[1])
|
410 |
-
|
411 |
-
# protein=st.selectbox('select protein',prot_list)
|
412 |
-
protein = prot_str[random.randint(0,4)]
|
413 |
-
xyzview = py3Dmol.view(query='pdb:'+protein)
|
414 |
-
xyzview.setStyle({'stick':{'color':'spectrum'}})
|
415 |
-
showmol(xyzview, height = 500,width=800)
|
416 |
with tab3:
|
417 |
st.write(result_temp_seq[2])
|
418 |
-
|
419 |
-
# protein=st.selectbox('select protein',prot_list)
|
420 |
-
protein = prot_str[random.randint(4,8)]
|
421 |
-
xyzview = py3Dmol.view(query='pdb:'+protein)
|
422 |
-
xyzview.setStyle({'stick':{'color':'spectrum'}})
|
423 |
-
showmol(xyzview, height = 500,width=800)
|
424 |
with tab4:
|
425 |
st.write(result_temp_seq[3])
|
426 |
-
|
427 |
-
# protein=st.selectbox('select protein',prot_list)
|
428 |
-
protein = prot_str[random.randint(4,8)]
|
429 |
-
xyzview = py3Dmol.view(query='pdb:'+protein)
|
430 |
-
xyzview.setStyle({'stick':{'color':'spectrum'}})
|
431 |
-
showmol(xyzview, height = 500,width=800)
|
432 |
with tab5:
|
433 |
st.write(result_temp_seq[4])
|
434 |
-
|
435 |
-
# protein=st.selectbox('select protein',prot_list)
|
436 |
-
protein = prot_str[random.randint(4,8)]
|
437 |
-
xyzview = py3Dmol.view(query='pdb:'+protein)
|
438 |
-
xyzview.setStyle({'stick':{'color':'spectrum'}})
|
439 |
-
showmol(xyzview, height = 500,width=800)
|
440 |
|
441 |
|
442 |
elif option == 'activity prediction':
|
@@ -473,18 +466,4 @@ else:
|
|
473 |
expander.markdown("""
|
474 |
A PDB ID is a unique 4-character code for each entry in the Protein Data Bank. The first character must be a number between 1 and 9, and the remaining three characters can be letters or numbers.
|
475 |
see https://www.rcsb.org/ for more information.
|
476 |
-
""")
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
|
|
1 |
import streamlit as st
|
2 |
import torch
|
3 |
import esm
|
4 |
+
import requests
|
5 |
import matplotlib.pyplot as plt
|
6 |
from myscaledb import Client
|
7 |
import random
|
8 |
from collections import Counter
|
9 |
from tqdm import tqdm
|
10 |
from statistics import mean
|
11 |
+
import biotite.structure.io as bsio
|
12 |
import torch
|
13 |
import matplotlib.pyplot as plt
|
14 |
import numpy as np
|
|
|
18 |
import py3Dmol
|
19 |
# from streamlit_3Dmol import component_3dmol
|
20 |
|
|
|
|
|
21 |
import scipy
|
22 |
from sklearn.model_selection import GridSearchCV, train_test_split
|
23 |
from sklearn.decomposition import PCA
|
|
|
101 |
)
|
102 |
return fig
|
103 |
|
104 |
+
def render_mol(pdb, height=500, width=800):
    """Render PDB-format text as a spinning cartoon model in the Streamlit page.

    Args:
        pdb: Structure data in PDB format; passed to ``addModel`` unchanged.
        height: Viewer height forwarded to ``showmol``. Defaults to 500,
            the value that was previously hard-coded.
        width: Viewer width forwarded to ``showmol``. Defaults to 800,
            the value that was previously hard-coded.

    Returns:
        None. The viewer is emitted into the page by ``showmol``.
    """
    pdbview = py3Dmol.view()
    pdbview.addModel(pdb, 'pdb')
    pdbview.setStyle({'cartoon': {'color': 'spectrum'}})
    pdbview.setBackgroundColor('white')  # '0xeeeeee' was the alternative tried
    # Fit the whole model first, then zoom in further.
    pdbview.zoomTo()
    # NOTE(review): presumably (zoom factor, animation duration in ms) --
    # confirm against the 3Dmol.js viewer API.
    pdbview.zoom(2, 800)
    pdbview.spin(True)
    showmol(pdbview, height=height, width=width)
|
113 |
+
|
114 |
+
|
115 |
+
|
116 |
def esm_search(model, sequnce, batch_converter,top_k=5):
|
117 |
data = [
|
118 |
("protein1", sequnce),
|
|
|
141 |
|
142 |
return result_temp_seq
|
143 |
|
144 |
+
def show_protein_structure(sequence, timeout=120):
    """Request a predicted structure for *sequence* and render it in the page.

    The sequence is POSTed to the ESM Atlas ``foldSequence`` endpoint, the
    returned PDB text is written to ``predicted.pdb``, parsed with biotite,
    and finally displayed through ``render_mol``.

    Args:
        sequence: Amino-acid sequence sent as the raw request body.
        timeout: Seconds to wait for the folding service before giving up.
            Without a timeout a stalled request would block the app
            indefinitely.

    Returns:
        None. On a network or HTTP failure an error message is shown with
        ``st.error`` and nothing is rendered.
    """
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    try:
        response = requests.post(
            'https://api.esmatlas.com/foldSequence/v1/pdb/',
            headers=headers,
            data=sequence,
            timeout=timeout,
        )
        # Reject 4xx/5xx replies up front; otherwise the error body would be
        # written to disk and parsed as if it were a PDB file.
        response.raise_for_status()
    except requests.RequestException as err:
        st.error(f'Structure prediction request failed: {err}')
        return

    pdb_string = response.content.decode('utf-8')
    # Written with the same encoding the payload was decoded with.
    with open('predicted.pdb', 'w', encoding='utf-8') as f:
        f.write(pdb_string)
    # Parsing the file makes malformed PDB text fail loudly here.
    struct = bsio.load_structure('predicted.pdb', extra_fields=["b_factor"])
    # NOTE(review): b_value is not used by any code visible here; presumably
    # the mean B-factor is the model confidence and was meant to be shown --
    # confirm and either display it or drop it.
    b_value = round(struct.b_factor.mean(), 4)
    render_mol(pdb_string)
|
156 |
+
|
157 |
def KNN_search(sequence):
|
158 |
model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
|
159 |
batch_converter = alphabet.get_batch_converter()
|
|
|
414 |
st.text('search result (top 5): ')
|
415 |
# tab1, tab2, tab3, tab4, = st.tabs(["Cat", "Dog", "Owl"])
|
416 |
tab1, tab2, tab3 , tab4, tab5 = st.tabs(['1','2','3','4','5'])
|
|
|
417 |
|
418 |
with tab1:
|
419 |
st.write(result_temp_seq[0])
|
420 |
+
show_protein_structure(result_temp_seq[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
421 |
with tab2:
|
|
|
|
|
422 |
st.write(result_temp_seq[1])
|
423 |
+
show_protein_structure(result_temp_seq[1])
|
|
|
|
|
|
|
|
|
|
|
424 |
with tab3:
|
425 |
st.write(result_temp_seq[2])
|
426 |
+
show_protein_structure(result_temp_seq[2])
|
|
|
|
|
|
|
|
|
|
|
427 |
with tab4:
|
428 |
st.write(result_temp_seq[3])
|
429 |
+
show_protein_structure(result_temp_seq[3])
|
|
|
|
|
|
|
|
|
|
|
430 |
with tab5:
|
431 |
st.write(result_temp_seq[4])
|
432 |
+
show_protein_structure(result_temp_seq[4])
|
|
|
|
|
|
|
|
|
|
|
433 |
|
434 |
|
435 |
elif option == 'activity prediction':
|
|
|
466 |
expander.markdown("""
|
467 |
A PDB ID is a unique 4-character code for each entry in the Protein Data Bank. The first character must be a number between 1 and 9, and the remaining three characters can be letters or numbers.
|
468 |
see https://www.rcsb.org/ for more information.
|
469 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|