xuyingli committed on
Commit
b8f7a8b
·
1 Parent(s): eb20a68

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -54
app.py CHANGED
@@ -1,13 +1,14 @@
1
  import streamlit as st
2
  import torch
3
  import esm
 
4
  import matplotlib.pyplot as plt
5
  from myscaledb import Client
6
  import random
7
  from collections import Counter
8
  from tqdm import tqdm
9
  from statistics import mean
10
-
11
  import torch
12
  import matplotlib.pyplot as plt
13
  import numpy as np
@@ -17,8 +18,6 @@ from stmol import *
17
  import py3Dmol
18
  # from streamlit_3Dmol import component_3dmol
19
 
20
- import esm
21
-
22
  import scipy
23
  from sklearn.model_selection import GridSearchCV, train_test_split
24
  from sklearn.decomposition import PCA
@@ -102,6 +101,18 @@ def visualize_3D_Coordinates(coords):
102
  )
103
  return fig
104
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  def esm_search(model, sequnce, batch_converter,top_k=5):
106
  data = [
107
  ("protein1", sequnce),
@@ -130,6 +141,19 @@ def esm_search(model, sequnce, batch_converter,top_k=5):
130
 
131
  return result_temp_seq
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  def KNN_search(sequence):
134
  model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
135
  batch_converter = alphabet.get_batch_converter()
@@ -390,53 +414,22 @@ else:
390
  st.text('search result (top 5): ')
391
  # tab1, tab2, tab3, tab4, = st.tabs(["Cat", "Dog", "Owl"])
392
  tab1, tab2, tab3 , tab4, tab5 = st.tabs(['1','2','3','4','5'])
393
- # option2 = st.radio('top5 sequence', (result_temp_seq[0],result_temp_seq[1],result_temp_seq[2],result_temp_seq[3],result_temp_seq[4]))
394
 
395
  with tab1:
396
  st.write(result_temp_seq[0])
397
- import random
398
- # print(random.randint(0,9))
399
- prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
400
- # protein=st.selectbox('select protein',prot_list)
401
- protein = prot_str[random.randint(14,18)]
402
- xyzview = py3Dmol.view(query='pdb:'+protein)
403
- xyzview.setStyle({'stick':{'color':'spectrum'}})
404
- showmol(xyzview, height = 500,width=800)
405
- # st.write(result_temp_seq[4])
406
  with tab2:
407
- import random
408
- # print(random.randint(0,9))
409
  st.write(result_temp_seq[1])
410
- prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
411
- # protein=st.selectbox('select protein',prot_list)
412
- protein = prot_str[random.randint(0,4)]
413
- xyzview = py3Dmol.view(query='pdb:'+protein)
414
- xyzview.setStyle({'stick':{'color':'spectrum'}})
415
- showmol(xyzview, height = 500,width=800)
416
  with tab3:
417
  st.write(result_temp_seq[2])
418
- prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
419
- # protein=st.selectbox('select protein',prot_list)
420
- protein = prot_str[random.randint(4,8)]
421
- xyzview = py3Dmol.view(query='pdb:'+protein)
422
- xyzview.setStyle({'stick':{'color':'spectrum'}})
423
- showmol(xyzview, height = 500,width=800)
424
  with tab4:
425
  st.write(result_temp_seq[3])
426
- prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
427
- # protein=st.selectbox('select protein',prot_list)
428
- protein = prot_str[random.randint(4,8)]
429
- xyzview = py3Dmol.view(query='pdb:'+protein)
430
- xyzview.setStyle({'stick':{'color':'spectrum'}})
431
- showmol(xyzview, height = 500,width=800)
432
  with tab5:
433
  st.write(result_temp_seq[4])
434
- prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
435
- # protein=st.selectbox('select protein',prot_list)
436
- protein = prot_str[random.randint(4,8)]
437
- xyzview = py3Dmol.view(query='pdb:'+protein)
438
- xyzview.setStyle({'stick':{'color':'spectrum'}})
439
- showmol(xyzview, height = 500,width=800)
440
 
441
 
442
  elif option == 'activity prediction':
@@ -473,18 +466,4 @@ else:
473
  expander.markdown("""
474
  A PDB ID is a unique 4-character code for each entry in the Protein Data Bank. The first character must be a number between 1 and 9, and the remaining three characters can be letters or numbers.
475
  see https://www.rcsb.org/ for more information.
476
- """)
477
-
478
-
479
-
480
-
481
-
482
-
483
-
484
-
485
-
486
-
487
-
488
-
489
-
490
-
 
1
  import streamlit as st
2
  import torch
3
  import esm
4
+ import requests
5
  import matplotlib.pyplot as plt
6
  from myscaledb import Client
7
  import random
8
  from collections import Counter
9
  from tqdm import tqdm
10
  from statistics import mean
11
+ import biotite.structure.io as bsio
12
  import torch
13
  import matplotlib.pyplot as plt
14
  import numpy as np
 
18
  import py3Dmol
19
  # from streamlit_3Dmol import component_3dmol
20
 
 
 
21
  import scipy
22
  from sklearn.model_selection import GridSearchCV, train_test_split
23
  from sklearn.decomposition import PCA
 
101
  )
102
  return fig
103
 
104
def render_mol(pdb):
    """Display a PDB-format structure as a spinning 3D cartoon in the app.

    Args:
        pdb: Structure data passed to the py3Dmol viewer as a 'pdb' model
            (the caller in this file passes the PDB text as a string).

    Returns:
        None. The viewer is embedded in the page through ``showmol``.
    """
    viewer = py3Dmol.view()
    viewer.addModel(pdb, 'pdb')

    # Cartoon representation, coloured with the 'spectrum' scheme.
    cartoon_style = {'cartoon': {'color': 'spectrum'}}
    viewer.setStyle(cartoon_style)
    viewer.setBackgroundColor('white')  # previously considered: '0xeeeeee'

    # Fit the model in view first, then apply the extra zoom.
    # NOTE(review): the arguments look like (zoom factor, animation
    # duration in ms) -- confirm against the 3Dmol.js viewer API.
    viewer.zoomTo()
    viewer.zoom(2, 800)
    viewer.spin(True)

    showmol(viewer, height=500, width=800)
113
+
114
+
115
+
116
  def esm_search(model, sequnce, batch_converter,top_k=5):
117
  data = [
118
  ("protein1", sequnce),
 
141
 
142
  return result_temp_seq
143
 
144
def show_protein_structure(sequence, timeout=60):
    """Predict a structure for ``sequence`` via the ESM Atlas API and render it.

    The sequence is POSTed to the ESM Atlas fold endpoint, the returned PDB
    text is written to ``predicted.pdb`` in the working directory, and the
    structure is displayed with ``render_mol``.

    Args:
        sequence: Sequence string sent as the request body.
        timeout: Seconds to wait for the fold service before giving up.
            Without a timeout a stalled service would block the app
            indefinitely.

    Returns:
        None. On an empty sequence or a failed request a message is shown in
        the app and nothing is rendered.
    """
    if not sequence:
        st.warning('No sequence available for structure prediction.')
        return

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    try:
        response = requests.post(
            'https://api.esmatlas.com/foldSequence/v1/pdb/',
            headers=headers,
            data=sequence,
            timeout=timeout,
        )
        # An HTTP error body is not a PDB file; stop here instead of
        # writing it to disk and handing it to the viewer.
        response.raise_for_status()
    except requests.RequestException as err:
        st.error(f'Structure prediction request failed: {err}')
        return

    pdb_string = response.content.decode('utf-8')

    # The fixed file name is kept from the original implementation in case
    # other code reads it.
    # NOTE(review): concurrent sessions overwrite the same file -- consider
    # a per-session temporary file.
    with open('predicted.pdb', 'w', encoding='utf-8') as f:
        f.write(pdb_string)

    # Loading the file also acts as a sanity check that the response parses
    # as a structure.
    # NOTE(review): b_value (presumably the mean pLDDT stored in the B-factor
    # column -- confirm) is computed but not displayed anywhere yet.
    struct = bsio.load_structure('predicted.pdb', extra_fields=["b_factor"])
    b_value = round(struct.b_factor.mean(), 4)

    render_mol(pdb_string)
156
+
157
  def KNN_search(sequence):
158
  model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
159
  batch_converter = alphabet.get_batch_converter()
 
414
  st.text('search result (top 5): ')
415
  # tab1, tab2, tab3, tab4, = st.tabs(["Cat", "Dog", "Owl"])
416
  tab1, tab2, tab3 , tab4, tab5 = st.tabs(['1','2','3','4','5'])
 
417
 
418
  with tab1:
419
  st.write(result_temp_seq[0])
420
+ show_protein_structure(result_temp_seq[0])
 
 
 
 
 
 
 
 
421
  with tab2:
 
 
422
  st.write(result_temp_seq[1])
423
+ show_protein_structure(result_temp_seq[1])
 
 
 
 
 
424
  with tab3:
425
  st.write(result_temp_seq[2])
426
+ show_protein_structure(result_temp_seq[2])
 
 
 
 
 
427
  with tab4:
428
  st.write(result_temp_seq[3])
429
+ show_protein_structure(result_temp_seq[3])
 
 
 
 
 
430
  with tab5:
431
  st.write(result_temp_seq[4])
432
+ show_protein_structure(result_temp_seq[4])
 
 
 
 
 
433
 
434
 
435
  elif option == 'activity prediction':
 
466
  expander.markdown("""
467
  A PDB ID is a unique 4-character code for each entry in the Protein Data Bank. The first character must be a number between 1 and 9, and the remaining three characters can be letters or numbers.
468
  see https://www.rcsb.org/ for more information.
469
+ """)