CPT / app.py
cz-ye's picture
Update app.py
d220a3f
import streamlit as st
import pandas as pd
from os import listdir
import plotly.graph_objects as go
# Every per-protein score file is named '<gene>_HUMAN.csv.gz'; the lookup code
# later in this script rebuilds the path from the gene name with that suffix.
_SCORE_FILE_SUFFIX = '_HUMAN.csv.gz'


def _list_gene_names(directory):
    """Return the sorted gene names that have a score file in `directory`.

    Only entries ending in the score-file suffix are kept, so stray entries
    (hidden files, READMEs, ...) never show up as selectable genes. The gene
    name is recovered by stripping exactly that suffix, which is the inverse
    of how the file path is rebuilt when the scores are read; a name that
    itself contains '_' therefore still maps back to an existing file.
    Sorting makes the drop-down order deterministic (listdir order is
    arbitrary).

    Raises whatever `listdir` raises (e.g. FileNotFoundError) if the
    directory is missing.
    """
    return sorted(
        entry[:-len(_SCORE_FILE_SUFFIX)]
        for entry in listdir(directory)
        if entry.endswith(_SCORE_FILE_SUFFIX)
    )


gene_names_eve = _list_gene_names('./CPT1_score_EVE_set/')
gene_names_no_eve1 = _list_gene_names('./CPT1_score_no_EVE_set_1/')
gene_names_no_eve2 = _list_gene_names('./CPT1_score_no_EVE_set_2/')
# Page header: tool name, tagline and a short description of the data set.
st.subheader('CPT-1')
st.markdown('Cross-protein transfer learning for variant effect prediction')
st.markdown(
    'This is a lookup tool for the variant effect predictions of CPT-1 for '
    '18,602 human proteins, initially released with the manuscript '
    '"Cross-protein transfer learning substantially improves zero-shot '
    'prediction of disease variant effects (2022)".'
)
# Gene picker: a single drop-down listing the genes of all three score sets,
# in the order EVE set, no-EVE set 1, no-EVE set 2.
selectable_genes = [*gene_names_eve, *gene_names_no_eve1, *gene_names_no_eve2]
gene = st.selectbox(
    label='Which gene/protein are you interested in? (in UniProt gene names)',
    options=selectable_genes,
)
def _score_file_path(gene_name):
    """Return the path of the gzipped CSV holding the scores of `gene_name`.

    The directory is chosen by checking which of the three gene-name lists
    contains the gene; anything not in the first two falls through to the
    second no-EVE set.
    """
    if gene_name in gene_names_eve:
        directory = './CPT1_score_EVE_set/'
    elif gene_name in gene_names_no_eve1:
        directory = './CPT1_score_no_EVE_set_1/'
    else:
        directory = './CPT1_score_no_EVE_set_2/'
    return directory + gene_name + '_HUMAN.csv.gz'


def _heatmap_matrix(pred):
    """Pivot the long score table into a mutant-residue x position matrix.

    `pred` must have a 'mutant' column. The last character of each 'mutant'
    value becomes the row label (mutant amino acid) and everything before it
    becomes the column label (wild-type amino acid and position).
    The input frame is not modified.
    """
    mat = pred.copy()
    mat['Mutant amino acid'] = mat['mutant'].str[-1]
    mat['Position on protein sequence'] = mat['mutant'].str[:-1]
    # With 'mutant' moved to the index, columns[0] is the first remaining
    # column of the CSV — presumably the score column (verify against data).
    mat = mat.set_index('mutant')
    # An ordered categorical keeps the columns in file order after the pivot
    # instead of sorting the labels lexicographically.
    mat['Position on protein sequence'] = pd.Categorical(
        mat['Position on protein sequence'],
        categories=mat['Position on protein sequence'].unique(),
        ordered=True,
    )
    return mat.pivot(
        index='Mutant amino acid',
        columns='Position on protein sequence',
        values=mat.columns[0],
    )


def _heatmap_figure(mat, gene_name):
    """Build the Plotly heatmap figure for the pivoted score matrix `mat`."""
    fig = go.Figure()
    fig.add_trace(
        go.Heatmap(
            z=mat,
            y=mat.index,
            x=mat.columns,
            colorbar=dict(title='Variant effect'),
            # Blue -> white -> red scale (see the figure title for the legend).
            colorscale=[
                [0, '#6FA8DC'],
                [0.3, '#CFE2F3'],
                [0.5, '#FFFFFF'],
                [0.7, '#F4CCCC'],
                [1.0, '#BA1111'],
            ],
            hovertemplate=(
                'Wild-type amino acid and position: %{x}'
                '<br>Mutant amino acid: %{y}'
                '<br>Prediction: %{z}<extra></extra>'
            ),
        )
    )
    fig.update_layout(
        title_text=(
            "CPT-1 variant effect prediction for " + gene_name
            + " (red: pathogenic, blue: benign)"
        ),
        xaxis=dict(
            title='Position on protein sequence',
            rangeslider=dict(visible=True),
        ),
        yaxis=dict(
            title='Mutant amino acid',
        ),
        # One tick per row so every mutant amino acid is labelled.
        yaxis_nticks=mat.shape[0],
        height=600,
    )
    return fig


# A Streamlit button is True only during the rerun triggered by its own click.
# Clicking 'Download CSV' triggers another rerun, so the gene being shown is
# remembered in session state; without this, the chart and the download button
# would disappear as soon as the download button is pressed.
if st.button('Show results'):
    st.session_state['shown_gene'] = gene
elif st.session_state.get('shown_gene') != gene:
    # A different gene was selected since the last click: hide stale results
    # until 'Show results' is pressed again.
    st.session_state.pop('shown_gene', None)

# `gene` is None when the selector has no options; nothing to show then.
if st.session_state.get('shown_gene') is not None:
    # Read the gene file
    pred = pd.read_csv(_score_file_path(gene), compression='gzip')

    # Plot heatmap
    fig = _heatmap_figure(_heatmap_matrix(pred), gene)
    st.plotly_chart(fig, theme="streamlit", use_container_width=True, height=600)

    # Generate download link
    st.download_button(
        'Download CSV',
        pred.set_index('mutant').to_csv().encode('utf-8'),
        gene + '_CPT_score.csv',
        'text/csv',
    )
# Citation
# The backslash in '\*' must reach Markdown literally so the asterisk is not
# read as emphasis. Raw literals are used for those fragments because '\*' in
# a normal string literal is an invalid escape sequence (a warning today, an
# error in a future Python); the rendered text is unchanged.
# NOTE(review): '[email protected]' looks like an obfuscation placeholder rather
# than a real address — restore the actual contact e-mail if so.
st.markdown(
    '</br>'
    '<h5> Citation </h5>'
    r'Jagota, M.\*, Ye, C.\*, Rastogi, R., Albors, C., Koehl, A., Ioannidis, N., and Song, Y.S.&dagger;'
    '"Cross-protein transfer learning substantially improves zero-shot prediction of disease variant effects", '
    'bioRxiv (2022) </br>'
    r'\*These authors contributed equally to this work. </br>'
    '&dagger; To whom correspondence should be addressed: [email protected] </br>'
    'DOI: https://doi.org/10.1101/2022.11.15.516532',
    unsafe_allow_html=True,
)