# PINNACLE / app.py
# michellemli's picture
# Update publication link
# 89649f4 verified
import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
from collections import Counter
# Named CSS colours handed out, in this order, to highlighted categories in
# the scatter plots below.
# Left out of the palette (kept here for reference): "darkgray", "darkgrey",
# "slategray", "slategrey", "lightslategray", "lightslategrey", "lightgray",
# "lightgrey", "gray", "grey", "dimgray", "dimgrey", "darkslategray",
# "darkslategrey", "aliceblue", "black", "beige", "antiquewhite", "bisque",
# "blanchedalmond".
css_colors = [
    "darkmagenta", "darkolivegreen", "darkorange", "darkorchid",
    "darkred", "darksalmon", "darkseagreen", "darkslateblue",
    "darkturquoise", "darkviolet", "deeppink", "deepskyblue",
    "dodgerblue", "firebrick", "coral", "magenta",
    "maroon", "mediumaquamarine", "mediumblue", "mediumorchid",
    "mediumpurple", "mediumseagreen", "mediumslateblue", "mediumspringgreen",
    "mediumturquoise", "mediumvioletred", "midnightblue", "mintcream",
    "mistyrose", "moccasin", "navajowhite", "navy",
    "oldlace", "olive", "olivedrab", "orange",
    "orangered", "orchid", "aqua", "aquamarine",
    "azure", "blue", "blueviolet", "brown",
    "burlywood", "cadetblue", "chartreuse", "chocolate",
    "cornflowerblue", "cornsilk", "crimson", "cyan",
    "darkblue", "darkcyan", "darkgoldenrod", "darkgreen",
    "darkkhaki", "floralwhite", "forestgreen", "fuchsia",
    "gainsboro", "ghostwhite", "gold", "goldenrod",
    "green", "greenyellow", "honeydew", "hotpink",
    "indianred", "indigo", "ivory", "khaki",
    "lavender", "lavenderblush", "lawngreen", "lemonchiffon",
    "lightblue", "lightcoral", "lightcyan", "lightgoldenrodyellow",
    "lightgreen", "lightpink", "lightsalmon", "lightseagreen",
    "lightskyblue", "lightsteelblue", "lightyellow", "lime",
    "limegreen", "linen", "palegoldenrod", "palegreen",
    "paleturquoise", "palevioletred", "papayawhip", "peachpuff",
    "peru", "pink", "plum", "powderblue",
    "purple", "red", "rosybrown", "royalblue",
    "rebeccapurple", "saddlebrown", "salmon", "sandybrown",
    "seagreen", "seashell", "sienna", "silver",
    "skyblue", "slateblue", "snow", "springgreen",
    "steelblue", "tan", "teal", "thistle",
    "tomato", "turquoise", "violet", "wheat",
    "white", "whitesmoke", "yellow", "yellowgreen",
]
# Read data
# Every non-empty line of data/inventory.txt is treated as the path of a
# tab-separated table; all tables are stacked into one frame.
data = []
with open("data/inventory.txt", "r") as fin:
    for f in fin:
        path = f.strip()
        # Skip blank lines so a stray empty line is not handed to
        # pd.read_csv as an empty path.
        if not path:
            continue
        data.append(pd.read_csv(path, sep = "\t"))
data = pd.concat(data)

# Sorted cell-type labels, leaving out any label that contains "CCI" or "BTO".
unique_celltypes = sorted([c for c in data["Celltype"].unique() if "CCI" not in c and "BTO" not in c])

# Per-label SAFE tables; "Label" is renamed to "Celltype" so the three tables
# can be merged on that column (default inner join).
max_safe_scores = pd.read_csv("data/max_safe_scores.csv", sep = "\t").rename(columns = {"Score": "Max SAFE Score", "Label": "Celltype"})
mean_safe_scores = pd.read_csv("data/mean_safe_scores.csv", sep = "\t").rename(columns = {"Score": "Mean SAFE Score", "Label": "Celltype"})
neighborhood_enrichment = pd.read_csv("data/safe_neighborhoods_enriched.csv", sep = "\t").rename(columns = {"Label": "Celltype"})
safe_scores = max_safe_scores.merge(mean_safe_scores, on = "Celltype")
safe_scores = safe_scores.merge(neighborhood_enrichment, on = "Celltype")
print(safe_scores)
# Helper functions
def plot_protein_emb(protein):
    """Plot the embedding with every context of one protein highlighted.

    Args:
        protein: Protein name to highlight. It is compared with the ``Name``
            column case-insensitively, ignoring surrounding whitespace.
            ``None`` is treated as an empty query.

    Returns:
        A ``(fig, protein_context_df)`` tuple: the Plotly scatter figure and
        the ``Name``/``Celltype``/``x``/``y`` columns of the matching rows.
    """
    hover_keys = {"Name": True, "Celltype": True, "x": False, "y": False, "Selected": False}

    # Normalise the query once instead of once per row.
    query = (protein or "").strip().lower()

    p_data = data.copy()
    # Matching rows are labelled with their cell type, all others share the
    # "Not Selected" label.
    p_data["Selected"] = [
        celltype if name == query else "Not Selected"
        for name, celltype in zip(p_data["Name"].str.lower(), p_data["Celltype"].tolist())
    ]
    # Matching points are drawn larger (10) than the background points (1).
    p_data["Size"] = [1 if label == "Not Selected" else 10 for label in p_data["Selected"].tolist()]
    symbol_map = {s: "circle" if s == 1 else "star" for s in p_data["Size"].unique()}

    # "Not Selected" takes part in the zip (and so uses up one palette slot)
    # before being forced to light grey.
    p_celltypes = p_data["Selected"].unique()
    color_map = {c: colour for c, colour in zip(p_celltypes, css_colors) if c != "Not Selected"}
    color_map["Not Selected"] = "lightgrey"

    fig = px.scatter(
        p_data,
        x = "x",
        y = "y",
        color = "Selected",
        color_discrete_map = color_map,
        symbol = "Size",
        symbol_map = symbol_map,
        size = "Size",
        opacity = 0.8,
        hover_data = hover_keys,
    )
    # Layout properties are passed as keywords: update_layout's second
    # positional parameter is `overwrite`, so a second positional dict would
    # not be applied as layout settings.
    fig.update_layout(
        plot_bgcolor = "rgba(0, 0, 0, 0)",
        paper_bgcolor = "rgba(0, 0, 0, 0)",
        showlegend = False,
    )
    fig.update_xaxes(title_text = "", showticklabels = False)
    fig.update_yaxes(title_text = "", showticklabels = False)
    fig.update_traces(marker=dict(line=dict(width=0)))

    protein_context_df = p_data[p_data["Selected"] != "Not Selected"][["Name", "Celltype", "x", "y"]]
    return fig, protein_context_df
def get_protein_counts(df):
    """Count the rows of ``df`` per cell type and attach the SAFE scores.

    Args:
        df: Frame with a ``Celltype`` column.

    Returns:
        A frame with one row per cell type, ordered by ``Celltype``, holding
        the row count as ``Activated Proteins`` plus the ``safe_scores``
        columns for every cell type present in both tables.
    """
    tally = Counter(df["Celltype"].tolist())
    per_celltype = pd.DataFrame(
        {
            "Celltype": list(tally),
            "Activated Proteins": list(tally.values()),
        }
    )
    # Order by label, then join with the module-level SAFE score table.
    result = per_celltype.sort_values(by = "Celltype").merge(safe_scores, on = "Celltype")
    print(result)
    return result
def plot_celltype_emb(celltype):
    """Plot the embedding coloured by the selected cell types.

    Args:
        celltype: Selected cell-type labels. If the selection contains
            ``"All"`` every point is coloured by its own cell type; otherwise
            only the selected cell types are coloured and every other point
            is light grey. Labels are matched case-insensitively. ``None`` is
            treated as an empty selection and a single string as a
            one-element selection.

    Returns:
        A ``(fig, activated_proteins_df)`` tuple: the Plotly scatter figure
        and the per-cell-type table built by ``get_protein_counts`` for the
        plotted selection.
    """
    hover_keys = {"Name": True, "Celltype": True, "x": False, "y": False}
    if isinstance(celltype, str):
        selection = [celltype]
    else:
        selection = list(celltype or [])

    if "All" in selection:
        fig = px.scatter(data, x = "x", y = "y", color = "Celltype", opacity = 0.4, hover_data = hover_keys)
        activated_proteins_df = get_protein_counts(data)
    else:
        hover_keys["Selected"] = False
        # Colours are assigned in selection order, keyed by lowercased label.
        palette = dict(zip((c.lower() for c in selection), css_colors))
        wanted = {c.lower() for c in selection}

        c_data = data.copy()
        # Compare lowercased labels on both sides, but keep the label exactly
        # as it appears in the data so the colour map keys match the column.
        c_data["Selected"] = [
            label if str(label).lower() in wanted else "Not Selected"
            for label in c_data["Celltype"].tolist()
        ]
        color_map = {
            label: palette[str(label).lower()]
            for label in c_data["Selected"].unique()
            if str(label).lower() in palette
        }
        color_map["Not Selected"] = "lightgrey"

        fig = px.scatter(c_data, x = "x", y = "y", color = "Selected", color_discrete_map = color_map, opacity = 0.8, hover_data = hover_keys)
        activated_proteins_df = get_protein_counts(c_data[c_data["Selected"] != "Not Selected"])

    # Layout properties are passed as keywords: update_layout's second
    # positional parameter is `overwrite`, so a second positional dict would
    # not be applied as layout settings.
    fig.update_layout(
        plot_bgcolor = "rgba(0, 0, 0, 0)",
        paper_bgcolor = "rgba(0, 0, 0, 0)",
        showlegend = False,
    )
    fig.update_xaxes(title_text = "", showticklabels = False)
    fig.update_yaxes(title_text = "", showticklabels = False)
    return fig, activated_proteins_df
# Create gradio interface
# Page structure: title and description, a "Protein" tab and a "Cell Type"
# tab (each with an input, a Submit button, a plot and a collapsed table),
# then a footer with project links.
with gr.Blocks() as demo:
    gr.Markdown('<center><h1>Contextualizing Protein Representations with PINNACLE</h1></center>')
    # The description is a single string literal joined by trailing
    # backslashes; its continuation lines stay flush left because any
    # indentation added to them would become part of the text.
    gr.Markdown('Protein interaction networks are a critical component to study the function and therapeutic potential of proteins. \
However, accurately modeling protein interactions across diverse biological contexts, such as tissues and cell types, \
remains a significant challenge for existing algorithms. Here, we introduce <b>PINNACLE</b>, a flexible geometric deep learning approach \
that trains on contextualized protein interaction networks to generate context-aware protein representations. Leveraging a \
multi-organ single cell transcriptomic atlas of humans, <b>PINNACLE provides 394,760 protein representations split across 156 cell-type \
contexts from 24 tissues and organs</b>. Our contextualized protein representations, infused with cellular and tissue organization, \
can easily be adapted for diverse downstream tasks.')
    gr.Markdown(' For more information, please check out our manuscript and documentation (links provided at the bottom of the page)!')
    with gr.Tabs():
        # Tab 1: highlight one protein across all of its contexts.
        with gr.TabItem("Protein"):
            with gr.Column():
                gr.Markdown('<center><h3>Select protein of interest to examine across biological contexts</h3></center>')
                protein = gr.Textbox(info = "Enter a protein name (in HGNC symbol)", lines = 1, value = "TNF", label = "Protein")
                protein_submit_btn = gr.Button("Submit")
                gr.Markdown('<center><h3>Contextualized protein representations</h3></center>')
                protein_plot = gr.Plot()
                # Table of the matching rows, collapsed by default.
                with gr.Accordion(label = "Protein Contexts", open = False):
                    protein_context_df = gr.Dataframe(headers = ["Protein", "Celltype", "x", "y"], overflow_row_behaviour = "paginate")
        # Tab 2: highlight one or more cell types ("All" is the default).
        with gr.TabItem("Cell Type"):
            with gr.Column():
                gr.Markdown('<center><h3>Select biological context by specifying cell type of interest</h3></center>')
                celltype = gr.Dropdown(["All"] + unique_celltypes, info = "Please select from the following cell types.", value = ["All"], multiselect = True, label="Cell Type")
                celltype_submit_btn = gr.Button("Submit")
                gr.Markdown('<center><h3>Contextualized protein representations</h3></center>')
                celltype_plot = gr.Plot()
                # Per-cell-type summary table, collapsed by default.
                with gr.Accordion(label = "Cell Type Context", open = False):
                    activated_proteins_df = gr.Dataframe(headers = ["Celltype", "Activated Proteins"], overflow_row_behaviour = "paginate")
    # Footer links; the three adjacent string literals are concatenated into
    # one HTML paragraph.
    gr.Markdown("<p style='text-align: center'><a href='https://github.com/mims-harvard/PINNACLE'>Github Repo</a>" \
                "| <a href='https://zitniklab.hms.harvard.edu/projects/PINNACLE/'>Documentation</a> " \
                "| <a href='https://www.nature.com/articles/s41592-024-02341-3/'>Publication</a></p>")
    # Event wiring: each Submit button calls its plotting helper with the
    # tab's input value and sends the two returned values to that tab's plot
    # and table.
    protein_submit_btn.click(plot_protein_emb, inputs = [protein], outputs = [protein_plot, protein_context_df])
    celltype_submit_btn.click(plot_celltype_emb, inputs = [celltype], outputs = [celltype_plot, activated_proteins_df])
# Launch
# Start the Gradio app only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    demo.launch()