Spaces:

michellemli
/

PINNACLE

Running

App Files Files Community

PINNACLE / app.py

michellemli

Update publication link

89649f4 verified 11 months ago

raw

history blame contribute delete

9.2 kB

	import gradio as gr
	import numpy as np
	import pandas as pd
	import plotly.express as px
	from collections import Counter


	# Palette of CSS named colors, zipped against group names to build the
	# color maps for the scatter plots.
	# Names listed in the original but left out of the palette:
	#   "darkgray", "darkgrey", "slategray", "slategrey", "lightslategray",
	#   "lightslategrey", "lightgray", "lightgrey", "gray", "grey", "dimgray",
	#   "dimgrey", "darkslategray", "darkslategrey", "aliceblue", "black",
	#   "beige", "antiquewhite", "bisque", "blanchedalmond"
	css_colors = [
	    "darkmagenta", "darkolivegreen", "darkorange", "darkorchid", "darkred",
	    "darksalmon", "darkseagreen", "darkslateblue", "darkturquoise", "darkviolet",
	    "deeppink", "deepskyblue", "dodgerblue", "firebrick", "coral",
	    "magenta", "maroon", "mediumaquamarine", "mediumblue", "mediumorchid",
	    "mediumpurple", "mediumseagreen", "mediumslateblue", "mediumspringgreen", "mediumturquoise",
	    "mediumvioletred", "midnightblue", "mintcream", "mistyrose", "moccasin",
	    "navajowhite", "navy", "oldlace", "olive", "olivedrab",
	    "orange", "orangered", "orchid", "aqua", "aquamarine",
	    "azure", "blue", "blueviolet", "brown", "burlywood",
	    "cadetblue", "chartreuse", "chocolate", "cornflowerblue", "cornsilk",
	    "crimson", "cyan", "darkblue", "darkcyan", "darkgoldenrod",
	    "darkgreen", "darkkhaki", "floralwhite", "forestgreen", "fuchsia",
	    "gainsboro", "ghostwhite", "gold", "goldenrod", "green",
	    "greenyellow", "honeydew", "hotpink", "indianred", "indigo",
	    "ivory", "khaki", "lavender", "lavenderblush", "lawngreen",
	    "lemonchiffon", "lightblue", "lightcoral", "lightcyan", "lightgoldenrodyellow",
	    "lightgreen", "lightpink", "lightsalmon", "lightseagreen", "lightskyblue",
	    "lightsteelblue", "lightyellow", "lime", "limegreen", "linen",
	    "palegoldenrod", "palegreen", "paleturquoise", "palevioletred", "papayawhip",
	    "peachpuff", "peru", "pink", "plum", "powderblue",
	    "purple", "red", "rosybrown", "royalblue", "rebeccapurple",
	    "saddlebrown", "salmon", "sandybrown", "seagreen", "seashell",
	    "sienna", "silver", "skyblue", "slateblue", "snow",
	    "springgreen", "steelblue", "tan", "teal", "thistle",
	    "tomato", "turquoise", "violet", "wheat", "white",
	    "whitesmoke", "yellow", "yellowgreen",
	]

	# Read data
	# data/inventory.txt holds one table path per line; every listed
	# tab-separated table is loaded and stacked into a single frame.
	with open("data/inventory.txt", "r") as fin:
	    # Skip blank lines so a stray empty line in the inventory is not handed
	    # to pd.read_csv as an empty path.
	    inventory_paths = [line.strip() for line in fin if line.strip()]
	data = pd.concat([pd.read_csv(path, sep="\t") for path in inventory_paths])

	# Cell types offered for selection: every distinct "Celltype" value except
	# those whose label contains "CCI" or "BTO".
	unique_celltypes = sorted(c for c in data["Celltype"].unique() if "CCI" not in c and "BTO" not in c)

	# SAFE score tables (tab-separated despite the .csv extension). Their "Label"
	# column is renamed to "Celltype" so the tables can be joined on it.
	max_safe_scores = pd.read_csv("data/max_safe_scores.csv", sep="\t").rename(
	    columns={"Score": "Max SAFE Score", "Label": "Celltype"}
	)
	mean_safe_scores = pd.read_csv("data/mean_safe_scores.csv", sep="\t").rename(
	    columns={"Score": "Mean SAFE Score", "Label": "Celltype"}
	)
	neighborhood_enrichment = pd.read_csv("data/safe_neighborhoods_enriched.csv", sep="\t").rename(
	    columns={"Label": "Celltype"}
	)

	# One row per cell type that appears in all three tables.
	safe_scores = max_safe_scores.merge(mean_safe_scores, on="Celltype").merge(
	    neighborhood_enrichment, on="Celltype"
	)
	print(safe_scores)

	# Helper functions
	def plot_protein_emb(protein):
	    """Plot the embedding with one protein highlighted across cell types.

	    Rows of the module-level ``data`` table whose ``Name`` equals ``protein``
	    (compared case-insensitively, ignoring surrounding whitespace) are drawn
	    as size-10 stars colored per ``Celltype``; every other row is a size-1
	    light-grey circle.

	    Args:
	        protein: Name of the protein to highlight.

	    Returns:
	        A ``(fig, protein_context_df)`` tuple: the Plotly scatter figure and a
	        DataFrame holding the ``Name``, ``Celltype``, ``x`` and ``y`` columns
	        of the highlighted rows.
	    """
	    hover_keys = {"Name": True, "Celltype": True, "x": False, "y": False, "Selected": False}
	    # Tolerate stray whitespace typed around the name.
	    query = protein.strip().lower()

	    p_data = data.copy()
	    lowered_names = p_data["Name"].str.lower()
	    p_data["Selected"] = [
	        context if name == query else "Not Selected"
	        for name, context in zip(lowered_names, p_data["Celltype"])
	    ]
	    p_data["Size"] = [1 if label == "Not Selected" else 10 for label in p_data["Selected"]]
	    symbol_map = {size: "circle" if size == 1 else "star" for size in p_data["Size"].unique()}

	    # Pair each label with a palette color in order of first appearance;
	    # "Not Selected" is always forced to light grey.
	    color_map = {
	        label: color
	        for label, color in zip(p_data["Selected"].unique(), css_colors)
	        if label != "Not Selected"
	    }
	    color_map["Not Selected"] = "lightgrey"

	    fig = px.scatter(
	        p_data,
	        x="x",
	        y="y",
	        color="Selected",
	        color_discrete_map=color_map,
	        symbol="Size",
	        symbol_map=symbol_map,
	        size="Size",
	        opacity=0.8,
	        hover_data=hover_keys,
	    )
	    # Pass layout properties as keywords: update_layout's second positional
	    # parameter is ``overwrite``, so a second positional dict is not applied
	    # as layout and paper_bgcolor would be silently dropped.
	    fig.update_layout(
	        plot_bgcolor="rgba(0, 0, 0, 0)",
	        paper_bgcolor="rgba(0, 0, 0, 0)",
	        showlegend=False,
	    )
	    fig.update_xaxes(title_text="", showticklabels=False)
	    fig.update_yaxes(title_text="", showticklabels=False)
	    fig.update_traces(marker=dict(line=dict(width=0)))

	    protein_context_df = p_data[p_data["Selected"] != "Not Selected"][["Name", "Celltype", "x", "y"]]

	    return fig, protein_context_df


	def get_protein_counts(df):
	    """Count rows per cell type and attach the SAFE score columns.

	    Args:
	        df: DataFrame with a ``Celltype`` column.

	    Returns:
	        A DataFrame with ``Celltype`` and ``Activated Proteins`` (rows per
	        cell type), ordered by ``Celltype`` and joined with the module-level
	        ``safe_scores`` table on ``Celltype``. The join uses ``merge``'s
	        default mode, so cell types missing from ``safe_scores`` are dropped.
	    """
	    tally = Counter(df["Celltype"].tolist())
	    counts_df = pd.DataFrame(
	        {
	            "Celltype": list(tally),
	            "Activated Proteins": list(tally.values()),
	        }
	    )
	    counts_df = counts_df.sort_values(by="Celltype").merge(safe_scores, on="Celltype")
	    print(counts_df)
	    return counts_df


	def plot_celltype_emb(celltype):
	    """Plot the embedding, optionally highlighting the chosen cell types.

	    When ``"All"`` is among the selections, every row of the module-level
	    ``data`` table is colored by its ``Celltype``. Otherwise rows whose
	    ``Celltype`` matches a selection (case-insensitively) are colored per
	    cell type and the remaining rows are drawn in light grey.

	    Args:
	        celltype: Collection of selected cell type names; may contain "All".

	    Returns:
	        A ``(fig, activated_proteins_df)`` tuple: the Plotly scatter figure
	        and the per-cell-type table produced by ``get_protein_counts`` for
	        the displayed selection.
	    """
	    hover_keys = {"Name": True, "Celltype": True, "x": False, "y": False}
	    if "All" in celltype:
	        fig = px.scatter(data, x="x", y="y", color="Celltype", opacity=0.4, hover_data=hover_keys)
	        activated_proteins_df = get_protein_counts(data)
	    else:
	        hover_keys["Selected"] = False
	        c_data = data.copy()
	        wanted = [name.lower() for name in celltype]
	        wanted_set = set(wanted)

	        color_map = dict(zip(wanted, css_colors))
	        color_map["Not Selected"] = "lightgrey"

	        # Lower-case BOTH sides of the comparison. Lower-casing only the
	        # selection would make any cell type whose name contains an
	        # uppercase letter impossible to highlight. The lower-cased label is
	        # stored so it lines up with the color_map keys.
	        c_data["Selected"] = [
	            name.lower() if name.lower() in wanted_set else "Not Selected"
	            for name in c_data["Celltype"]
	        ]
	        fig = px.scatter(
	            c_data,
	            x="x",
	            y="y",
	            color="Selected",
	            color_discrete_map=color_map,
	            opacity=0.8,
	            hover_data=hover_keys,
	        )

	        activated_proteins_df = get_protein_counts(c_data[c_data["Selected"] != "Not Selected"])

	    # Pass layout properties as keywords: update_layout's second positional
	    # parameter is ``overwrite``, so a second positional dict is not applied
	    # as layout and paper_bgcolor would be silently dropped.
	    fig.update_layout(
	        plot_bgcolor="rgba(0, 0, 0, 0)",
	        paper_bgcolor="rgba(0, 0, 0, 0)",
	        showlegend=False,
	    )
	    fig.update_xaxes(title_text="", showticklabels=False)
	    fig.update_yaxes(title_text="", showticklabels=False)
	    return fig, activated_proteins_df


	# Create gradio interface
	# NOTE(review): the widget nesting below was reconstructed from a source whose
	# indentation had been flattened — confirm it against the deployed layout.
	with gr.Blocks() as demo:
	    gr.Markdown('<center><h1>Contextualizing Protein Representations with PINNACLE</h1></center>')
	    # Adjacent literals instead of backslash continuations inside one literal,
	    # so source indentation does not end up inside the Markdown text.
	    gr.Markdown(
	        'Protein interaction networks are a critical component to study the function and therapeutic potential of proteins. '
	        'However, accurately modeling protein interactions across diverse biological contexts, such as tissues and cell types, '
	        'remains a significant challenge for existing algorithms. Here, we introduce <b>PINNACLE</b>, a flexible geometric deep learning approach '
	        'that trains on contextualized protein interaction networks to generate context-aware protein representations. Leveraging a '
	        'multi-organ single cell transcriptomic atlas of humans, <b>PINNACLE provides 394,760 protein representations split across 156 cell-type '
	        'contexts from 24 tissues and organs</b>. Our contextualized protein representations, infused with cellular and tissue organization, '
	        'can easily be adapted for diverse downstream tasks.'
	    )
	    gr.Markdown(' For more information, please check out our manuscript and documentation (links provided at the bottom of the page)!')

	    with gr.Tabs():

	        # Tab 1: highlight a single protein across all cell type contexts.
	        with gr.TabItem("Protein"):
	            with gr.Column():
	                gr.Markdown('<center><h3>Select protein of interest to examine across biological contexts</h3></center>')
	                protein = gr.Textbox(info="Enter a protein name (in HGNC symbol)", lines=1, value="TNF", label="Protein")
	                protein_submit_btn = gr.Button("Submit")

	                gr.Markdown('<center><h3>Contextualized protein representations</h3></center>')
	                protein_plot = gr.Plot()

	                with gr.Accordion(label="Protein Contexts", open=False):
	                    protein_context_df = gr.Dataframe(headers=["Protein", "Celltype", "x", "y"], overflow_row_behaviour="paginate")

	        # Tab 2: highlight every protein belonging to the chosen cell types.
	        with gr.TabItem("Cell Type"):
	            with gr.Column():
	                gr.Markdown('<center><h3>Select biological context by specifying cell type of interest</h3></center>')
	                celltype = gr.Dropdown(["All"] + unique_celltypes, info="Please select from the following cell types.", value=["All"], multiselect=True, label="Cell Type")
	                celltype_submit_btn = gr.Button("Submit")

	                gr.Markdown('<center><h3>Contextualized protein representations</h3></center>')
	                celltype_plot = gr.Plot()

	                with gr.Accordion(label="Cell Type Context", open=False):
	                    activated_proteins_df = gr.Dataframe(headers=["Celltype", "Activated Proteins"], overflow_row_behaviour="paginate")

	    # "\\|" yields the same backslash-pipe text as the former "\|" without
	    # relying on an invalid string escape sequence.
	    gr.Markdown(
	        "<p style='text-align: center'><a href='https://github.com/mims-harvard/PINNACLE'>Github Repo</a>"
	        "\\| <a href='https://zitniklab.hms.harvard.edu/projects/PINNACLE/'>Documentation</a> "
	        "\\| <a href='https://www.nature.com/articles/s41592-024-02341-3/'>Publication</a></p>"
	    )

	    # Wire each Submit button to its plotting function; each function returns
	    # a (figure, table) pair matching the two output components.
	    protein_submit_btn.click(plot_protein_emb, inputs=[protein], outputs=[protein_plot, protein_context_df])
	    celltype_submit_btn.click(plot_celltype_emb, inputs=[celltype], outputs=[celltype_plot, activated_proteins_df])


	# Launch
	if __name__ == "__main__":
	    demo.launch()