"""Gradio demo for browsing PINNACLE contextualized protein representations.

The script loads 2D embedding coordinates listed in ``data/inventory.txt``,
merges the per-cell-type SAFE score tables, and exposes two tabs: one that
highlights a protein across cell types and one that highlights the proteins
of the selected cell types.
"""

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
from collections import Counter

# Palette used to color highlighted groups, consumed in order.
css_colors = [
    "darkmagenta", "darkolivegreen", "darkorange", "darkorchid", "darkred",
    "darksalmon", "darkseagreen", "darkslateblue", "darkturquoise",
    "darkviolet", "deeppink", "deepskyblue", "dodgerblue", "firebrick",
    "coral", "magenta", "maroon", "mediumaquamarine", "mediumblue",
    "mediumorchid", "mediumpurple", "mediumseagreen", "mediumslateblue",
    "mediumspringgreen", "mediumturquoise", "mediumvioletred", "midnightblue",
    "mintcream", "mistyrose", "moccasin", "navajowhite", "navy", "oldlace",
    "olive", "olivedrab", "orange", "orangered", "orchid", "aqua",
    "aquamarine", "azure", "blue", "blueviolet", "brown", "burlywood",
    "cadetblue", "chartreuse", "chocolate", "cornflowerblue", "cornsilk",
    "crimson", "cyan", "darkblue", "darkcyan", "darkgoldenrod", "darkgreen",
    "darkkhaki", "floralwhite", "forestgreen", "fuchsia", "gainsboro",
    "ghostwhite", "gold", "goldenrod", "green", "greenyellow", "honeydew",
    "hotpink", "indianred", "indigo", "ivory", "khaki", "lavender",
    "lavenderblush", "lawngreen", "lemonchiffon", "lightblue", "lightcoral",
    "lightcyan", "lightgoldenrodyellow", "lightgreen", "lightpink",
    "lightsalmon", "lightseagreen", "lightskyblue", "lightsteelblue",
    "lightyellow", "lime", "limegreen", "linen", "palegoldenrod", "palegreen",
    "paleturquoise", "palevioletred", "papayawhip", "peachpuff", "peru",
    "pink", "plum", "powderblue", "purple", "red", "rosybrown", "royalblue",
    "rebeccapurple", "saddlebrown", "salmon", "sandybrown", "seagreen",
    "seashell", "sienna", "silver", "skyblue", "slateblue", "snow",
    "springgreen", "steelblue", "tan", "teal", "thistle", "tomato",
    "turquoise", "violet", "wheat", "white", "whitesmoke", "yellow",
    "yellowgreen",
]
# Colors left out of the palette:
# "darkgray", "darkgrey", "slategray", "slategrey", "lightslategray",
# "lightslategrey", "lightgray", "lightgrey", "gray", "grey", "dimgray",
# "dimgrey", "darkslategray", "darkslategrey", "aliceblue", "black",
# "beige", "antiquewhite", "bisque", "blanchedalmond",


# Read data
data = []
with open("data/inventory.txt", "r") as fin:
    for f in fin:
        path = f.strip()
        if not path:
            # A blank line would otherwise be handed to read_csv as "".
            continue
        c_data = pd.read_csv(path, sep="\t")
        data.append(c_data)
data = pd.concat(data)
unique_celltypes = sorted(
    [c for c in data["Celltype"].unique() if "CCI" not in c and "BTO" not in c]
)

max_safe_scores = pd.read_csv("data/max_safe_scores.csv", sep="\t").rename(
    columns={"Score": "Max SAFE Score", "Label": "Celltype"}
)
mean_safe_scores = pd.read_csv("data/mean_safe_scores.csv", sep="\t").rename(
    columns={"Score": "Mean SAFE Score", "Label": "Celltype"}
)
neighborhood_enrichment = pd.read_csv(
    "data/safe_neighborhoods_enriched.csv", sep="\t"
).rename(columns={"Label": "Celltype"})
safe_scores = max_safe_scores.merge(mean_safe_scores, on="Celltype")
safe_scores = safe_scores.merge(neighborhood_enrichment, on="Celltype")
print(safe_scores)


# Helper functions
def _style_embedding_figure(fig):
    """Apply the shared look to an embedding scatter plot.

    Makes the plot and paper backgrounds transparent, hides the legend, and
    removes axis titles and tick labels. The figure is modified in place and
    also returned.
    """
    # Both colors must be keyword arguments: a second positional dict would
    # be bound to update_layout's `overwrite` flag and silently dropped.
    fig.update_layout(
        plot_bgcolor="rgba(0, 0, 0, 0)",
        paper_bgcolor="rgba(0, 0, 0, 0)",
        showlegend=False,
    )
    fig.update_xaxes(title_text="", showticklabels=False)
    fig.update_yaxes(title_text="", showticklabels=False)
    return fig


def plot_protein_emb(protein):
    """Highlight every row whose ``Name`` matches ``protein``.

    Args:
        protein: Protein name typed by the user; matched case-insensitively
            against the ``Name`` column after trimming surrounding whitespace.

    Returns:
        A ``(figure, dataframe)`` tuple: the scatter plot with matching rows
        drawn as large stars colored per cell type, and the matching rows
        restricted to the ``Name``, ``Celltype``, ``x`` and ``y`` columns.
    """
    hover_keys = {"Name": True, "Celltype": True, "x": False, "y": False, "Selected": False}
    query = (protein or "").strip().lower()

    p_data = data.copy()
    p_data["Selected"] = [
        c if p == query else "Not Selected"
        for p, c in zip(p_data["Name"].str.lower(), p_data["Celltype"].tolist())
    ]
    p_data["Size"] = [1 if i == "Not Selected" else 10 for i in p_data["Selected"].tolist()]
    symbol_map = {s: "circle" if s == 1 else "star" for s in p_data["Size"].unique()}

    p_celltypes = p_data["Selected"].unique()
    color_map = {c: i for c, i in zip(p_celltypes, css_colors) if c != "Not Selected"}
    color_map.update({"Not Selected": "lightgrey"})

    fig = px.scatter(
        p_data,
        x="x",
        y="y",
        color="Selected",
        color_discrete_map=color_map,
        symbol="Size",
        symbol_map=symbol_map,
        size="Size",
        opacity=0.8,
        hover_data=hover_keys,
    )
    _style_embedding_figure(fig)
    fig.update_traces(marker=dict(line=dict(width=0)))

    protein_context_df = p_data[p_data["Selected"] != "Not Selected"][["Name", "Celltype", "x", "y"]]
    return fig, protein_context_df


def get_protein_counts(df):
    """Count rows per cell type and attach the SAFE score columns.

    Args:
        df: DataFrame with a ``Celltype`` column.

    Returns:
        DataFrame with ``Celltype``, ``Activated Proteins`` (row count per
        cell type) and the columns of ``safe_scores``. The merge is an inner
        join, so cell types absent from ``safe_scores`` are dropped.
    """
    counts = Counter(df["Celltype"].tolist())
    df = pd.DataFrame(
        {"Celltype": list(counts.keys()), "Activated Proteins": list(counts.values())}
    )
    df = df.sort_values(by="Celltype")
    df = df.merge(safe_scores, on="Celltype")
    print(df)
    return df


def plot_celltype_emb(celltype):
    """Plot the embedding colored by cell type, or highlight a selection.

    Args:
        celltype: List of selected cell type names. If it contains ``"All"``
            every row is colored by its cell type; otherwise only rows whose
            cell type is selected (compared case-insensitively) are colored.

    Returns:
        A ``(figure, dataframe)`` tuple: the scatter plot and the per-cell-type
        counts from ``get_protein_counts`` for the plotted selection.
    """
    hover_keys = {"Name": True, "Celltype": True, "x": False, "y": False}
    celltype = celltype or []

    if "All" in celltype:
        fig = px.scatter(
            data, x="x", y="y", color="Celltype", opacity=0.4, hover_data=hover_keys
        )
        activated_proteins_df = get_protein_counts(data)
    else:
        hover_keys["Selected"] = False
        c_data = data.copy()

        lowered = [c.lower() for c in celltype]
        wanted = set(lowered)
        color_by_lower = dict(zip(lowered, css_colors))

        # Compare in lower case on both sides so cell types containing
        # uppercase characters can still be matched.
        c_data["Selected"] = [
            c if isinstance(c, str) and c.lower() in wanted else "Not Selected"
            for c in c_data["Celltype"].tolist()
        ]

        # Key the colors by the labels that actually appear in "Selected".
        color_map = dict(color_by_lower)
        for name in c_data["Selected"].unique():
            if name == "Not Selected":
                continue
            color = color_by_lower.get(name.lower())
            if color is not None:
                color_map[name] = color
        color_map["Not Selected"] = "lightgrey"

        fig = px.scatter(
            c_data,
            x="x",
            y="y",
            color="Selected",
            color_discrete_map=color_map,
            opacity=0.8,
            hover_data=hover_keys,
        )
        activated_proteins_df = get_protein_counts(c_data[c_data["Selected"] != "Not Selected"])

    _style_embedding_figure(fig)
    return fig, activated_proteins_df


# Create gradio interface
with gr.Blocks() as demo:
    gr.Markdown("\n\nContextualizing Protein Representations with PINNACLE\n\n")
    gr.Markdown(
        "Protein interaction networks are a critical component to study the function and therapeutic potential of proteins. "
        "However, accurately modeling protein interactions across diverse biological contexts, such as tissues and cell types, "
        "remains a significant challenge for existing algorithms. Here, we introduce PINNACLE, a flexible geometric deep learning approach "
        "that trains on contextualized protein interaction networks to generate context-aware protein representations. Leveraging a "
        "multi-organ single cell transcriptomic atlas of humans, PINNACLE provides 394,760 protein representations split across 156 cell-type "
        "contexts from 24 tissues and organs. Our contextualized protein representations, infused with cellular and tissue organization, "
        "can easily be adapted for diverse downstream tasks."
    )
    gr.Markdown(" For more information, please check out our manuscript and documentation (links provided at the bottom of the page)!")

    with gr.Tabs():
        with gr.TabItem("Protein"):
            with gr.Column():
                gr.Markdown("\n\nSelect protein of interest to examine across biological contexts\n\n")
                protein = gr.Textbox(
                    info="Enter a protein name (in HGNC symbol)",
                    lines=1,
                    value="TNF",
                    label="Protein",
                )
                protein_submit_btn = gr.Button("Submit")
                gr.Markdown("\n\nContextualized protein representations\n\n")
                protein_plot = gr.Plot()
                with gr.Accordion(label="Protein Contexts", open=False):
                    protein_context_df = gr.Dataframe(
                        headers=["Protein", "Celltype", "x", "y"],
                        overflow_row_behaviour="paginate",
                    )

        with gr.TabItem("Cell Type"):
            with gr.Column():
                gr.Markdown("\n\nSelect biological context by specifying cell type of interest\n\n")
                celltype = gr.Dropdown(
                    ["All"] + unique_celltypes,
                    info="Please select from the following cell types.",
                    value=["All"],
                    multiselect=True,
                    label="Cell Type",
                )
                celltype_submit_btn = gr.Button("Submit")
                gr.Markdown("\n\nContextualized protein representations\n\n")
                celltype_plot = gr.Plot()
                with gr.Accordion(label="Cell Type Context", open=False):
                    activated_proteins_df = gr.Dataframe(
                        headers=["Celltype", "Activated Proteins"],
                        overflow_row_behaviour="paginate",
                    )

    # NOTE(review): the footer names three links but no URLs are present in
    # this text — confirm the intended link targets.
    gr.Markdown(
        "\n\nGithub Repo"
        "| Documentation "
        "| Publication\n\n"
    )

    protein_submit_btn.click(
        plot_protein_emb,
        inputs=[protein],
        outputs=[protein_plot, protein_context_df],
    )
    celltype_submit_btn.click(
        plot_celltype_emb,
        inputs=[celltype],
        outputs=[celltype_plot, activated_proteins_df],
    )


# Launch
if __name__ == "__main__":
    demo.launch()