Spaces:
Running
Running
File size: 9,203 Bytes
c7ced0b 89649f4 c7ced0b 979e80d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
from collections import Counter
from itertools import cycle

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
# Palette of CSS named colours handed out to highlighted categories, in this order.
# Deliberately left out of the palette:
# "darkgray", "darkgrey", "slategray", "slategrey", "lightslategray", "lightslategrey",
# "lightgray", "lightgrey", "gray", "grey", "dimgray", "dimgrey", "darkslategray",
# "darkslategrey", "aliceblue", "black", "beige", "antiquewhite", "bisque", "blanchedalmond",
css_colors = [
    "darkmagenta", "darkolivegreen", "darkorange", "darkorchid", "darkred",
    "darksalmon", "darkseagreen", "darkslateblue", "darkturquoise", "darkviolet",
    "deeppink", "deepskyblue", "dodgerblue", "firebrick", "coral",
    "magenta", "maroon", "mediumaquamarine", "mediumblue", "mediumorchid",
    "mediumpurple", "mediumseagreen", "mediumslateblue", "mediumspringgreen", "mediumturquoise",
    "mediumvioletred", "midnightblue", "mintcream", "mistyrose", "moccasin",
    "navajowhite", "navy", "oldlace", "olive", "olivedrab",
    "orange", "orangered", "orchid", "aqua", "aquamarine",
    "azure", "blue", "blueviolet", "brown", "burlywood",
    "cadetblue", "chartreuse", "chocolate", "cornflowerblue", "cornsilk",
    "crimson", "cyan", "darkblue", "darkcyan", "darkgoldenrod",
    "darkgreen", "darkkhaki", "floralwhite", "forestgreen", "fuchsia",
    "gainsboro", "ghostwhite", "gold", "goldenrod", "green",
    "greenyellow", "honeydew", "hotpink", "indianred", "indigo",
    "ivory", "khaki", "lavender", "lavenderblush", "lawngreen",
    "lemonchiffon", "lightblue", "lightcoral", "lightcyan", "lightgoldenrodyellow",
    "lightgreen", "lightpink", "lightsalmon", "lightseagreen", "lightskyblue",
    "lightsteelblue", "lightyellow", "lime", "limegreen", "linen",
    "palegoldenrod", "palegreen", "paleturquoise", "palevioletred", "papayawhip",
    "peachpuff", "peru", "pink", "plum", "powderblue",
    "purple", "red", "rosybrown", "royalblue", "rebeccapurple",
    "saddlebrown", "salmon", "sandybrown", "seagreen", "seashell",
    "sienna", "silver", "skyblue", "slateblue", "snow",
    "springgreen", "steelblue", "tan", "teal", "thistle",
    "tomato", "turquoise", "violet", "wheat", "white",
    "whitesmoke", "yellow", "yellowgreen",
]
# Read data
# data/inventory.txt holds one path per line; each path is a tab-separated
# table. All listed tables are loaded and stacked into the single `data` frame
# whose "Name", "Celltype", "x" and "y" columns the plotting helpers read.
with open("data/inventory.txt", "r") as fin:
    # Skip blank lines so pd.read_csv is never handed an empty path.
    table_paths = [line.strip() for line in fin if line.strip()]
data = pd.concat([pd.read_csv(path, sep="\t") for path in table_paths])

# Choices offered in the cell type dropdown: every distinct "Celltype" value
# except those whose name contains "CCI" or "BTO".
unique_celltypes = sorted(
    c for c in data["Celltype"].unique() if "CCI" not in c and "BTO" not in c
)

# SAFE summary tables. Each is keyed by "Label", renamed to "Celltype" so the
# three can be joined with each other and with the per-celltype protein counts.
max_safe_scores = pd.read_csv("data/max_safe_scores.csv", sep="\t").rename(
    columns={"Score": "Max SAFE Score", "Label": "Celltype"}
)
mean_safe_scores = pd.read_csv("data/mean_safe_scores.csv", sep="\t").rename(
    columns={"Score": "Mean SAFE Score", "Label": "Celltype"}
)
neighborhood_enrichment = pd.read_csv(
    "data/safe_neighborhoods_enriched.csv", sep="\t"
).rename(columns={"Label": "Celltype"})

safe_scores = max_safe_scores.merge(mean_safe_scores, on="Celltype")
safe_scores = safe_scores.merge(neighborhood_enrichment, on="Celltype")
print(safe_scores)
# Helper functions
def plot_protein_emb(protein):
    """Highlight every row of one protein on the embedding scatter plot.

    Args:
        protein: Protein name entered by the user. It is compared with the
            "Name" column case-insensitively, ignoring surrounding whitespace.
            An empty or missing value simply matches nothing.

    Returns:
        A ``(fig, protein_context_df)`` tuple: the Plotly scatter figure, and
        the "Name", "Celltype", "x" and "y" columns of the matching rows.
    """
    hover_keys = {"Name": True, "Celltype": True, "x": False, "y": False, "Selected": False}
    query = (protein or "").strip().lower()

    p_data = data.copy()
    is_match = (p_data["Name"].str.lower() == query).to_numpy()

    # Matching rows are labelled with their cell type so that each context
    # gets its own colour; everything else collapses into one grey group.
    p_data["Selected"] = np.where(
        is_match, p_data["Celltype"].to_numpy(dtype=object), "Not Selected"
    )
    # Matches are drawn ten times larger and as stars so they stand out.
    p_data["Size"] = np.where(is_match, 10, 1)
    symbol_map = {1: "circle", 10: "star"}

    # cycle() keeps assigning colours when there are more highlighted cell
    # types than palette entries; a plain zip() would silently stop there.
    p_celltypes = p_data["Selected"].unique()
    color_map = {
        label: color
        for label, color in zip(p_celltypes, cycle(css_colors))
        if label != "Not Selected"
    }
    color_map["Not Selected"] = "lightgrey"

    fig = px.scatter(
        p_data,
        x="x",
        y="y",
        color="Selected",
        color_discrete_map=color_map,
        symbol="Size",
        symbol_map=symbol_map,
        size="Size",
        opacity=0.8,
        hover_data=hover_keys,
    )
    # Both backgrounds must be passed as keywords: update_layout's second
    # positional parameter is `overwrite`, so a second dict passed
    # positionally is consumed as that flag and never applied.
    fig.update_layout(
        plot_bgcolor="rgba(0, 0, 0, 0)",
        paper_bgcolor="rgba(0, 0, 0, 0)",
        showlegend=False,
    )
    fig.update_xaxes(title_text="", showticklabels=False)
    fig.update_yaxes(title_text="", showticklabels=False)
    fig.update_traces(marker=dict(line=dict(width=0)))

    protein_context_df = p_data.loc[is_match, ["Name", "Celltype", "x", "y"]]
    return fig, protein_context_df
def get_protein_counts(df):
    """Count rows per cell type and attach the SAFE score columns.

    Args:
        df: Frame with a "Celltype" column; each row counts once.

    Returns:
        A frame with "Celltype", "Activated Proteins" (rows per cell type)
        and the columns of ``safe_scores``, ordered by "Celltype". Cell types
        without a row in ``safe_scores`` are dropped by the merge.
    """
    tally = Counter(df["Celltype"].tolist())
    per_celltype = pd.DataFrame(
        {
            "Celltype": list(tally.keys()),
            "Activated Proteins": list(tally.values()),
        }
    )
    summary = per_celltype.sort_values(by="Celltype").merge(safe_scores, on="Celltype")
    print(summary)
    return summary
def plot_celltype_emb(celltype):
    """Plot the embedding scatter, highlighting the requested cell types.

    Args:
        celltype: List of selected dropdown values. If it contains "All",
            every row is coloured by its cell type. Otherwise only rows whose
            "Celltype" matches a selected value (case-insensitively) are
            coloured and the rest are drawn in light grey. An empty or missing
            selection highlights nothing.

    Returns:
        A ``(fig, activated_proteins_df)`` tuple: the Plotly scatter figure,
        and the per-celltype summary built by ``get_protein_counts`` for the
        plotted selection.
    """
    hover_keys = {"Name": True, "Celltype": True, "x": False, "y": False}
    celltype = celltype or []

    if "All" in celltype:
        fig = px.scatter(
            data, x="x", y="y", color="Celltype", opacity=0.4, hover_data=hover_keys
        )
        activated_proteins_df = get_protein_counts(data)
    else:
        hover_keys["Selected"] = False

        # One colour per requested cell type, in selection order. cycle()
        # keeps assigning colours once the palette is exhausted.
        requested = dict(zip((c.lower() for c in celltype), cycle(css_colors)))

        c_data = data.copy()
        # Compare lower-cased on both sides. Lower-casing only the selection
        # would never match a cell type containing an upper-case letter.
        is_selected = c_data["Celltype"].str.lower().isin(set(requested)).to_numpy()
        c_data["Selected"] = np.where(
            is_selected, c_data["Celltype"].to_numpy(dtype=object), "Not Selected"
        )

        # Key the colour map by the labels as they appear in the data, since
        # those are the values plotly looks up.
        color_map = {
            label: requested[label.lower()]
            for label in c_data.loc[is_selected, "Celltype"].unique()
        }
        color_map["Not Selected"] = "lightgrey"

        fig = px.scatter(
            c_data,
            x="x",
            y="y",
            color="Selected",
            color_discrete_map=color_map,
            opacity=0.8,
            hover_data=hover_keys,
        )
        activated_proteins_df = get_protein_counts(c_data.loc[is_selected])

    # Both backgrounds must be passed as keywords: update_layout's second
    # positional parameter is `overwrite`, so a second dict passed
    # positionally is consumed as that flag and never applied.
    fig.update_layout(
        plot_bgcolor="rgba(0, 0, 0, 0)",
        paper_bgcolor="rgba(0, 0, 0, 0)",
        showlegend=False,
    )
    fig.update_xaxes(title_text="", showticklabels=False)
    fig.update_yaxes(title_text="", showticklabels=False)
    return fig, activated_proteins_df
# Create gradio interface
# Page layout: title and introduction, one tab per query mode (by protein,
# by cell type), a footer with project links, then the button wiring.
with gr.Blocks() as demo:
    gr.Markdown("<center><h1>Contextualizing Protein Representations with PINNACLE</h1></center>")
    gr.Markdown(
        "Protein interaction networks are a critical component to study the function and therapeutic potential of proteins. "
        "However, accurately modeling protein interactions across diverse biological contexts, such as tissues and cell types, "
        "remains a significant challenge for existing algorithms. Here, we introduce <b>PINNACLE</b>, a flexible geometric deep learning approach "
        "that trains on contextualized protein interaction networks to generate context-aware protein representations. Leveraging a "
        "multi-organ single cell transcriptomic atlas of humans, <b>PINNACLE provides 394,760 protein representations split across 156 cell-type "
        "contexts from 24 tissues and organs</b>. Our contextualized protein representations, infused with cellular and tissue organization, "
        "can easily be adapted for diverse downstream tasks."
    )
    gr.Markdown(" For more information, please check out our manuscript and documentation (links provided at the bottom of the page)!")

    with gr.Tabs():
        # Tab 1: look up one protein across all of its cell type contexts.
        with gr.TabItem("Protein"):
            with gr.Column():
                gr.Markdown("<center><h3>Select protein of interest to examine across biological contexts</h3></center>")
                protein = gr.Textbox(
                    info="Enter a protein name (in HGNC symbol)",
                    lines=1,
                    value="TNF",
                    label="Protein",
                )
                protein_submit_btn = gr.Button("Submit")
                gr.Markdown("<center><h3>Contextualized protein representations</h3></center>")
                protein_plot = gr.Plot()
                with gr.Accordion(label="Protein Contexts", open=False):
                    protein_context_df = gr.Dataframe(
                        headers=["Protein", "Celltype", "x", "y"],
                        overflow_row_behaviour="paginate",
                    )

        # Tab 2: highlight one or more cell types ("All" is the default).
        with gr.TabItem("Cell Type"):
            with gr.Column():
                gr.Markdown("<center><h3>Select biological context by specifying cell type of interest</h3></center>")
                celltype = gr.Dropdown(
                    ["All"] + unique_celltypes,
                    info="Please select from the following cell types.",
                    value=["All"],
                    multiselect=True,
                    label="Cell Type",
                )
                celltype_submit_btn = gr.Button("Submit")
                gr.Markdown("<center><h3>Contextualized protein representations</h3></center>")
                celltype_plot = gr.Plot()
                with gr.Accordion(label="Cell Type Context", open=False):
                    activated_proteins_df = gr.Dataframe(
                        headers=["Celltype", "Activated Proteins"],
                        overflow_row_behaviour="paginate",
                    )

    # Footer links.
    gr.Markdown(
        "<p style='text-align: center'><a href='https://github.com/mims-harvard/PINNACLE'>Github Repo</a>"
        "| <a href='https://zitniklab.hms.harvard.edu/projects/PINNACLE/'>Documentation</a> "
        "| <a href='https://www.nature.com/articles/s41592-024-02341-3/'>Publication</a></p>"
    )

    # Each Submit button feeds its input to the matching plot helper and
    # fills that tab's plot and table with the two returned values.
    protein_submit_btn.click(
        plot_protein_emb,
        inputs=[protein],
        outputs=[protein_plot, protein_context_df],
    )
    celltype_submit_btn.click(
        plot_celltype_emb,
        inputs=[celltype],
        outputs=[celltype_plot, activated_proteins_df],
    )
# Launch
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch it.
if __name__ == "__main__":
    demo.launch()
|