File size: 9,203 Bytes
c7ced0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89649f4
c7ced0b
 
 
 
 
 
 
979e80d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
from collections import Counter


# Named CSS colours used as the discrete palette for highlighted selections.
# The plotting helpers pair labels with these entries in order via zip(), so
# the order of this list decides which colour each selection gets, and any
# selection beyond len(css_colors) receives no explicit colour.
# The trailing comment keeps the names that were left out of the palette
# (presumably because the greys would clash with the "lightgrey" used for
# "Not Selected" points - confirm before re-adding them).
css_colors = ["darkmagenta", "darkolivegreen", "darkorange", "darkorchid", "darkred", "darksalmon", "darkseagreen", "darkslateblue", "darkturquoise", "darkviolet", "deeppink", "deepskyblue", "dodgerblue", "firebrick", "coral", "magenta", "maroon", "mediumaquamarine", "mediumblue", "mediumorchid", "mediumpurple", "mediumseagreen", "mediumslateblue", "mediumspringgreen", "mediumturquoise", "mediumvioletred", "midnightblue", "mintcream", "mistyrose", "moccasin", "navajowhite", "navy", "oldlace", "olive", "olivedrab", "orange", "orangered", "orchid", "aqua", "aquamarine", "azure", "blue", "blueviolet", "brown", "burlywood", "cadetblue", "chartreuse", "chocolate", "cornflowerblue", "cornsilk", "crimson", "cyan", "darkblue", "darkcyan", "darkgoldenrod", "darkgreen", "darkkhaki", "floralwhite", "forestgreen", "fuchsia", "gainsboro", "ghostwhite", "gold", "goldenrod", "green", "greenyellow", "honeydew", "hotpink", "indianred", "indigo", "ivory", "khaki", "lavender", "lavenderblush", "lawngreen", "lemonchiffon", "lightblue", "lightcoral", "lightcyan", "lightgoldenrodyellow", "lightgreen", "lightpink", "lightsalmon", "lightseagreen", "lightskyblue", "lightsteelblue", "lightyellow", "lime", "limegreen", "linen", "palegoldenrod", "palegreen", "paleturquoise", "palevioletred", "papayawhip", "peachpuff", "peru", "pink", "plum", "powderblue", "purple", "red", "rosybrown", "royalblue", "rebeccapurple", "saddlebrown", "salmon", "sandybrown", "seagreen", "seashell", "sienna", "silver", "skyblue", "slateblue", "snow", "springgreen", "steelblue", "tan", "teal", "thistle", "tomato", "turquoise", "violet", "wheat", "white", "whitesmoke", "yellow", "yellowgreen"] # "darkgray", "darkgrey", "slategray", "slategrey", "lightslategray", "lightslategrey", "lightgray", "lightgrey", "gray", "grey", "dimgray", "dimgrey", "darkslategray", "darkslategrey", "aliceblue",  "black", "beige", "antiquewhite", "bisque", "blanchedalmond", 

# Read data
# data/inventory.txt lists one tab-separated table per line. Blank lines are
# skipped so that a stray empty line does not reach pd.read_csv("") and abort
# start-up with a confusing FileNotFoundError.
with open("data/inventory.txt", "r") as fin:
    inventory_paths = [line.strip() for line in fin]
data = pd.concat([pd.read_csv(path, sep = "\t") for path in inventory_paths if path])

# Cell types offered in the UI: every distinct label except those containing
# the "CCI" or "BTO" markers.
unique_celltypes = sorted(c for c in data["Celltype"].unique() if "CCI" not in c and "BTO" not in c)

# SAFE summary tables, all keyed by "Celltype" after renaming "Label", are
# combined into one frame with one row per cell type present in all three.
max_safe_scores = pd.read_csv("data/max_safe_scores.csv", sep = "\t").rename(columns = {"Score": "Max SAFE Score", "Label": "Celltype"})
mean_safe_scores = pd.read_csv("data/mean_safe_scores.csv", sep = "\t").rename(columns = {"Score": "Mean SAFE Score", "Label": "Celltype"})
neighborhood_enrichment = pd.read_csv("data/safe_neighborhoods_enriched.csv", sep = "\t").rename(columns = {"Label": "Celltype"})
safe_scores = max_safe_scores.merge(mean_safe_scores, on = "Celltype")
safe_scores = safe_scores.merge(neighborhood_enrichment, on = "Celltype")
print(safe_scores)

# Helper functions
def plot_protein_emb(protein):
    """Plot all points, highlighting every context of one protein.

    Parameters
    ----------
    protein : str
        Protein name typed by the user. It is compared with the ``Name``
        column case-insensitively and ignoring surrounding whitespace.
        ``None`` is treated as an empty query (nothing highlighted).

    Returns
    -------
    tuple
        ``(fig, protein_context_df)``: the Plotly scatter figure and the
        matching rows of ``data`` restricted to ``Name``, ``Celltype``,
        ``x`` and ``y``.
    """
    hover_keys = {"Name": True, "Celltype": True, "x": False, "y": False, "Selected": False}

    # Normalise the query once instead of once per row; stripping makes
    # input such as " TNF " match.
    query = (protein or "").strip().lower()

    p_data = data.copy()
    names = p_data["Name"].str.lower()
    # Matching rows keep their cell type as the colour label; the rest share one label.
    p_data["Selected"] = [c if n == query else "Not Selected" for n, c in zip(names, p_data["Celltype"].tolist())]
    # Highlighted points are drawn larger (10) than background points (1).
    p_data["Size"] = [1 if s == "Not Selected" else 10 for s in p_data["Selected"].tolist()]
    symbol_map = {s: "circle" if s == 1 else "star" for s in p_data["Size"].unique()}
    color_map = {c: colour for c, colour in zip(p_data["Selected"].unique(), css_colors) if c != "Not Selected"}
    color_map["Not Selected"] = "lightgrey"

    fig = px.scatter(p_data, x = "x", y = "y", color = "Selected", color_discrete_map = color_map, symbol = "Size", symbol_map = symbol_map, size = "Size", opacity = 0.8, hover_data = hover_keys)
    # Pass layout properties as keywords: the second positional parameter of
    # update_layout is `overwrite`, so passing two dicts positionally silently
    # dropped the paper_bgcolor setting.
    fig.update_layout(plot_bgcolor = "rgba(0, 0, 0, 0)", paper_bgcolor = "rgba(0, 0, 0, 0)", showlegend = False)
    fig.update_xaxes(title_text = "", showticklabels = False)
    fig.update_yaxes(title_text = "", showticklabels = False)
    fig.update_traces(marker=dict(line=dict(width=0)))

    protein_context_df = p_data[p_data["Selected"] != "Not Selected"][["Name", "Celltype", "x", "y"]]

    return fig, protein_context_df


def get_protein_counts(df):
    """Count rows per cell type and attach the SAFE summary columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Celltype`` column; each row counts as one protein.

    Returns
    -------
    pandas.DataFrame
        One row per cell type found in both ``df`` and ``safe_scores``, with
        the count in ``Activated Proteins`` followed by the columns of
        ``safe_scores``. The frame is also printed to stdout.
    """
    tally = Counter(df["Celltype"].tolist())
    per_celltype = pd.DataFrame(
        {
            "Celltype": list(tally),
            "Activated Proteins": list(tally.values()),
        }
    )
    # Sort by label first, then inner-join with the module-level SAFE table.
    summary = per_celltype.sort_values(by = "Celltype").merge(safe_scores, on = "Celltype")
    print(summary)
    return summary


def plot_celltype_emb(celltype):
    """Plot all points, highlighting the selected cell types.

    Parameters
    ----------
    celltype : list of str
        Selected cell types. If it contains ``"All"`` every point is coloured
        by its own cell type. Otherwise only the listed cell types are
        coloured, matched case-insensitively, and the rest are grey. A single
        string is treated as a one-element selection and ``None`` as an
        empty one.

    Returns
    -------
    tuple
        ``(fig, activated_proteins_df)``: the Plotly scatter figure and the
        per-cell-type summary produced by ``get_protein_counts``.
    """
    hover_keys = {"Name": True, "Celltype": True, "x": False, "y": False}

    if isinstance(celltype, str):
        chosen = [celltype]
    else:
        chosen = list(celltype or [])

    if "All" in chosen:
        fig = px.scatter(data, x = "x", y = "y", color = "Celltype", opacity = 0.4, hover_data = hover_keys)
        activated_proteins_df = get_protein_counts(data)
    else:
        hover_keys.update({"Selected": False})
        c_data = data.copy()

        # Lower-case BOTH sides of the comparison. Previously only the
        # selection was lower-cased, so labels containing upper-case
        # characters could never match.
        lowered = [c.lower() for c in chosen]
        wanted = set(lowered)
        palette = dict(zip(lowered, css_colors))

        c_data["Selected"] = [
            c if isinstance(c, str) and c.lower() in wanted else "Not Selected"
            for c in c_data["Celltype"].tolist()
        ]

        # Key the colour map by the labels as they appear in the data so that
        # plotly can look them up; colours still follow selection order.
        color_map = {
            label: palette[label.lower()]
            for label in c_data["Selected"].unique()
            if label != "Not Selected" and label.lower() in palette
        }
        color_map["Not Selected"] = "lightgrey"

        fig = px.scatter(c_data, x = "x", y = "y", color = "Selected", color_discrete_map = color_map, opacity = 0.8, hover_data = hover_keys)

        activated_proteins_df = get_protein_counts(c_data[c_data["Selected"] != "Not Selected"])

    # Pass layout properties as keywords: the second positional parameter of
    # update_layout is `overwrite`, so passing two dicts positionally silently
    # dropped the paper_bgcolor setting.
    fig.update_layout(plot_bgcolor = "rgba(0, 0, 0, 0)", paper_bgcolor = "rgba(0, 0, 0, 0)", showlegend = False)
    fig.update_xaxes(title_text = "", showticklabels = False)
    fig.update_yaxes(title_text = "", showticklabels = False)
    return fig, activated_proteins_df


# Create gradio interface
# Components are created inside nested context managers; the nesting order
# below is what determines the page layout, so statements must stay in place.
with gr.Blocks() as demo:
    # Page title and introductory text.
    gr.Markdown('<center><h1>Contextualizing Protein Representations with PINNACLE</h1></center>')
    gr.Markdown('Protein interaction networks are a critical component to study the function and therapeutic potential of proteins. \
                 However, accurately modeling protein interactions across diverse biological contexts, such as tissues and cell types, \
                 remains a significant challenge for existing algorithms. Here, we introduce <b>PINNACLE</b>, a flexible geometric deep learning approach \
                 that trains on contextualized protein interaction networks to generate context-aware protein representations. Leveraging a \
                 multi-organ single cell transcriptomic atlas of humans, <b>PINNACLE provides 394,760 protein representations split across 156 cell-type \
                 contexts from 24 tissues and organs</b>. Our contextualized protein representations, infused with cellular and tissue organization, \
                 can easily be adapted for diverse downstream tasks.')
    gr.Markdown(' For more information, please check out our manuscript and documentation (links provided at the bottom of the page)!')
    
    with gr.Tabs():

        # Tab 1: free-text protein lookup, handled by plot_protein_emb.
        with gr.TabItem("Protein"):
            with gr.Column():
                gr.Markdown('<center><h3>Select protein of interest to examine across biological contexts</h3></center>')
                protein = gr.Textbox(info = "Enter a protein name (in HGNC symbol)", lines = 1, value = "TNF", label = "Protein")
                protein_submit_btn = gr.Button("Submit")
                
                gr.Markdown('<center><h3>Contextualized protein representations</h3></center>')
                protein_plot = gr.Plot()

                with gr.Accordion(label = "Protein Contexts", open = False):
                    # NOTE(review): the first header is "Protein" while plot_protein_emb
                    # returns a frame whose first column is "Name" - confirm which
                    # label is meant to be displayed.
                    protein_context_df = gr.Dataframe(headers = ["Protein", "Celltype", "x", "y"], overflow_row_behaviour = "paginate")

        # Tab 2: multi-select cell-type filter, handled by plot_celltype_emb.
        with gr.TabItem("Cell Type"):
            with gr.Column():
                gr.Markdown('<center><h3>Select biological context by specifying cell type of interest</h3></center>')
                # "All" is prepended to the choices and is the default selection.
                celltype = gr.Dropdown(["All"] + unique_celltypes, info = "Please select from the following cell types.", value = ["All"], multiselect = True, label="Cell Type")
                celltype_submit_btn = gr.Button("Submit")
                
                gr.Markdown('<center><h3>Contextualized protein representations</h3></center>')
                celltype_plot = gr.Plot()

                with gr.Accordion(label = "Cell Type Context", open = False):
                    # NOTE(review): get_protein_counts merges in the safe_scores columns,
                    # so the returned frame has more columns than these two headers.
                    activated_proteins_df = gr.Dataframe(headers = ["Celltype", "Activated Proteins"], overflow_row_behaviour = "paginate")

    # Footer links, built from adjacent string literals joined into one string.
    gr.Markdown("<p style='text-align: center'><a href='https://github.com/mims-harvard/PINNACLE'>Github Repo</a>" \
                "| <a href='https://zitniklab.hms.harvard.edu/projects/PINNACLE/'>Documentation</a> " \
                "| <a href='https://www.nature.com/articles/s41592-024-02341-3/'>Publication</a></p>")

    # Each Submit button calls its helper with the tab's input component and
    # routes the returned (figure, dataframe) pair to the plot and the table.
    protein_submit_btn.click(plot_protein_emb, inputs = [protein], outputs = [protein_plot, protein_context_df])
    celltype_submit_btn.click(plot_celltype_emb, inputs = [celltype], outputs = [celltype_plot, activated_proteins_df])


# Launch
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()