Spaces:

ayushnoori
/

gravity

Sleeping

App Files Files

ayushnoori committed on May 17, 2024

Commit

f18a5e1

1 Parent(s): ca764d6

Rename to predict and add external database links

Browse files

Files changed (6) hide show

media/predict_header.svg +1 -0
menu.py +2 -1
pages/explore.py +0 -20
pages/predict.py +154 -0
pages/validate.py +3 -95
utils.py +9 -0

media/predict_header.svg ADDED Viewed

menu.py CHANGED Viewed

@@ -45,8 +45,9 @@ def authenticated_menu():
     # st.sidebar.page_link("app.py", label="Switch Accounts", icon="🔒")
     st.sidebar.page_link("pages/about.py", label="About", icon="📖")
     st.sidebar.page_link("pages/input.py", label="Input", icon="💡")
     st.sidebar.page_link("pages/validate.py", label="Validate", icon="✅")
-    st.sidebar.page_link("pages/explore.py", label="Explore", icon="🔍")
     if st.session_state.role in ["admin"]:
         st.sidebar.page_link("pages/admin.py", label="Manage Users", icon="🔧")

     # st.sidebar.page_link("app.py", label="Switch Accounts", icon="🔒")
     st.sidebar.page_link("pages/about.py", label="About", icon="📖")
     st.sidebar.page_link("pages/input.py", label="Input", icon="💡")
+    st.sidebar.page_link("pages/predict.py", label="Predict", icon="🔍")
     st.sidebar.page_link("pages/validate.py", label="Validate", icon="✅")
+    # st.sidebar.page_link("pages/explore.py", label="Explore", icon="🔍")
     if st.session_state.role in ["admin"]:
         st.sidebar.page_link("pages/admin.py", label="Manage Users", icon="🔧")

pages/explore.py DELETED Viewed

@@ -1,20 +0,0 @@
-import streamlit as st
-from menu import menu_with_redirect
-# Path manipulation
-from pathlib import Path
-# Custom and other imports
-import project_config
-# Redirect to app.py if not logged in, otherwise show the navigation menu
-menu_with_redirect()
-# Header
-st.image(str(project_config.MEDIA_DIR / 'explore_header.svg'), use_column_width=True)
-# Main content
-# st.markdown(f"Hello, {st.session_state.name}!")
-# Coming soon
-st.write("Coming soon...")

pages/predict.py ADDED Viewed

	@@ -0,0 +1,154 @@

+import streamlit as st
+from menu import menu_with_redirect
+# Standard imports
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+# Path manipulation
+from pathlib import Path
+from huggingface_hub import hf_hub_download
+# Custom and other imports
+import project_config
+from utils import capitalize_after_slash
+# Redirect to app.py if not logged in, otherwise show the navigation menu
+menu_with_redirect()
+# Header
+st.image(str(project_config.MEDIA_DIR / 'predict_header.svg'), use_column_width=True)
+# Main content
+# st.markdown(f"Hello, {st.session_state.name}!")
+st.subheader(f"{capitalize_after_slash(st.session_state.query['target_node_type'])} Search", divider = "blue")
+# Print current query
+st.markdown(f"**Query:** {st.session_state.query['source_node']} ➡️ {st.session_state.query['relation']} ➡️ {st.session_state.query['target_node_type']}")
+with st.spinner('Loading knowledge graph...'):
+    kg_nodes = nodes = pd.read_csv(project_config.DATA_DIR / 'kg_nodes.csv', dtype = {'node_index': int}, low_memory = False)
+# Get paths to embeddings, relation weights, and edge types
+with st.spinner('Downloading AI model...'):
+    embed_path = hf_hub_download(repo_id="ayushnoori/galaxy",
+                                filename="2024_03_29_04_12_52_epoch=3-step=54291_embeddings.pt",
+                                token=st.secrets["HF_TOKEN"])
+    relation_weights_path = hf_hub_download(repo_id="ayushnoori/galaxy",
+                                            filename="2024_03_29_04_12_52_epoch=3-step=54291_relation_weights.pt",
+                                            token=st.secrets["HF_TOKEN"])
+    edge_types_path = hf_hub_download(repo_id="ayushnoori/galaxy",
+                                        filename="2024_03_29_04_12_52_epoch=3-step=54291_edge_types.pt",
+                                        token=st.secrets["HF_TOKEN"])
+# Load embeddings, relation weights, and edge types
+with st.spinner('Loading AI model...'):
+    embeddings = torch.load(embed_path)
+    relation_weights = torch.load(relation_weights_path)
+    edge_types = torch.load(edge_types_path)
+# # Print source node type
+# st.write(f"Source Node Type: {st.session_state.query['source_node_type']}")
+# # Print source node
+# st.write(f"Source Node: {st.session_state.query['source_node']}")
+# # Print relation
+# st.write(f"Edge Type: {st.session_state.query['relation']}")
+# # Print target node type
+# st.write(f"Target Node Type: {st.session_state.query['target_node_type']}")
+# Compute predictions
+with st.spinner('Computing predictions...'):
+    source_node_type = st.session_state.query['source_node_type']
+    source_node = st.session_state.query['source_node']
+    relation = st.session_state.query['relation']
+    target_node_type = st.session_state.query['target_node_type']
+    # Get source node index
+    src_index = kg_nodes[(kg_nodes.node_type == source_node_type) & (kg_nodes.node_name == source_node)].node_index.values[0]
+    # Get relation index
+    edge_type_index = [i for i, etype in enumerate(edge_types) if etype == (source_node_type, relation, target_node_type)][0]
+    # Get target nodes indices
+    target_nodes = kg_nodes[kg_nodes.node_type == target_node_type]
+    dst_indices = target_nodes.node_index.values
+    src_indices = np.repeat(src_index, len(dst_indices))
+    # Retrieve cached embeddings and apply activation function
+    src_embeddings = embeddings[src_indices]
+    dst_embeddings = embeddings[dst_indices]
+    src_embeddings = F.leaky_relu(src_embeddings)
+    dst_embeddings = F.leaky_relu(dst_embeddings)
+    # Get relation weights
+    rel_weights = relation_weights[edge_type_index]
+    # Compute weighted dot product
+    scores = torch.sum(src_embeddings * rel_weights * dst_embeddings, dim = 1)
+    scores = torch.sigmoid(scores)
+    # Add scores to dataframe
+    target_nodes['score'] = scores.detach().numpy()
+    target_nodes = target_nodes.sort_values(by = 'score', ascending = False)
+    target_nodes['rank'] = np.arange(1, target_nodes.shape[0] + 1)
+    # Rename columns
+    display_data = target_nodes[['rank', 'node_id', 'node_name', 'score', 'node_source']].copy()
+    display_data = display_data.rename(columns = {'rank': 'Rank', 'node_id': 'ID', 'node_name': 'Name', 'score': 'Score', 'node_source': 'Database'})
+    # Define dictionary mapping node types to database URLs
+    map_dbs = {
+        'gene/protein': lambda x: f"https://ncbi.nlm.nih.gov/gene/?term={x}",
+        'drug': lambda x: f"https://go.drugbank.com/drugs/{x}",
+        'effect/phenotype': lambda x: f"https://hpo.jax.org/app/browse/term/HP:{x.zfill(7)}", # pad with 0s to 7 digits
+        'disease': lambda x: x, # MONDO
+        # pad with 0s to 7 digits
+        'biological_process': lambda x: f"https://amigo.geneontology.org/amigo/term/GO:{x.zfill(7)}",
+        'molecular_function': lambda x: f"https://amigo.geneontology.org/amigo/term/GO:{x.zfill(7)}",
+        'cellular_component': lambda x: f"https://amigo.geneontology.org/amigo/term/GO:{x.zfill(7)}",
+        'exposure': lambda x: f"https://ctdbase.org/detail.go?type=chem&acc={x}",
+        'pathway': lambda x: f"https://reactome.org/content/detail/{x}",
+        'anatomy': lambda x: x,
+    }
+    # Get name of database
+    display_database = display_data['Database'].values[0]
+    # Add URLs to database column
+    display_data['Database'] = display_data.apply(lambda x: map_dbs[target_node_type](x['ID']), axis = 1)
+    # Use multiselect to search for specific nodes
+    selected_nodes = st.multiselect('Search for specific nodes.', display_data.Name)
+    # Filter nodes
+    if len(selected_nodes) > 0:
+        selected_display_data = display_data[display_data.Name.isin(selected_nodes)]
+        # Show filtered nodes
+        if target_node_type not in ['disease', 'anatomy']:
+            st.dataframe(selected_display_data, use_container_width = True,
+                        column_config={"Database": st.column_config.LinkColumn(width = "small",
+                                                                               help = "Click to visit external database.",
+                                                                               display_text = display_database)})
+        else:
+            st.dataframe(selected_display_data, use_container_width = True)
+    # Show top ranked nodes
+    st.subheader("Model Predictions", divider = "blue")
+    top_k = st.slider('Select number of top ranked nodes to show.', 1, target_nodes.shape[0], min(500, target_nodes.shape[0]))
+    if target_node_type not in ['disease', 'anatomy']:
+        st.dataframe(display_data.iloc[:top_k], use_container_width = True,
+                    column_config={"Database": st.column_config.LinkColumn(width = "small",
+                                                                           help = "Click to visit external database.",
+                                                                           display_text = display_database)})
+    else:
+        st.dataframe(display_data.iloc[:top_k], use_container_width = True)

pages/validate.py CHANGED Viewed

@@ -1,16 +1,8 @@
 import streamlit as st
 from menu import menu_with_redirect
-# Standard imports
-import numpy as np
-import pandas as pd
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
 # Path manipulation
 from pathlib import Path
-from huggingface_hub import hf_hub_download
 # Custom and other imports
 import project_config
@@ -24,91 +16,7 @@ st.image(str(project_config.MEDIA_DIR / 'validate_header.svg'), use_column_width
 # Main content
 # st.markdown(f"Hello, {st.session_state.name}!")
-st.subheader("Model Predictions", divider = "green")
-# Print current query
-st.markdown(f"**Query:** {st.session_state.query['source_node']} ➡️ {st.session_state.query['relation']} ➡️ {st.session_state.query['target_node_type']}")
-with st.spinner('Loading knowledge graph...'):
-    kg_nodes = nodes = pd.read_csv(project_config.DATA_DIR / 'kg_nodes.csv', dtype = {'node_index': int}, low_memory = False)
-# Get paths to embeddings, relation weights, and edge types
-with st.spinner('Downloading AI model...'):
-    embed_path = hf_hub_download(repo_id="ayushnoori/galaxy",
-                                filename="2024_03_29_04_12_52_epoch=3-step=54291_embeddings.pt",
-                                token=st.secrets["HF_TOKEN"])
-    relation_weights_path = hf_hub_download(repo_id="ayushnoori/galaxy",
-                                            filename="2024_03_29_04_12_52_epoch=3-step=54291_relation_weights.pt",
-                                            token=st.secrets["HF_TOKEN"])
-    edge_types_path = hf_hub_download(repo_id="ayushnoori/galaxy",
-                                        filename="2024_03_29_04_12_52_epoch=3-step=54291_edge_types.pt",
-                                        token=st.secrets["HF_TOKEN"])
-# Load embeddings, relation weights, and edge types
-with st.spinner('Loading AI model...'):
-    embeddings = torch.load(embed_path)
-    relation_weights = torch.load(relation_weights_path)
-    edge_types = torch.load(edge_types_path)
-# # Print source node type
-# st.write(f"Source Node Type: {st.session_state.query['source_node_type']}")
-# # Print source node
-# st.write(f"Source Node: {st.session_state.query['source_node']}")
-# # Print relation
-# st.write(f"Edge Type: {st.session_state.query['relation']}")
-# # Print target node type
-# st.write(f"Target Node Type: {st.session_state.query['target_node_type']}")
-# Compute predictions
-with st.spinner('Computing predictions...'):
-    source_node_type = st.session_state.query['source_node_type']
-    source_node = st.session_state.query['source_node']
-    relation = st.session_state.query['relation']
-    target_node_type = st.session_state.query['target_node_type']
-    # Get source node index
-    src_index = kg_nodes[(kg_nodes.node_type == source_node_type) & (kg_nodes.node_name == source_node)].node_index.values[0]
-    # Get relation index
-    edge_type_index = [i for i, etype in enumerate(edge_types) if etype == (source_node_type, relation, target_node_type)][0]
-    # Get target nodes indices
-    target_nodes = kg_nodes[kg_nodes.node_type == target_node_type]
-    dst_indices = target_nodes.node_index.values
-    src_indices = np.repeat(src_index, len(dst_indices))
-    # Retrieve cached embeddings
-    src_embeddings = embeddings[src_indices]
-    dst_embeddings = embeddings[dst_indices]
-    # Apply activation function
-    src_embeddings = F.leaky_relu(src_embeddings)
-    dst_embeddings = F.leaky_relu(dst_embeddings)
-    # Get relation weights
-    rel_weights = relation_weights[edge_type_index]
-    # Compute weighted dot product
-    scores = torch.sum(src_embeddings * rel_weights * dst_embeddings, dim = 1)
-    scores = torch.sigmoid(scores)
-    # Add scores to dataframe
-    target_nodes['score'] = scores.detach().numpy()
-    # Rank target nodes by score
-    target_nodes = target_nodes.sort_values(by = 'score', ascending = False)
-    # Add rank to dataframe
-    target_nodes['rank'] = np.arange(1, target_nodes.shape[0] + 1)
-    # Show top ranked nodes
-    top_k = st.slider('Select number of top ranked nodes to show.', 1, target_nodes.shape[0], 50)
-    # Rename columns
-    display_data = target_nodes[['rank', 'node_id', 'node_name', 'node_source', 'score']].iloc[:top_k].copy()
-    display_data = display_data.rename(columns = {'rank': 'Rank', 'node_id': 'ID', 'node_name': 'Name', 'node_source': 'Database', 'score': 'Score'})
-    st.dataframe(display_data, use_container_width = True)

 import streamlit as st
 from menu import menu_with_redirect
 # Path manipulation
 from pathlib import Path
 # Custom and other imports
 import project_config
 # Main content
 # st.markdown(f"Hello, {st.session_state.name}!")
+st.subheader("Validate Predictions", divider = "green")
+# Coming soon
+st.write("Coming soon...")

utils.py CHANGED Viewed

@@ -1,6 +1,15 @@
 import base64
 import streamlit as st
 # From https://stackoverflow.com/questions/73251012/put-logo-and-title-above-on-top-of-page-navigation-in-sidebar-of-streamlit-multi
 # See also https://arnaudmiribel.github.io/streamlit-extras/extras/app_logo/
 @st.cache_data()

 import base64
 import streamlit as st
+def capitalize_after_slash(s):
+    # Split the string by slashes first
+    parts = s.split('/')
+    # Capitalize each part separately
+    capitalized_parts = [part.title() for part in parts]
+    # Rejoin the parts with slashes
+    capitalized_string = '/'.join(capitalized_parts).replace('_', ' ')
+    return capitalized_string
 # From https://stackoverflow.com/questions/73251012/put-logo-and-title-above-on-top-of-page-navigation-in-sidebar-of-streamlit-multi
 # See also https://arnaudmiribel.github.io/streamlit-extras/extras/app_logo/
 @st.cache_data()