"""Gradio demo for SHEPHERD: AI-assisted rare disease diagnosis.

The app loads pre-computed score and attention tables from CSV files in the
working directory and exposes three tabs: causal gene discovery,
"patients like me" retrieval, and disease characterization.

Model checkpoints noted by the original author
(NOTE(review): presumably the checkpoints that produced the CSV files below;
confirm):

Causal Gene Discovery Model
/home/ema30/zaklab/rare_disease_dx/checkpoints/aligner/04_30_22:13:29:55_lr_1e-05_val_simulated_pats.disease_split_val_sim_pats_kg_8.9.21_kg_losstype_gene_multisimilarity/all_udn_patients_kg_8.9.21_kgsolved_manual_baylor_nobgm_distractor_genes_5_candidates_mapped_only_genes

Patients-Like-Me Model
/home/ema30/zaklab/rare_disease_dx/checkpoints/patient_NCA/04_26_22:17:38:30_lr_5e-05_val_simulated_pats.disease_split_val_sim_pats_kg_8.9.21_kg_losstype_patient_patient_NCA/mygene2_all_sim_all_udn_patients_kg_8.9.21_kgsolved_with_phenotypes

Disease Characterization Model
/home/ema30/zaklab/rare_disease_dx/checkpoints/patient_NCA/05_13_22:08:00:32_lr_1e-05_val_simulated_pats.disease_split_val_sim_pats_kg_8.9.21_kg_losstype_pd_NCA/mygene2_all_sim_all_udn_patients_kg_8.9.21_kgsolved_with_phenotypes
"""
import ast
import html
from pathlib import Path

import gradio as gr
import pandas as pd

# Pre-computed model outputs, one row per (patient, candidate) pair.
gene_scores_df = pd.read_csv('gene_discovery_scores.csv')
exomiser_gene_scores_df = pd.read_csv('exomiser_gene_discovery_scores.csv')
patient_scores_df = pd.read_csv('patients_like_me_scores.csv')
dx_scores_df = pd.read_csv('dx_characterization_scores.csv')

# Per-phenotype attention weights, one row per (patient, phenotype) pair.
plm_attn_df = pd.read_csv('patients_like_me_scores_attn.csv')
dx_attn_df = pd.read_csv('dx_characterization_scores_attn.csv')
gene_attn_df = pd.read_csv('gene_discovery_scores_attn.csv')
exomiser_gene_attn_df = pd.read_csv('exomiser_gene_discovery_scores_attn.csv')

# Hand-curated display values; these take precedence over the labels stored
# in the score tables (see get_patient).
diseases_map = {
    'UDN-P1': 'POLR3-related leukodystrophy',
    'UDN-P2': 'Novel Syndrome',
    'UDN-P3': 'Coffin-Lowry syndrome',
    'UDN-P4': 'autosomal recessive spastic paraplegia type 76',
    'UDN-P5': 'atypical presentation of familial cold autoinflammatory syndrome',
    'UDN-P6': '*GATAD2B*-associated syndrome',
    'UDN-P7': 'AR limb-girdle muscular atrophy type 2D',
    'UDN-P8': '*ATP5PO*-related Leigh syndrome',
    'UDN-P9': 'Spondyloepimetaphyseal dysplasia, Isidor-Toutain type',
}
genes_map = {
    'UDN-P3': 'RPS6KA3',
    'UDN-P4': 'CAPN1',
    'UDN-P5': 'NLRP12, RAPGEFL1',
    'UDN-P6': 'GATAD2B',
    'UDN-P7': 'SGCA',
    # Letter "O", matching the gene named in diseases_map['UDN-P8'].
    'UDN-P8': 'ATP5PO',
    'UDN-P9': 'RPL13',
}

_GENECARDS_URL = 'https://www.genecards.org/cgi-bin/carddisp.pl?gene='
_ORPHANET_URL = 'https://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=en&Expert='
_HOSTED_HTML_URL = 'https://michellemli.github.io/test_html/'

# Raw `disease_ids` values that hold a disease name instead of an id are
# rewritten to these ids before links are built.
_DISEASE_ID_BY_NAME = {
    'Coxa vara': '2812',
    'Multiple epiphyseal dysplasia': '2654',
}

# Diseases whose generated Orphanet link is replaced by a hand-written one
# (the original author notes one disease could not be mapped to Orphanet).
_DISEASE_LINK_OVERRIDES = {
    '33657': '[leukodystrophy, hypomyelinating, 20](https://www.omim.org/entry/619071)',
    '2654': '[Multiple epiphyseal dysplasia](https://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=EN&Expert=251)',
    '2812': '[Coxa vara](https://omim.org/entry/122750)',
}


def _rows_for_patient(df, patient_id):
    """Return an independent copy of the rows of *df* for *patient_id*."""
    # The copy lets callers add or overwrite columns without writing through
    # to (or being warned about) the module-level table.
    return df.loc[df['patient_id'] == patient_id].copy()


def _join_values(series):
    """Join the non-missing values of *series* into a comma-separated string."""
    return ', '.join(series.dropna().astype(str))


def _bold_where(df, mask, columns):
    """Wrap the values of *columns* in Markdown bold on rows where *mask* holds."""
    for column in columns:
        bolded = df[column].map(lambda value: f'**{value}**')
        df[column] = df[column].where(~mask, bolded)
    return df


def _gene_link(gene):
    """Return a Markdown link to the GeneCards page for *gene*."""
    return f'[{gene}]({_GENECARDS_URL}{gene})'


def _attention_table(attn_df, patient_id):
    """Return the patient's phenotype attention, highest weight first."""
    table = _rows_for_patient(attn_df, patient_id)
    table = table.sort_values('attention', ascending=False)
    table['attention'] = table['attention'].round(4)
    return table.drop(columns=['patient_id', 'degrees'])


def _embed_html(url, height=600):
    """Return an iframe snippet that embeds the page at *url*."""
    # NOTE(review): the original markup was not recoverable from the source;
    # the iframe dimensions here are a reasonable default, adjust as needed.
    safe_url = html.escape(url, quote=True)
    return f'<iframe src="{safe_url}" width="100%" height="{height}"></iframe>'


def _resolve_disease_id(raw_id):
    """Reduce a raw `disease_ids` cell to a single disease id.

    Name-valued cells are mapped through ``_DISEASE_ID_BY_NAME``; list
    literals collapse to their first element; anything that cannot be parsed
    as a Python literal is returned unchanged as a string.
    """
    raw_id = str(raw_id)
    for name, mapped_id in _DISEASE_ID_BY_NAME.items():
        if name in raw_id:
            return mapped_id
    try:
        parsed = ast.literal_eval(raw_id)
    except (ValueError, SyntaxError):
        return raw_id
    if isinstance(parsed, list):
        return parsed[0] if parsed else raw_id
    return parsed


def _disease_link(name, raw_id):
    """Return the Markdown link shown for one disease-characterization row."""
    disease_id = str(_resolve_disease_id(raw_id))
    # Exact id lookup, so that e.g. id 12654 is not mistaken for 2654.
    override = _DISEASE_LINK_OVERRIDES.get(disease_id)
    if override is not None:
        return override
    return f'[{name}]({_ORPHANET_URL}{disease_id})'


def get_patient(patient_id, attn_df):
    """Build the Markdown summary shown for the selected patient.

    Args:
        patient_id: Identifier matched against the `patient_id` columns.
        attn_df: Attention table whose `phenotypes` column lists the
            patient's phenotypes.

    Returns:
        A Markdown string with the patient id, causal gene, disease and
        phenotypes. The gene and disease come from `genes_map` /
        `diseases_map` when the patient is listed there, otherwise from the
        rows labelled correct in the score tables.
    """
    if patient_id in genes_map:
        gene = genes_map[patient_id]
    else:
        patient_genes = gene_scores_df.loc[gene_scores_df['patient_id'] == patient_id]
        gene = _join_values(
            patient_genes.loc[patient_genes['correct_gene_label'] == 1, 'genes'])

    if patient_id in diseases_map:
        disease = diseases_map[patient_id]
    else:
        patient_dx = dx_scores_df.loc[dx_scores_df['patient_id'] == patient_id]
        disease = _join_values(
            patient_dx.loc[patient_dx['correct_label'] == 1, 'diseases'])

    patient_attn = attn_df.loc[attn_df['patient_id'] == patient_id]
    phenotypes = _join_values(patient_attn['phenotypes'])

    patient_str = f''' **Selected Patient:** {patient_id}
**Causal Gene:** *{gene}*
**Disease:** {disease}
**Phenotypes:** {phenotypes}

'''
    return patient_str


def read_file(filename):
    """Return the full text content of *filename*."""
    with open(filename, 'r', encoding='utf-8') as file:
        return file.read()


def causal_gene_discovery(patient_id, prioritization_type):
    """Rank a patient's candidate genes.

    Args:
        patient_id: Patient selected in the dropdown.
        prioritization_type: 'Variant Filtered' selects the Exomiser-based
            tables; any other value selects the expert-curated tables.

    Returns:
        Tuple of (patient summary Markdown, gene ranking DataFrame,
        phenotype attention DataFrame, HTML for the knowledge-graph view).
    """
    if prioritization_type == 'Variant Filtered':
        source_scores, source_attn = exomiser_gene_scores_df, exomiser_gene_attn_df
    else:
        source_scores, source_attn = gene_scores_df, gene_attn_df

    scores_df = _rows_for_patient(source_scores, patient_id)
    scores_df = scores_df.sort_values('similarities', ascending=False)
    scores_df['similarities'] = scores_df['similarities'].round(3).astype(str)
    scores_df['genes'] = scores_df['genes'].map(_gene_link)

    # Highlight the causal gene and its score.
    is_causal = scores_df['correct_gene_label'] == 1
    _bold_where(scores_df, is_causal, ['similarities', 'genes'])

    scores_df = scores_df.drop(columns=['patient_id', 'correct_gene_label']).rename(
        columns={'similarities': 'SHEPHERD Score', 'genes': 'Candidate Genes'})

    attn_df = _attention_table(source_attn, patient_id)

    # Knowledge-graph neighbourhood: one hosted HTML page per patient.
    # Nothing is embedded until a patient has actually been selected.
    kg_html = ''
    if patient_id:
        kg_html = _embed_html(f'{_HOSTED_HTML_URL}{patient_id}.html')

    patient = get_patient(patient_id, gene_attn_df)
    return patient, scores_df, attn_df, kg_html


def patients_like_me(patient_id, k=10):
    """Retrieve the *k* patients most similar to the selected patient.

    Returns:
        Tuple of (patient summary Markdown, similar-patient DataFrame,
        phenotype attention DataFrame).
    """
    scores_df = _rows_for_patient(patient_scores_df, patient_id)
    # Formatting below is row-wise, so truncating first gives the same table.
    scores_df = scores_df.sort_values('similarities', ascending=False).head(k)

    scores_df['diseases'] = [
        f'[{name}]({_ORPHANET_URL}{disease_id})'
        for name, disease_id in zip(scores_df['diseases'], scores_df['disease_ids'])
    ]
    scores_df['genes'] = scores_df['genes'].map(_gene_link)

    # Highlight the rows labelled correct in the score table.
    is_match = scores_df['correct_label'] == 1
    _bold_where(scores_df, is_match, ['candidate_patients', 'genes', 'diseases'])

    scores_df = scores_df.drop(
        columns=['patient_id', 'similarities', 'correct_label', 'disease_ids'],
    ).rename(columns={
        'candidate_patients': 'Candidate Patient',
        'genes': 'Candidate Patient\'s Gene',
        'diseases': 'Candidate Patient\'s Disease',
    })

    attn_df = _attention_table(plm_attn_df, patient_id)
    patient = get_patient(patient_id, plm_attn_df)
    return patient, scores_df, attn_df


def disease_characterization(patient_id, k=10):
    """Retrieve the *k* diseases most similar to the selected patient.

    Returns:
        Tuple of (patient summary Markdown, single-column disease DataFrame,
        phenotype attention DataFrame).
    """
    scores_df = _rows_for_patient(dx_scores_df, patient_id)
    scores_df = scores_df.sort_values('similarities', ascending=False).head(k)

    scores_df['diseases'] = [
        _disease_link(name, raw_id)
        for name, raw_id in zip(scores_df['diseases'], scores_df['disease_ids'])
    ]

    scores_df = scores_df.drop(
        columns=['patient_id', 'similarities', 'correct_label', 'disease_ids'],
    ).rename(columns={'diseases': 'Disease'})

    attn_df = _attention_table(dx_attn_df, patient_id)
    patient = get_patient(patient_id, dx_attn_df)
    return patient, scores_df, attn_df


def get_umap(umap_type):
    """Return HTML embedding the hosted UMAP page for *umap_type*.

    Args:
        umap_type: Either 'disease' or 'patient'.

    Raises:
        NotImplementedError: For any other *umap_type*.
    """
    pages = {
        'disease': f'{_HOSTED_HTML_URL}shepherd_disease_characterization_umap.html',
        'patient': f'{_HOSTED_HTML_URL}shepherd_patient_umap.html',
    }
    if umap_type not in pages:
        raise NotImplementedError(f'Unknown UMAP type: {umap_type!r}')
    return _embed_html(pages[umap_type])


with gr.Blocks() as demo:  # css="#gene_attn_accordion {text-align: center}" css="kg_neigh {width: 70%}"
    # NOTE(review): heading markup around these titles may have been lost;
    # the text itself is reproduced as found.
    gr.Markdown('\n\nAI-assisted Rare Disease Diagnosis with SHEPHERD\n\n')
    # Subtitle kept disabled, as in the original:
    # A few SHot Explainable Predictor for Hard-to-diagnosE Rare Diseases

    with gr.Tabs():
        with gr.TabItem("Causal Gene Discovery"):
            with gr.Column():
                gr.Markdown('\n\nSelect a patient to view SHEPHERD\'s predictions\n\n')
                gene_dropdown = gr.Dropdown(
                    choices=['UDN-P1', 'UDN-P2'],
                    label='Rare Disease Patients',
                    type='value')
                gene_radio = gr.Radio(
                    choices=['Expert Curated', 'Variant Filtered'],
                    value='Expert Curated',
                    label='Type of Gene List')
                patient_info = gr.Markdown()

                with gr.Accordion(label='SHEPHERD\'s Ranking of Patient\'s Candidate Genes',
                                  open=True, elem_id='gene_accordion'):
                    gr.Markdown('The patient\'s causal gene (i.e. gene harboring a variant that explains the patient\'s symptoms) is colored in green.')
                    gene_dataframe = gr.Dataframe(
                        max_rows=5, elem_id="gene_df", datatype='markdown',
                        headers=['Candidate Genes', 'SHEPHERD Score'],
                        overflow_row_behaviour='paginate')

                with gr.Accordion(label='SHEPHERD\'s Attention to Patient\'s Phenotypes',
                                  open=False, elem_id='gene_attn_accordion'):
                    gene_attn_dataframe = gr.Dataframe(
                        max_rows=5, elem_id="gene_attn_df",
                        headers=['Phenotypes', 'Attention'],
                        overflow_row_behaviour='paginate')

                with gr.Accordion(label='Visualization of Patient\'s Neighborhood in the Knowledge Graph',
                                  open=False, elem_id='kg_neigh_accordion'):
                    kg_neighborhood_image = gr.HTML(elem_id='kg_patient_neighborhood')

        with gr.TabItem("Patients Like Me"):
            gr.HTML(get_umap('patient'))
            gr.Markdown('\n\nSelect a patient to view SHEPHERD\'s predictions\n\n')
            patient_dropdown = gr.Dropdown(
                choices=['UDN-P3', 'UDN-P4', 'UDN-P5', 'UDN-P6'],
                label='Rare Disease Patients',
                type='value')
            p_patient_info = gr.Markdown()

            with gr.Accordion(label='Top 10 Most Similar Patients according to SHEPHERD',
                              open=True, elem_id='pt_accordion'):
                patient_dataframe = gr.Dataframe(
                    max_rows=10, datatype='markdown', show_label=False, elem_id="pat_df",
                    headers=['Candidate Patient', 'Candidate Patient\'s Gene',
                             'Candidate Patient\'s Disease'])

            with gr.Accordion(label='SHEPHERD\'s Attention to Patient\'s Phenotypes',
                              open=False, elem_id='pt_attn_accordion'):
                pt_attn_dataframe = gr.Dataframe(
                    max_rows=5, elem_id="pt_attn_df",
                    headers=['Phenotypes', 'Attention'],
                    overflow_row_behaviour='paginate')

        with gr.TabItem("Disease Characterization"):
            gr.HTML(get_umap('disease'))
            gr.Markdown('\n\nSelect a patient to view SHEPHERD\'s predictions\n\n')
            dx_dropdown = gr.Dropdown(
                choices=['UDN-P7', 'UDN-P8', 'UDN-P9', 'UDN-P2'],
                label='Rare Disease Patients',
                type='value')
            dx_patient_info = gr.Markdown()

            # elem_id is unique per accordion ('pt_accordion' belongs to the
            # Patients Like Me tab); header matches the returned column name.
            with gr.Accordion(label='Top 10 Most Similar Diseases according to SHEPHERD',
                              open=True, elem_id='dx_accordion'):
                dx_dataframe = gr.Dataframe(
                    max_rows=10, datatype='markdown', show_label=False, elem_id="dx_df",
                    headers=['Disease'])

            with gr.Accordion(label='SHEPHERD\'s Attention to Patient\'s Phenotypes',
                              open=False, elem_id='dx_attn_accordion'):
                dx_attn_dataframe = gr.Dataframe(
                    max_rows=5, elem_id="dx_attn_df",
                    headers=['Phenotypes', 'Attention'],
                    overflow_row_behaviour='paginate')

    # Both the patient dropdown and the gene-list radio refresh the gene tab.
    gene_outputs = [patient_info, gene_dataframe, gene_attn_dataframe, kg_neighborhood_image]
    gene_dropdown.change(causal_gene_discovery,
                         inputs=[gene_dropdown, gene_radio], outputs=gene_outputs)
    gene_radio.change(causal_gene_discovery,
                      inputs=[gene_dropdown, gene_radio], outputs=gene_outputs)
    patient_dropdown.change(patients_like_me, inputs=patient_dropdown,
                            outputs=[p_patient_info, patient_dataframe, pt_attn_dataframe])
    dx_dropdown.change(disease_characterization, inputs=dx_dropdown,
                       outputs=[dx_patient_info, dx_dataframe, dx_attn_dataframe])

demo.launch(share=True)  # server_port=50018, share=True