#!/usr/bin/env python
# coding: utf-8

# # Identify the driver regulators of cell fate decisions
# CEFCON is a computational tool for deciphering driver regulators of cell fate
# decisions from single-cell RNA-seq data. It takes a prior gene interaction
# network and expression profiles from scRNA-seq data associated with a given
# developmental trajectory as inputs, and consists of three main components:
# cell-lineage-specific gene regulatory network (GRN) construction, driver
# regulator identification and regulon-like gene module (RGM) identification.
#
# Check out [(Wang et al., Nature Communications, 2023)](https://www.nature.com/articles/s41467-023-44103-3) for the detailed methods and applications.
#
# Code: [https://github.com/WPZgithub/CEFCON](https://github.com/WPZgithub/CEFCON)
#

# In[1]:

import os

import matplotlib.pyplot as plt
import omicverse as ov
#print(f"omicverse version: {ov.__version__}")
import scanpy as sc
#print(f"scanpy version: {sc.__version__}")
import pandas as pd
import seaborn as sns
from tqdm.auto import tqdm

ov.plot_set()


# # Data loading and processing
# Here, we use the mouse hematopoiesis data provided by [Nestorowa et al. (2016, Blood).](https://doi.org/10.1182/blood-2016-05-716480)
#
# **The scRNA-seq data requires processing to extract lineage information for the CEFCON analysis.**
# Please refer to the [original notebook](https://github.com/WPZgithub/CEFCON/blob/e74d2d248b88fb3349023d1a97d3cc8a52cc4060/notebooks/preprocessing_nestorowa16_data.ipynb) for detailed instructions on preprocessing scRNA-seq data.

# In[2]:

adata = ov.single.mouse_hsc_nestorowa16()
adata


# CEFCON fully exploits an available global and **context-free gene interaction
# network** as prior knowledge, from which we extract the cell-lineage-specific
# gene interactions according to the gene expression profiles derived from
# scRNA-seq data associated with a given developmental trajectory.
#
# You can download the prior network from [Zenodo](https://zenodo.org/records/8013900).
# **CEFCON only provides the prior network for human and mouse data analysis**.
# For other species, you should provide the prior network manually.
#
# The author of CEFCON has provided several prior networks here; however,
# 'nichenet' yields the best results.

# In[3]:

prior_network = ov.single.load_human_prior_interaction_network(dataset='nichenet')


# **In the scRNA-seq analysis of human data, you should not run this step.
# Running it may change the gene symbol and result in errors.**

# In[4]:

# Convert the gene symbols of the prior gene interaction network to the mouse gene symbols
prior_network = ov.single.convert_human_to_mouse_network(prior_network, server_name='asia')
prior_network


# In[12]:

# Single definition of the cached network location, shared by the write below
# and the re-read in the next cell.
network_path = 'result/combined_network_Mouse.txt.gz'

# to_csv does not create missing parent directories, so make sure the output
# directory exists before writing.
os.makedirs(os.path.dirname(network_path), exist_ok=True)
prior_network.to_csv(network_path, sep='\t')


# Alternatively, you can directly specify the file path of the input prior
# interaction network and import the specified file.

# In[3]:

#prior_network = './Reference_Networks/combined_network_Mouse.txt'
prior_network = ov.read(network_path, index_col=0)


# # Training CEFCON model
#
# We recommend using GUROBI to solve the integer linear programming (ILP)
# problem when identifying driver genes. GUROBI is a commercial solver that
# requires licenses to run. Thankfully, it provides free licenses in academia,
# as well as trial licenses outside academia. If there is no problem about the
# licenses, you need to install the `gurobipy` package.
#
# If difficulties arise while using GUROBI, the non-commercial solver, SCIP,
# will be employed as an alternative. But the use of SCIP does not come with a
# guarantee of achieving a successful solution.
#
# **By default, the program will verify the availability of GUROBI. If GUROBI
# is not accessible, it will automatically switch the solver to SCIP.**

# In[4]:

CEFCON_obj = ov.single.pyCEFCON(adata, prior_network, repeats=5, solver='GUROBI')
CEFCON_obj


# Construct cell-lineage-specific GRNs

# In[5]:

CEFCON_obj.preprocess()


# Lineage-by-lineage computation:

# In[6]:

CEFCON_obj.train()


# In[9]:

# Identify driver regulators for each lineage
CEFCON_obj.predicted_driver_regulators()


# We can find out the driver regulators identified by CEFCON.

# In[10]:

CEFCON_obj.cefcon_results_dict['E_pseudotime'].driver_regulator.head()


# In[11]:

CEFCON_obj.predicted_RGM()


# # Downstream analysis

# In[12]:

CEFCON_obj.cefcon_results_dict['E_pseudotime']


# In[13]:

lineage = 'E_pseudotime'
result = CEFCON_obj.cefcon_results_dict[lineage]


# Plot gene embedding clusters

# In[20]:

gene_ad = sc.AnnData(result.gene_embedding)
sc.pp.neighbors(gene_ad, n_neighbors=30, use_rep='X')
# Higher resolutions lead to more communities, while lower resolutions lead to fewer communities.
sc.tl.leiden(gene_ad, resolution=1)
sc.tl.umap(gene_ad, n_components=2, min_dist=0.3)


# In[27]:

ov.utils.embedding(
    gene_ad,
    basis='X_umap',
    legend_loc='on data',
    legend_fontsize=8,
    legend_fontoutline=2,
    color='leiden',
    frameon='small',
    title='Leiden clustering using CEFCON\nderived gene embeddings',
)


# Plot influence scores of driver regulators

# In[40]:

# Number of driver regulators to display; it also drives the figure height and
# the palette size so that the three always stay in sync.
n_top = 20

# Keep only the rows flagged as driver regulators, then take the first n_top.
# NOTE(review): this relies on the table's existing row order (presumably
# already ranked by influence score) - confirm before changing n_top.
driver_table = result.driver_regulator
data_for_plot = driver_table[driver_table['is_driver_regulator']].head(n_top)

plt.figure(figsize=(2, n_top * 0.2))
sns.set_theme(style='ticks', font_scale=0.5)
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue`; revisit this call when the minimum supported seaborn version is bumped.
ax = sns.barplot(
    x='influence_score',
    y=data_for_plot.index,
    data=data_for_plot,
    orient='h',
    palette=sns.color_palette("ch:start=.5,rot=-.5,reverse=1,dark=0.4", n_colors=n_top),
)

# Title and axis labels are set exactly once; the title follows the selected
# lineage instead of a hard-coded copy of its name.
ax.set_title(lineage, fontsize=12)
ax.set_xlabel('Influence score', fontsize=12)
ax.set_ylabel('Driver regulon', fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)

# Configure spine visibility: keep the left/bottom spines, pushed 10 points
# outward from the plot area; the top/right spines are removed by despine().
for side in ('left', 'bottom'):
    ax.spines[side].set_position(('outward', 10))
    ax.spines[side].set_visible(True)
sns.despine()


# In[41]:

result.plot_driver_genes_Venn()


# Plot heat map of the activity matrix of RGMs

# In[42]:

# Restrict to the cells that have a (non-NaN) value in this lineage's obs column.
adata_lineage = adata[adata.obs_names[adata.obs[result.name].notna()], :]
result.plot_RGM_activity_heatmap(
    cell_label=adata_lineage.obs['cell_type_finely'],
    type='out',
    col_cluster=True,
    bbox_to_anchor=(1.48, 0.25),
)