Spaces:
Sleeping
Sleeping
File size: 6,523 Bytes
2999286 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
#!/usr/bin/env python
# coding: utf-8

# # Identify the driver regulators of cell fate decisions
# CEFCON is a computational tool for deciphering driver regulators of cell
# fate decisions from single-cell RNA-seq data. It takes a prior gene
# interaction network and expression profiles from scRNA-seq data associated
# with a given developmental trajectory as inputs, and consists of three main
# components, including cell-lineage-specific gene regulatory network (GRN)
# construction, driver regulator identification and regulon-like gene module
# (RGM) identification.
#
# Check out [(Wang et al., Nature Communications, 2023)](https://www.nature.com/articles/s41467-023-44103-3)
# for the detailed methods and applications.
#
# Code: [https://github.com/WPZgithub/CEFCON](https://github.com/WPZgithub/CEFCON)
#

# In[1]:

from pathlib import Path

import matplotlib.pyplot as plt
import omicverse as ov
#print(f"omicverse version: {ov.__version__}")
import pandas as pd
import scanpy as sc
#print(f"scanpy version: {sc.__version__}")
import seaborn as sns
from tqdm.auto import tqdm

ov.plot_set()

# Lineage (pseudotime column) inspected throughout the downstream analysis.
lineage = 'E_pseudotime'

# Where the converted mouse prior network is cached on disk.
prior_network_path = 'result/combined_network_Mouse.txt.gz'

# Number of top-ranked driver regulators shown in the influence-score bar plot.
n_top_drivers = 20


# # Data loading and processing
# Here, we use the mouse hematopoiesis data provided by
# [Nestorowa et al. (2016, Blood).](https://doi.org/10.1182/blood-2016-05-716480)
#
# **The scRNA-seq data requires processing to extract lineage information for
# the CEFCON analysis.** Please refer to the
# [original notebook](https://github.com/WPZgithub/CEFCON/blob/e74d2d248b88fb3349023d1a97d3cc8a52cc4060/notebooks/preprocessing_nestorowa16_data.ipynb)
# for detailed instructions on preprocessing scRNA-seq data.

# In[2]:

adata = ov.single.mouse_hsc_nestorowa16()
adata  # notebook display only; a bare expression has no effect in a script


# CEFCON fully exploits an available global and **context-free gene
# interaction network** as prior knowledge, from which we extract the
# cell-lineage-specific gene interactions according to the gene expression
# profiles derived from scRNA-seq data associated with a given developmental
# trajectory.
#
# You can download the prior network from [zenodo](https://zenodo.org/records/8013900).
# **CEFCON only provides the prior network for human and mouse data
# analysis**. For other species, you should provide the prior network
# manually.
#
# The author of CEFCON has provided several prior networks here; however,
# 'nichenet' yields the best results.

# In[3]:

prior_network = ov.single.load_human_prior_interaction_network(dataset='nichenet')


# **In the scRNA-seq analysis of human data, you should not run this step.
# Running it may change the gene symbol and result in errors.**

# In[4]:

# Convert the gene symbols of the prior gene interaction network to the mouse
# gene symbols
prior_network = ov.single.convert_human_to_mouse_network(
    prior_network, server_name='asia'
)
prior_network  # notebook display only


# In[12]:

# Create the output directory first: to_csv does not create missing parent
# directories and would otherwise raise on a fresh checkout.
Path(prior_network_path).parent.mkdir(parents=True, exist_ok=True)
prior_network.to_csv(prior_network_path, sep='\t')


# Alternatively, you can directly specify the file path of the input prior
# interaction network and import the specified file.

# In[3]:

#prior_network = './Reference_Networks/combined_network_Mouse.txt'
prior_network = ov.read(prior_network_path, index_col=0)


# # Training CEFCON model
#
# We recommend using GUROBI to solve the integer linear programming (ILP)
# problem when identifying driver genes. GUROBI is a commercial solver that
# requires licenses to run. Thankfully, it provides free licenses in academia,
# as well as trial licenses outside academia. If there is no problem about the
# licenses, you need to install the `gurobipy` package.
#
# If difficulties arise while using GUROBI, the non-commercial solver, SCIP,
# will be employed as an alternative. But the use of SCIP does not come with a
# guarantee of achieving a successful solution.
#
# **By default, the program will verify the availability of GUROBI. If GUROBI
# is not accessible, it will automatically switch the solver to SCIP.**
#

# In[4]:

CEFCON_obj = ov.single.pyCEFCON(adata, prior_network, repeats=5, solver='GUROBI')
CEFCON_obj  # notebook display only


# Construct cell-lineage-specific GRNs

# In[5]:

CEFCON_obj.preprocess()


# Lineage-by-lineage computation:

# In[6]:

CEFCON_obj.train()


# In[9]:

# Identify driver regulators for each lineage
CEFCON_obj.predicted_driver_regulators()


# We can find out the driver regulators identified by CEFCON.

# In[10]:

CEFCON_obj.cefcon_results_dict[lineage].driver_regulator.head()  # notebook display only


# In[11]:

CEFCON_obj.predicted_RGM()


# # Downstream analysis

# In[12]:

CEFCON_obj.cefcon_results_dict[lineage]  # notebook display only


# In[13]:

result = CEFCON_obj.cefcon_results_dict[lineage]


# Plot gene embedding clusters

# In[20]:

gene_ad = sc.AnnData(result.gene_embedding)
sc.pp.neighbors(gene_ad, n_neighbors=30, use_rep='X')
# Higher resolutions lead to more communities, while lower resolutions lead to
# fewer communities.
sc.tl.leiden(gene_ad, resolution=1)
sc.tl.umap(gene_ad, n_components=2, min_dist=0.3)


# In[27]:

ov.utils.embedding(
    gene_ad,
    basis='X_umap',
    legend_loc='on data',
    legend_fontsize=8,
    legend_fontoutline=2,
    color='leiden',
    frameon='small',
    title='Leiden clustering using CEFCON\nderived gene embeddings',
)


# Plot influence scores of driver regulators

# In[40]:

# Keep only the rows flagged as driver regulators, then the first
# `n_top_drivers` of them in the table's existing order.
driver_table = result.driver_regulator
data_for_plot = driver_table[driver_table['is_driver_regulator']].head(n_top_drivers)

plt.figure(figsize=(2, n_top_drivers * 0.2))
sns.set_theme(style='ticks', font_scale=0.5)

ax = sns.barplot(
    x='influence_score',
    y=data_for_plot.index,
    data=data_for_plot,
    orient='h',
    palette=sns.color_palette(
        "ch:start=.5,rot=-.5,reverse=1,dark=0.4", n_colors=n_top_drivers
    ),
)

# Push the left/bottom spines slightly away from the bars.
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))

plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(False)

# Set spine visibility
for side, visible in (('top', False), ('right', False),
                      ('bottom', True), ('left', True)):
    ax.spines[side].set_visible(visible)

# Title and axis labels are set once, with the final text and font size.
ax.set_title(lineage, fontsize=12)
ax.set_xlabel('Influence score', fontsize=12)
ax.set_ylabel('Driver regulon', fontsize=12)

sns.despine()


# In[41]:

result.plot_driver_genes_Venn()


# Plot heat map of the activity matrix of RGMs

# In[42]:

# Restrict to the cells that have a non-missing value in the lineage column.
in_lineage = adata.obs[result.name].notna()
adata_lineage = adata[adata.obs_names[in_lineage], :]

result.plot_RGM_activity_heatmap(
    cell_label=adata_lineage.obs['cell_type_finely'],
    type='out',
    col_cluster=True,
    bbox_to_anchor=(1.48, 0.25),
)