MEBOCOST是一个基于Python的计算工具,使用单细胞RNA测序(scRNA-seq)数据定量推断基于代谢物的细胞间通信。MEBOCOST预测细胞-细胞通信事件,其中代谢物(如脂质)由一个细胞(发送细胞)分泌并传播到另一个细胞(接收细胞)的传感蛋白质。该算法将scRNA-seq数据与人工整理的代谢物-传感器数据库相结合,以识别细胞-细胞代谢物-传感器通信,并根据代谢物的外排和内流速率以及酶和传感器基因的表达水平分别定义发送细胞和接收细胞。
原文paper:https://www./content/10.1101/2022.05.30.494067v1
#终端,创新一个新的独立环境
conda create -n mebocost python=3.10
conda activate mebocost
#创建新建文件夹,下载mebocost包,下载完成后进入下载文件夹
mkdir metabocost_analysis
git clone https://github.com/kaifuchenlab/MEBOCOST.git
cd MEBOCOST
#安装依赖包
pip install -r requirements.txt
#安装mebocost
python -m pip install .
#mebocost v1.0.4
#加载包
import os,sys
import scanpy as sc
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from mebocost import mebocost
Create mebocost object
#load data,已注释好的单细胞对象adata = sc.read_h5ad('./scRNA_ha.h5ad')
sc.pl.umap(adata,color=["celltype"],frameon=False,ncols=1,)
# 初始化 mebocost object,导入表达矩阵mebo_obj = mebocost.create_obj( adata = adata, #scanpy单细胞对象 group_col = ['celltype'],#细胞注释列 met_est = 'mebocost',#细胞内代谢物水平的测定方法 config_path = './MEBOCOST/mebocost.conf',#包含代谢物注释、酶、传感器、scFEA 注释、compass 注释的文件路径的配置文件的路径。 exp_mat=None,#表达矩阵,这个参数是adata的专属参数,如果提供了scanpy adata,exp_mat设置为nonoe,如果设置了表达矩阵,adata参数设置为none cell_ann=None,#如果提供的是表达矩阵,这里才需要设置,传入dataframe,是对于表达矩阵中cells的注释. species='human',#物种,支持human和mouse met_pred=None,#数据框,如果使用scFEA或Compass来估算细胞中的代谢物水平,请提供来自scFEA或Compass的原始结果, #行名是细胞,列名是代谢物/反应/模块。如果设置方法是mebocost,则忽略这些参数 met_enzyme=None,#数据框,格式查看帮助函数,手动整理需要 met_sensor=None,#数据框,格式查看帮助函数,手动整理需要 met_ann=None,#数据框中包含了从HMDB网站收集的代谢物的注释信息,这些注释信息主要包括 HMDB 编号、KEGG 编号、代谢物等基本信息。 scFEA_ann=None,#data frame, module annotation of metabolite flux in scFEA, usually is the file at https://github.com/changwn/scFEA/blob/master/data/Human_M168_information.symbols.csv compass_met_ann=None,#查看帮助函数 compass_rxn_ann=None,#查看帮助函数 cutoff_exp='auto',#可设置为默认auto或者浮点数,用于筛选出针对给定基因表达水平较低的细胞。 cutoff_prop=0.25,#取值范围在 0 到 1 之间,代谢物或者基因在细胞中表达比例低于阈值的cut。 sensor_type=['Receptor', 'Transporter', 'Nuclear Receptor'],#将在通信建模中使用的传感器类型的列表。默认这三个 thread=8)#线程数
#这一步需要花费点时间commu_res = mebo_obj.infer_commu( n_shuffle=1000, seed=12345, Return=True, save_permuation=False, min_cell_number = 10, pval_method='permutation_test_fdr', pval_cutoff=0.05)
print('Number of mCCC detected by enzyme and sensor co-expression: ', commu_res.shape[0])#Number of mCCC detected by enzyme and sensor co-expression: 304
#结果可以保存为pk文件# ### save # mebocost.save_obj(obj = mebo_obj, path = './scRNA_mebocost.pk')## re-load the previous object if needed#mebo_obj = mebocost.load_obj('./scRNA_mebocost.pk')
整合compass,通过流出和流入速率约束mcc分析。其实上述步骤,就已经完成了mebocost分析了。得到的是一个数据框,细胞之间代谢通讯。作者教程中还有一个步骤,整合compass,通过流出和流入速率约束mcc分析,这里的compass分析,对每种细胞类型分别使用平均基因表达值来运行 COMPASS 程序。关于compass,我们之前有详细的教程((视频教程)Compass代谢分析详细流程及python版-R语言版下游分析和可视化)。
avg_exp = sc.get.aggregate(adata, by = 'celltype', func='mean')avg_exp = pd.DataFrame(avg_exp.layers['mean'], index = avg_exp.obs_names, columns = avg_exp.var_names).T# ## do un log since COMPASS will take log in the algorithmavg_exp = avg_exp.apply(lambda col: np.exp(col)-1)avg_exp
avg_exp.to_csv('avg_exp_mat.tsv', sep = 't')#保存文件用于compass分析#跑下compass流程,终端中运行:conda activate compasscompass --data avg_exp_mat.tsv --num-processes 10 --species homo_sapiens --output-dir ./compass_res --temp-dir ./compass_res_tmp --calc-metabolites --lambda 0#分析结果位于compass_res文件夹
## apply constraint on compass flux resultupdated_res = mebo_obj._ConstainFlux_(compass_folder='./compass_res/', efflux_cut='auto', influx_cut='auto', inplace=False)
## update to the objectmebo_obj.commu_res = updated_res
可视化结果
### ## filter by FDR less than 0.05,过滤保存显著性结果commu_res = mebo_obj.commu_res.copy()commu_res = commu_res[commu_res['permutation_test_fdr']<=0.05]## write to tsv filecommu_res.to_csv('communication_result.tsv', sep = 't', index = None)
#1-柱状图可视化通讯数目## sender and receiver event numbermebo_obj.eventnum_bar( sender_focus=[], metabolite_focus=[], sensor_focus=[], receiver_focus=[], xorder=[], and_or='and', pval_method='permutation_test_fdr', pval_cutoff=0.05, comm_score_col='Commu_Score', comm_score_cutoff = 0, cutoff_prop = 0.25, figsize=(5, 3.5), save=None, show_plot=True, show_num = True, include=['sender-receiver'], group_by_cell=True, colorcmap=["#8AAD05", "#DF5D22"], return_fig=False#设置为T直接保存图片 )
#2-network可视化互作,可以看出,这个可视化思路和cellchat一样,network的展示不如bar图直观## circle plot to show communications between cell groupsmebo_obj.commu_network_plot( sender_focus=[], metabolite_focus=[], sensor_focus=[], receiver_focus=[], and_or='and', pval_method='permutation_test_fdr', pval_cutoff=0.05, node_cmap='tab20',#节点celltype颜色设置 figsize='auto', line_cmap='viridis', line_color_vmin=None, line_color_vmax=None, linewidth_norm=(0.2, 1), linewidth_value_range = None, node_size_norm=(50, 200), node_value_range = None, adjust_text_pos_node=True, node_text_hidden = False, node_text_font=10, save=None, show_plot=True, comm_score_col='Commu_Score', comm_score_cutoff=0, text_outline=True, return_fig=False#设置为T直接保存图片 )
#3-Dotplot可视化互作数目,这个图的意思和前面两个一样,只是形式不一样,三者选择其一即可### dot plot to show the number of communications between cellsmebo_obj.count_dot_plot( pval_method='permutation_test_fdr', pval_cutoff=0.05, cmap='coolwarm',#颜色设置 figsize='auto', save=None, dot_size_norm =(20, 200), dot_value_range = None, dot_color_vmin=None, dot_color_vmax=None, show_plot=True, comm_score_col='Commu_Score', comm_score_cutoff=0, dendrogram_cluster=True, sender_order=[], receiver_order=[], return_fig = False#设置为T直接保存图片 )
#4-展示互作细节,dotplot展示细胞之间互作代谢物
## Malignant cell was focused, use receiver_focus=[] to include all cell typesmebo_obj.commu_dotmap( sender_focus=[], metabolite_focus=[], sensor_focus=[], receiver_focus=[], and_or='and', pval_method='permutation_test_fdr', pval_cutoff=0.05, figsize='auto', cmap='bwr', cmap_vmin = None, cmap_vmax = None, cellpair_order=[], met_sensor_order=[], dot_size_norm=(10, 150), save=None, show_plot=True, comm_score_col='Commu_Score', comm_score_range = None, comm_score_cutoff=0, swap_axis = False, return_fig = False#设置为T直接保存图片 )
#5-发送者代谢物与接收器传感器之间信息传递流程的可视化## Malignant cell was focused, use receiver_focus=[] to include all cell typesmebo_obj.FlowPlot( pval_method='permutation_test_fdr', pval_cutoff=0.05, sender_focus=[], metabolite_focus=[], sensor_focus=[], receiver_focus=[], remove_unrelevant = False, and_or='and', node_label_size=8, node_alpha=0.6, figsize='auto', node_cmap='Set1', line_cmap='YlGnBu',#颜色设置 line_cmap_vmin = None, line_cmap_vmax = 15.5, node_size_norm=(20, 150), node_value_range = None, linewidth_norm=(0.5, 5), linewidth_value_range = None, save='test.pdf', show_plot=True, comm_score_col='Commu_Score', comm_score_cutoff=0, text_outline=False, return_fig = False#设置为T直接保存图片 )
#6-提琴图展示代谢物、或者senor表达## violin plot to show the aggregated metabolite enzymes of informative metabolties in communication### here we show five significant metabolites,### users can pass several metabolites of interest by provide a listcommu_df = mebo_obj.commu_res.copy()good_met = commu_df[(commu_df['permutation_test_fdr']<=0.05)]['Metabolite_Name'].sort_values().unique()mebo_obj.violin_plot( sensor_or_met=good_met[:10], ## 仅展示top10的代谢物表达,也可以自行选择展示 cell_focus=[], cell_order = [], row_zscore = False, cmap=None, vmin=None, vmax=None, figsize='auto', cbar_title='', save=None, show_plot=True )