R版免疫组库分析scRepertoire包另外一个重要的分析就是结合scRNA与TCR/BCR数据,对VDJ数据从更多的角度解析。
library(Seurat)setwd("./scRNA/GEX/")#批量读入10X scRNA datadata_dirs <- list.dirs(path='.', full.names=TRUE, recursive=FALSE)samples <- c("HC1","HC2","HC3","ICB1","ICB2","IEI3")objs <- list()for(ix in seq_along(data_dirs)){sample <- samples[ix]path <- data_dirs[ix]data <- Read10X(data.dir=path)obj <- CreateSeuratObject(counts=data, project=sample,min.cells = 3,min.features = 200)obj$orig.ident <- sampleobjs[[sample]] <- obj}#processsr.merged <- merge(objs[[1]], y = c(objs[[2]],objs[[3]],objs[[4]],objs[[5]],objs[[6]]),add.cell.ids = c("HC1","HC2","HC3","ICB1","ICB2","IEI3"))sr.merged[["percent.mt"]] <- PercentageFeatureSet(sr.merged,pattern = "^MT-")sr.merged <- subset(sr.merged, subset = nFeature_RNA > 200 & nFeature_RNA < 6000 & percent.mt < 20)VlnPlot(sr.merged, features = c("nFeature_RNA", "nCount_RNA", 'percent.mt'), ncol = 3)
#PCA:sr.merged <- NormalizeData(sr.merged, normalization.method = "LogNormalize")sr.merged <- FindVariableFeatures(sr.merged)sr.merged <- ScaleData(sr.merged, vars.to.regress = c("percent.mt"), verbose = T)sr.merged <- RunPCA(sr.merged, npcs = 50)
library(harmony)sr.merged <- sr.merged %>% RunHarmony("orig.ident",plot_convergence = TRUE,assay.use = 'RNA')ElbowPlot(object = sr.merged, ndims = 50, reduction = 'harmony')
library(scRepertoire)#这里我们演示的数据是BCR数据# Remove BCR VDJ genessr.merged <- quietBCRgenes(sr.merged)# Check the first 10 variable features after removalVariableFeatures(sr.merged)[1:20]##[1] "PTGDS" "S100A9" "S100A8" "TXNDC5" "AC233755.1"## [6] "G0S2" "LYZ" "CDKN1C" "CPVL" "MZB1" ## [11] "HDC" "TRBV7-2" "LYPD2" "IL1B" "CST3" ## [16] "S100A12" "AQP3" "MKI67" "TRBV7-9" "CXCL8"
sr.merged <- RunUMAP(sr.merged, reduction = "harmony", dims = 1:20, verbose = F)sr.merged <- FindNeighbors(sr.merged, dims = 1:20, reduction = 'harmony')sr.merged <- FindClusters(object = sr.merged , resolution = 0.3, verbose = FALSE) DimPlot(sr.merged,label = T)
sr.merged <- subset(sr.merged, seurat_clusters=='14', invert=T)library(plyr)[email protected]$seurat_clusters = droplevels([email protected]$seurat_clusters, exclude = setdiff(levels([email protected]$seurat_clusters), unique([email protected]$seurat_clusters)))[email protected]$celltype <- mapvalues([email protected]$seurat_clusters, from=c("0","1","2","3","4","5","6","7","8","9","10","11","12","13"), to=c("Naive", "Naive", "Memory", "Naive", "Memory", "Memory", "Classical ABC", "Transitional", "Classical ABC", "Memory", "Anergic ABC", "CD1c ABC", "Memory", "Classical ABC"))Idents(sr.merged) <- [email protected]$celltypeDimPlot(sr.merged, label=T, repel = T)
#批量读取csv文件setwd('./scRNA/0BCR_data/')folders=list.files('./',pattern='[contig_annotations.csv]$')#文件夹目录folders
BCR_list = lapply(folders,function(x){ read.csv(x) })#处理BCR data为clones#添加sample区别不同样本data_bcr <- combineBCR(BCR_list, ID = c("HC1","HC2","HC3","ICB1","ICB2","IEI"),#分组 samples=c("HC","HC","HC","Disease","Disease","Disease"), threshold=0.85)
其他基础可视化与TCR一样:可视化函数中,有一个共同参数cloneCall,选择不同的值,可视化clone变化。cloneCall有四个选项,分别的意思如下:
“gene”:使用含有TCR/Ig的VDJC基因
“nt”:使用CDR3区域的核苷酸序列
“aa”:使用CDR3区域的氨基酸序列
“strict”:使用包含TCR/Ig + CDR3区域核苷酸序列的VDJC基因。
clonalQuant(data_bcr,
group.by = 'ID',
cloneCall="strict",
chain = "both",
scale = TRUE)#若设为TRUE,则将输出结果转换为相对比例(relative percentage)
3-将克隆信息整合到单细胞object
需要注意,contig 数据中的细胞条形码(cell barcode)已经发生了改变,需要将clones与scRNA object结合,需要保证两者barcodes一致:现在的形式是分组+sample+barcode,所以scRNA的barcode形式也需要需修改一致。
seurat object中添加一个分组:并修改barcode。
sr.merged$group <- ifelse([email protected]$orig.ident %in% c("HC1", "HC2", "HC3"), "HC", "Disease") #添加分组#替换object中barcode形式
sr.merged = RenameCells(sr.merged, new.names = paste0([email protected]$group, "_", rownames([email protected])))
combineExpression() 函数执行两者之间的组合,函数的执行过程中,会计算克隆的频率clonal frequency和比例clonal proportion,并将每个克隆划分到称为cloneSize的组别中。克隆频率与比例的计算依赖于所比较的免疫受体库,也就是分组group.by参数。
sce <- combineExpression(data_bcr, sr.merged, cloneCall="gene", group.by = "ID", #不设置,默认基于data_bcr列表元素 proportion = FALSE, cloneSize=c(Single=1, Small=5, Medium=10, Large=30, Hyperexpanded=100))
#Define color palette colorblind_vector <- hcl.colors(n=7, palette = "inferno", fixup = TRUE)DimPlot(sce, group.by = "cloneSize") + scale_color_manual(values=rev(colorblind_vector[c(1,3,4,5,7)]))
#This is an example of the process, which will not be evaluated during knit#TCR <- combineTCR(...)#BCR <- combineBCR(...)#list.receptors <- c(TCR, BCR)#seurat <- combineExpression(list.receptors, # seurat, # cloneCall="gene", # proportion = TRUE)
4-可视化
4-1 clonalOverlay
利用降维图作为参考,clonalOverlay()可以在UMAPE图上叠加显示克隆扩增细胞的位置。该函数能够突出显示高克隆频率或高克隆比例的区域,从而直观呈现克隆扩增在细胞群体中的分布情况。
clonalOverlay(sce, reduction = "umap", cutpoint = 5, #在轮廓图中包含的最低克隆频率或比例阈值。 bins = 10, #绘制轮廓的层数。 facet.by = "group") + guides(color = "none")
4-2 clonalNetwork
与clonalOverlay() 类似,clonalNetwork()用于在单细胞降维空间中可视化不同细胞簇间共享克隆的网络交互关系。该函数展示了克隆从起始节点向终止节点的相对流动比例,箭头方向表示克隆共享的流向。这种可视化方式能够揭示克隆在不同细胞群体或发育状态之间的迁移与分布特征,从而反映免疫细胞的克隆扩增与动态联系。
#ggraph needs to be loaded due to issues with ggplotlibrary(ggraph)
#No Identity filterclonalNetwork(sce, reduction = "umap", group.by = "celltype", filter.clones = 2000, filter.identity = "Naive", cloneCall = "aa", exportClones = FALSE)#设置为True,则会导出多个身份分组(identity groups)之间共享的克隆,并根据克隆拷贝总数进行排序。
4-3 clonalOccupy
这个就是对cloneSize的柱状图统计了,可以按照celltype,或者按照分组展示每个组cloneSize的比例。
clonalOccupy(sce, x.axis = "celltype", proportion = TRUE, label = TRUE)#图上展示比例
clonalOccupy(sce, x.axis = "group", proportion = TRUE, label = TRUE)#图上展示比例
4-4 alluvialClones
alluvialClones() 可用于可视化克隆在多个分类变量之间的分布情况,从而分析这些变量之间的相互关系与变化趋势。如果需要强调特异性的clone,那么color可以设置为需要标注的clone。
alluvialClones(sce, cloneCall = "gene", y.axes = c("orig.ident", "celltype"), color = "celltype")
5-Quantifying Clonal Bias
5-1 StartracDiversity
StartracDiversity() 用于评估免疫细胞克隆在不同组织与细胞状态间的动态特征。
# Calculate and plot all three STARTRAC indicesStartracDiversity(sce, type = "group", #The metadata variable that specifies tissue type for migration analysis. group.by = "orig.ident")
5-2 clonalBias
一个新的克隆度量指标——clonalBias(),与 STARTRAC 类似,旨在定量评估单个克隆(individual clone)在特定细胞群体或亚群(cellular compartment / cluster)中的偏向程度。当 clone bias = 1 时,表示该克隆的所有细胞完全来源于同一细胞群或簇;而 clone bias = 0则表示该克隆在各细胞亚型间的分布与整体背景分布一致(无偏向)。
clonalBias(sce, cloneCall = "aa", split.by = "orig.ident", group.by = "celltype", n.boots = 10, min.expand =2)