Slingshot是由加州大学于2018年开发的一种用于单细胞转录组数据分析的创新工具,旨在解决细胞谱系分化结构和伪时间推断的关键挑战。该方法通过两个核心阶段——谱系结构推断和伪时间排序——实现了对复杂分化轨迹的高精度解析。与传统方法如Monocle和TSCAN相比,Slingshot在处理多分支结构和噪声数据方面展现出显著优势。其技术特点在于将稳定性强的算法框架与灵活的监督模式相结合,支持多种上游数据处理方法(如降维和聚类),具有突出的模块化特性。模拟研究证实,Slingshot能准确识别1-3个分支的发育轨迹,且其伪时间推断精度优于同类主流方法。这一工具为干细胞分化、发育生物学等研究领域提供了更可靠的细胞动态过程重建能力,特别是在需要解析复杂分支谱系的场景中表现出重要价值。
加载R包
library(SeuratData)
library(Seurat)
library(ggplot2)
library(tidyverse)
library(cowplot)
library(Biobase)
library(monocle3)
library(ggplot2)
library(dplyr)
library(harmony)
library(slingshot)
library(tradeSeq)
library(RColorBrewer)
library(DelayedMatrixStats)
library(scales)
library(paletteer)
library(viridis)
读入ifnb数据并提取CD14与CD16单核数据
这里和之前基本一样不多赘述
sce <- LoadData("ifnb.SeuratData")
#sce <- sce[, sce$seurat_annotations %in% c("CD14 Mono","CD16 Mono")]
# table([email protected]$seurat_annotations)
sce <- Seurat::NormalizeData(sce,
normalization.method = "LogNormalize",
scale.factor = 1e4)
sce <- FindVariableFeatures(sce, selection.method = "vst", nfeatures = 2000)
sce <- ScaleData(sce)
sce <- RunPCA(sce, npcs = 50)
sce = sce %>% RunHarmony("stim", plot_convergence = TRUE)
sce <- sce %>%
RunUMAP(reduction = "harmony", dims = 1:30) %>%
FindNeighbors(reduction = "harmony", dims = 1:30) %>%
FindClusters(resolution = 0.3) %>%
identity()
scRNAsub <- sce[, sce$seurat_annotations %in% c("CD14 Mono","CD16 Mono")]
p1 <- DimPlot(scRNAsub, reduction = "umap",group.by = "stim", pt.size=0.5, label = F,repel = TRUE)
p2 <- DimPlot(scRNAsub, reduction = "umap",group.by = "seurat_annotations", pt.size=0.5, label = T,repel = TRUE)
p1
p2
Idents(scRNAsub)=scRNAsub$seurat_annotations
diff.wilcox = FindAllMarkers(scRNAsub, only.pos = TRUE, min.pct = 0.1,
thresh.use = 0.1)
all.markers = diff.wilcox %>%dplyr::select(gene, everything()) %>% subset(p_val<0.05)
diff.genes <- subset(all.markers,p_val_adj<0.001)$gene
转为SingleCellExperiment格式
在slingshot对象构建过程中需要先将Seurat格式转化为SingleCellExperiment格式,同时可以观察到预定的轨迹变化,该流程整合对象构建较为简单。
scRNAsub <- as.SingleCellExperiment(scRNAsub, assay = "RNA")
sce_slingshot1 <- slingshot(scRNAsub,
reducedDim = 'UMAP',
start.clus = 'CD14 Mono',
clusterLabels = scRNAsub$seurat_annotations)
SlingshotDataSet(sce_slingshot1)
library(grDevices)
colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100)
plotcol <- colors[cut(sce_slingshot1$slingPseudotime_1, breaks=100)]
plot(reducedDims(sce_slingshot1)$UMAP , col = plotcol, pch=16, asp = 1)
lines(SlingshotDataSet(sce_slingshot1), lwd=2, col='black')
去除离群点并重新构建slingshot对象
这里由于存在几个离群细胞点,可以选择去除。去除后重新构建对象。
# 设定UMAP坐标范围(根据当前UMAP调整)
scRNAsub <- sce[, sce$seurat_annotations %in% c("CD14 Mono","CD16 Mono")]
x_range <- c(-10, 0) # 调整x轴范围
y_range <- c(-8, 8) # 调整y轴范围
# 获取在范围内的细胞
umap_coords <- Embeddings(scRNAsub, "umap")

head(umap_coords) # 查看坐标范围
in_range <- umap_coords[,1] > x_range[1] &
umap_coords[,1] < x_range[2] &
umap_coords[,2] > y_range[1] &
umap_coords[,2] < y_range[2]
scRNAsub <- subset(scRNAsub, cells = colnames(scRNAsub)[in_range])
DimPlot(scRNAsub, reduction = "umap")
#################
scRNAsub <- as.SingleCellExperiment(scRNAsub, assay = "RNA")
sce_slingshot1 <- slingshot(scRNAsub,
reducedDim = 'UMAP',
start.clus = 'CD14 Mono',
clusterLabels = scRNAsub$seurat_annotations)
SlingshotDataSet(sce_slingshot1)
library(grDevices)
colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100)
plotcol <- colors[cut(sce_slingshot1$slingPseudotime_1, breaks=100)]
plot(reducedDims(sce_slingshot1)$UMAP , col = plotcol, pch=16, asp = 0.5)
lines(SlingshotDataSet(sce_slingshot1), lwd=2, col='black')
时序热图
library(tradeSeq)
# fit negative binomial GAM
pseudotimeED <- slingPseudotime(sce_slingshot1, na = FALSE)
cellWeightsED <- slingCurveWeights(sce_slingshot1)
counts <- scRNAsub@assays@data$counts
counts=as.data.frame(counts)
counts <- counts[diff.genes,]
sce_slingshot <- fitGAM(counts = as.matrix(counts), pseudotime = pseudotimeED, cellWeights = cellWeightsED, nknots = 5, verbose = T)
ATres <- associationTest(sce_slingshot )
topgenes <- rownames(ATres[order(ATres$pvalue), ])[1:200]
topgenes<- diff.wilcox %>% group_by(cluster) %>% top_n(50, avg_log2FC)
pst.ord <- order(sce_slingshot1$slingPseudotime_1, na.last = NA)
heatdata <- assays(sce_slingshot1)$counts[topgenes$gene, pst.ord]
heatclus <- scRNAsub$seurat_annotations[pst.ord]
heatdata=as.matrix(heatdata)
heatmap(log1p(heatdata),
Colv = NA,
ColSideColors = brewer.pal(9,"Set1")[heatclus],
labCol = "") # 或者使用 labCol = NA
整体显示亚群中基因表达结合特有时序变化构建热图,其中也可以自己导出进行个性化修饰。
mean(rowData(sce_slingshot)$tradeSeq$converged)
rowData(sce_slingshot)$assocRes <- associationTest(sce_slingshot, lineages = TRUE, l2fc = log2(2))
assocRes <- rowData(sce_slingshot)$assocRes
gene_dynamic <- list()
genes_plot <- c("CD14","FCGR3A")
for(i in 1:length(genes_plot)){
p = plotSmoothers(sce_slingshot, assays(sce_slingshot)$counts,
gene =genes_plot[i], alpha = 0.6, border = T, lwd = 2)+
ggtitle(genes_plot[i])
gene_dynamic[[i]] <- p
}
Seurat::CombinePlots(gene_dynamic, ncol = 2)
该算法工具在计算速度上较快,同时推断轨迹的时候同样可以选择先验与算法推导。Slingshot能够准确识别复杂的多分支谱系结构,并且在不同噪声水平和细胞数量下表现出较高的鲁棒性,在不同数据中表现出色。
参考链接
https://www.jianshu.com/p/e85d23a25a43 https:///packages/release/bioc/vignettes/slingshot/inst/doc/vignette.html https://pmc.ncbi.nlm./articles/PMC6007078/