1、数据下载
关于visium HD数据的seurat读取处理以及其他情况的处理,已经有一个详细的教程了。之前的示例数据多个组在一张slice,我们是按照单个样本演示的。这里演示一下visium HD多样本整合分析,流程与普通visium是类似的。选取的数据是发表在Oliveira, M.F.d., Romero, J.P., Chung, M. et al. High-definition spatial transcriptomic profiling of immune cell populations in colorectal cancer. Nat Genet 57, 1512–1523 (2025). https:///10.1038/s41588-025-02193-3上的文章。这是一组很经典的示例数据,具体的数据可以在GEO下载,也可以在10x官网下载完整的结果或者自行下载FATSQ数据跑上游。样本中来自人类结直肠癌及其邻近正常黏膜组织的空间转录组,原本有5例样本,这里选择1例直肠癌、1例正常对照进行演示。数据下载链接:https://www./platforms/visium/product-family/dataset-human-crc:
data download:
wget https://cf./samples/spatial-exp/3.0.0/Visium_HD_Human_Colon_Normal_P5/Visium_HD_Human_Colon_Normal_P5_binned_outputs.tar.gzwget https://cf./samples/spatial-exp/3.0.0/Visium_HD_Human_Colon_Cancer_P5/Visium_HD_Human_Colon_Cancer_P5_binned_outputs.tar.gztar -xvzf Visium_HD_Human_Colon_Normal_P5tar -xvzf Visium_HD_Human_Colon_Cancer_P5
setwd("~/data_analysis/10X_space/CRC_visiumHD/")library(Seurat)library(ggplot2)library(patchwork)library(dplyr)
2、读取数据及基本处理
我们选择8um分辨率的数据:
CRC <- Load10X_Spatial(data.dir = './CRC/binned_outputs/square_008um/',slice = 'CRC')CRC$group <- 'CRC'#添加分组CRC
## An object of class Seurat
## 18085 features across 541968 samples within 1 assay
## Active assay: Spatial (18085 features, 0 variable features)
## 1 layer present: counts
## 1 spatial field of view present: CRC
NAT <- Load10X_Spatial(data.dir = './NAT/binned_outputs/square_008um/',slice ='NAT')NAT$group <- 'NAT' #添加分组NAT
## An object of class Seurat
## 18085 features across 435773 samples within 1 assay
## Active assay: Spatial (18085 features, 0 variable features)
## 1 layer present: counts
## 1 spatial field of view present: NAT
obj.list <- list(CRC,NAT)for (i in 1:length(obj.list)) { obj.list[[i]][["percent.mt"]] <- PercentageFeatureSet(obj.list[[i]], pattern = "^MT-")#计算bins线粒体基因比例}
object_meta1 = obj.list[[1]]@meta.data# Create a plot for nUMIdist_counts_before1 <- object_meta1 %>% ggplot(aes(x=nCount_Spatial)) + geom_density(alpha = 0.2) + scale_x_log10() + theme_classic() + ylab("Cell density") + xlab("Number of UMIs per bin") + ggtitle('Pre-QC UMIs/Bin') + theme(plot.title = element_text(hjust = 0.5))# Create a plot for nGenedist_features_before1 <- object_meta1 %>% ggplot(aes(x=nFeature_Spatial)) + geom_density(alpha = 0.2) + scale_x_log10() + theme_classic() + ylab("Cell density") + xlab("Number of genes per bin") + ggtitle('Pre-QC Genes/Bin') + theme(plot.title = element_text(hjust = 0.5))# Create a plot for percent.mtdist_mt_before1 <- object_meta1 %>% ggplot(aes(x=percent.mt)) + geom_density(alpha = 0.2) + scale_x_log10() + theme_classic() + ylab("Cell density") + xlab("Number of genes per bin") + ggtitle('Pre-QC pt.mt/Bin') + theme(plot.title = element_text(hjust = 0.5))dist_counts_before1 | dist_features_before1 | dist_mt_before1
for (i in 1:length(obj.list)) { obj.list[[i]] <- subset(obj.list[[i]], subset = nCount_Spatial > 100 & nFeature_Spatial > 100 & percent.mt < 40)}
CRC_merge <- merge(obj.list[[1]],obj.list[[2]],add.cell.ids = c("CRC", "NAT"))CRC_merge
3、sketch聚类
标准化:
CRC_merge <- NormalizeData(CRC_merge, assay = 'Spatial')
CRC_merge <- FindVariableFeatures(CRC_merge)
CRC_merge <- SketchData( object = CRC_merge, assay = 'Spatial', ncells = ncol(CRC_merge) * 0.15, method = "LeverageScore", sketched.assay = "sketch")CRC_merge
## An object of class Seurat
## 36170 features across 616313 samples within 2 assays
## Active assay: sketch (18085 features, 2000 variable features)
## 4 layers present: counts.1, counts.2, data.1, data.2
## 1 other assay present: Spatial
## 2 spatial fields of view present: CRC NAT
在sketch assay对采样的细胞进行标准化降维流程以及样本整合:
DefaultAssay(CRC_merge) <- "sketch"CRC_merge <- FindVariableFeatures(CRC_merge, verbose = FALSE)CRC_merge <- ScaleData(CRC_merge, verbose = FALSE)CRC_merge <- RunPCA(CRC_merge, assay = "sketch", reduction.name = "pca.sketch", verbose = FALSE)
CRC_merge <- IntegrateLayers(object = CRC_merge, method = HarmonyIntegration, orig.reduction = "pca.sketch", new.reduction = "integrated.Harmony", verbose = FALSE)
CRC_merge <- FindNeighbors(CRC_merge, assay = "sketch", reduction = "integrated.Harmony", dims = 1:25)CRC_merge <- FindClusters(CRC_merge, cluster.name = "seurat_cluster.sketched", resolution = 0.5)
CRC_merge <- RunUMAP(CRC_merge, reduction = "integrated.Harmony", reduction.name = "umap.sketch", return.model = T, dims = 1:25)
# Plot UMAPDimPlot(CRC_merge, reduction = "umap.sketch", label = T, cols = 'polychrome') + ggtitle("Sketched clustering") + theme(legend.position = "none")
# Plot UMAPDimPlot(CRC_merge, reduction = "umap.sketch", group.by = 'group')
4、将sketch聚类标签投射回完整数据集
将“sketch”阶段得到的结果投射回所有 bins。这两步骤比较耗费时间!
#返回的整合后的full reduction为integrated.Harmony.fullCRC_merge <- ProjectIntegration(object = CRC_merge, #合并的seurat obj sketched.assay = "sketch", #sketch assay assay = "Spatial",#原始数据assay reduction="integrated.Harmony")#使用sketch,经过批次校正的嵌入的降维结果名称。
options(future.globals.maxSize = 10 * 1024^3)CRC_merge <- ProjectData( object = CRC_merge, assay = "Spatial",#原来的完整数据集assay full.reduction = "integrated.Harmony.full",#投射后所有bins的reduction名称 sketched.assay = "sketch",#sketch下采样数据assay sketched.reduction = "integrated.Harmony.full",#投射需要使用的sketched.reduction umap.model = "umap.sketch",#sketch上训练好的UMAP模型的名称 dims = 1:25, refdata = list(seurat_cluster.projected = "seurat_cluster.sketched"))
## Finding sketch neighbors
## Finding sketch weight matrix
## Transfering refdata from sketch
## Projection to sketch umap
## Running UMAP projection
## 12:01:50 Read 616313 rows
## 12:01:50 Processing block 1 of 1
## 12:01:50 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 12:02:03 Initializing by weighted average of neighbor coordinates using 1 thread
## 12:02:08 Commencing optimization for 67 epochs, with 18489390 positive edges
## 12:04:22 Finished
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from full.umap.sketch to fullumapsketch_
可视化最终效果:
DefaultAssay(CRC_merge) <- "Spatial"Idents(CRC_merge) <- "seurat_cluster.projected"# Plot the UMAPDimPlot(CRC_merge, reduction = "full.umap.sketch", label = T, raster = F, cols = 'polychrome') + ggtitle("Projected clustering") + theme(legend.position = "none")
color_pal=c( "#E41A1C","#377EB8","#4DAF4A", "#984EA3","#FF7F00","#FFFF33","#A65628", "#F781BF","#999999","#8DD3C7","#FFFFB3","#BEBADA","#FB8072","#80B1D3", "#FDB462", "#B3DE69","#FCCDE5","#CCEBC5","#D95F02","#003366","#1B9E77", "#7570B3","#E7298A","#66A61E","#E6AB02","#FFC0CB")SpatialDimPlot(CRC_merge, group.by = 'seurat_cluster.projected', pt.size.factor = 8, images = 'CRC') + scale_fill_manual(values = color_pal)+ guides(fill=guide_legend(ncol=2))
color_pal=c( "#E41A1C","#377EB8","#4DAF4A", "#984EA3","#FF7F00","#FFFF33","#A65628", "#F781BF","#999999","#8DD3C7","#FFFFB3","#BEBADA","#FB8072","#80B1D3", "#FDB462", "#B3DE69","#FCCDE5","#CCEBC5","#D95F02","#003366","#1B9E77", "#7570B3","#E7298A","#66A61E","#E6AB02","#FFC0CB")SpatialDimPlot(CRC_merge, group.by = 'seurat_cluster.projected', pt.size.factor = 8, images = 'NAT') + scale_fill_manual(values = color_pal)+ guides(fill=guide_legend(ncol=2))