# ---------------------------------------------------------------# ----- -----# ----- 10X Visium 空转分析教程【seurat】 -----# ----- 《KS科研分享与服务》公众号出品 -----# ----- -----# ----- -----# ----- 第一节:基础分析 -----# ----- -----# ----------------------------------------------------------------#!!!!此分析基于seurat v5!!!!
1、shell-download data
#=================================================================================# nohup wget https://ftp.ncbi.nlm./geo/series/GSE287nnn/GSE287278/suppl/GSE287278_RAW.tar &# cd data_analysis/Spatial_analysis/visium/# tar -xvf GSE287278_RAW.tar# rm GSE287278_RAW.tar# for f in GSM*.tar.gz; do# tar -xzf "$f"# done#rm GSM*.tar.gz#ls -lh# drwxr-xr-x 4 ks_ts ks_ts 4.0K Dec 19 2024 CTR1_processed_data# drwxr-xr-x 4 ks_ts ks_ts 4.0K Dec 19 2024 CTR2_processed_data# drwxr-xr-x 4 ks_ts ks_ts 4.0K Dec 19 2024 CTR3_processed_data# drwxr-xr-x 4 ks_ts ks_ts 4.0K Dec 19 2024 CTR4_processed_data# drwxr-xr-x 4 ks_ts ks_ts 4.0K Dec 19 2024 RIF1_processed_data# drwxr-xr-x 4 ks_ts ks_ts 4.0K Dec 19 2024 RIF2_processed_data# drwxr-xr-x 4 ks_ts ks_ts 4.0K Dec 19 2024 RIF3_processed_data# drwxr-xr-x 4 ks_ts ks_ts 4.0K Dec 19 2024 RIF4_processed_data
2、读取数据
#加载数据用的是Load10X_Spatial,?Load10X_Spatial可以查看它的帮助函数#这里加载的其实是标准的10X空转下级数据,Load10X_Spatial示例也是最标准的读取方式#如果你的数据是10X spaceranger得到的,下游文件在out文件夹,很容易进行读取#但是如果不跑上游,也没有标准的文件,例如使用公共数据库数据,可能就需要各种形式处理了# Load10X_Spatial(# data.dir, #h5文件也就是表达矩阵文件夹,image文件在其下级文件夹spatial# filename = "filtered_feature_bc_matrix.h5",#feature barcode matrix文件名# assay = "Spatial",# slice = "slice1",# filter.matrix = TRUE,只保留确定在组织上的spots# to.upper = FALSE,# image = NULL,# ...# )#标准文件读取#路径应该包含filtered_feature_bc_matrix.h5文件以及spatial文件夹# sp <- Load10X_Spatial(data.dir = './sta_files/',# filename = "filtered_feature_bc_matrix.h5",# assay = "Spatial")
setwd("~/data_analysis/Spatial_analysis/visium/")#加载包library(Seurat)library(ggplot2)library(patchwork)library(dplyr)library(SeuratWrappers)
#control3 sampleCTR3 <- CreateSeuratObject(counts = Read10X("./CTR3_processed_data/filtered_feature_bc_matrix/"), assay = "Spatial",project='CTR3')#读取exp matrix,并创建seurat objCTR3_img <- Read10X_Image(image.dir = file.path("./CTR3_processed_data/spatial/"), filter.matrix = TRUE)#Load a 10X Genomics Visium ImageCTR3_img <- CTR3_img[Cells(x = CTR3)]#保留图像中与表达矩阵一致的spotDefaultAssay(CTR3) <- "Spatial"CTR3[["CTR3"]] <- CTR3_img#control4 sampleCTR4 <- CreateSeuratObject(counts = Read10X("./CTR4_processed_data/filtered_feature_bc_matrix/"), assay = "Spatial",project='CTR4')#读取exp matrix,并创建seurat objCTR4_img <- Read10X_Image(image.dir = file.path("./CTR4_processed_data/spatial/"), filter.matrix = TRUE)#Load a 10X Genomics Visium ImageCTR4_img <- CTR4_img[Cells(x = CTR4)]#保留图像中与表达矩阵一致的spotDefaultAssay(CTR4) <- "Spatial"CTR4[["CTR4"]] <- CTR4_img#RIF3 sampleRIF3 <- CreateSeuratObject(counts = Read10X("./RIF3_processed_data/filtered_feature_bc_matrix/"), assay = "Spatial",project='RIF3')#读取exp matrix,并创建seurat objRIF3_img <- Read10X_Image(image.dir = file.path("./RIF3_processed_data/spatial/"), filter.matrix = TRUE)#Load a 10X Genomics Visium ImageRIF3_img <- RIF3_img[Cells(x = RIF3)]#保留图像中与表达矩阵一致的spotDefaultAssay(RIF3) <- "Spatial"RIF3[["RIF3"]] <- RIF3_img#RIF4 sampleRIF4 <- CreateSeuratObject(counts = Read10X("./RIF4_processed_data/filtered_feature_bc_matrix/"), assay = "Spatial",project='RIF4')#读取exp matrix,并创建seurat objRIF4_img <- Read10X_Image(image.dir = file.path("./RIF4_processed_data/spatial/"), filter.matrix = TRUE)#Load a 10X Genomics Visium ImageRIF4_img <- RIF4_img[Cells(x = RIF4)]#保留图像中与表达矩阵一致的spotDefaultAssay(RIF4) <- "Spatial"RIF4[["RIF4"]] <- RIF4_img#removerm(CTR3_img,CTR4_img,RIF3_img,RIF4_img)
3、可视化基本信息
#可视化spots ncount/nfeatures,以检查数据以及构建的object无误SpatialFeaturePlot(CTR3, features = "nCount_Spatial")+ theme(legend.position = "right")
SpatialFeaturePlot(RIF3, features = "nCount_Spatial") +theme(legend.position = "right")
VlnPlot(CTR3, features = "nCount_Spatial", pt.size = 0.1)
VlnPlot(RIF3, features = "nCount_Spatial", pt.size = 0.1)
4、质控及标准化
#add metadata#添加sample分组或者其他信息CTR3$group <- 'CTR'CTR3$group <- 'CTR'RIF3$group <- 'RIF'RIF4$group <- 'RIF'
obj.list <- list(CTR3,CTR4,RIF3,RIF4)for (i in 1:length(obj.list)) {obj.list[[i]][["percent.mt"]] <- PercentageFeatureSet(obj.list[[i]], pattern = "^MT-")#计算spot线粒体基因比例obj.list[[i]] <- subset(obj.list[[i]], subset = nCount_Spatial > 500 & percent.mt < 20)}
# perform SCTransform normalizationfor (i in 1:length(obj.list)) {obj.list[[i]] <- SCTransform(obj.list[[i]], assay = 'Spatial'#vars.to.regress = "percent.mt #指定需要回归掉的技术噪音变量)}
5、数据整合 & 降维聚类
空转样本单独分析还是整合分析?
???关于多样本空转数据分析需不需要整合分析的看法???
首先说结论然后再解释(这里纯属个人看法思考及参考了网上部分意见):空转样本整合分析非必需,但是如果多样本还是有必要!
空转数据如果你有两个及以上的样本,没有scRNA那种需要进行条件对比的数据,是没有整合去批次的需求的,完全可以单个sample分开进行分析。同时,空转数据每个切片都是不一样的,所以整合也没有必要,更不能说通过整合去去批次。之所以进行“整合”一说,对于空转指示对spot表达矩阵的整合,方便进行降维聚类、横向比较以及注释,如果不进行整合,那么merge的多个切片spot表达矩阵降维的UMAP cluster,每个sampl都是独立的,不利于分析。整合只是对spot表达矩阵的整合,用于后续的降维,并没有对image整合,就目前来说那是不可能的也是不可取的。
这里演示数据有4个sample,我们采取SCT+seurat的整合方式:
# Merge all samples# Create RNA assay for all spatial objectsfor (i in 1:length(obj.list)) {obj.list[[i]][["RNA"]] <- obj.list[[i]][["Spatial"]]}
anchor_features <- SelectIntegrationFeatures(object.list = obj.list, nfeatures = 3000)obj.list <- PrepSCTIntegration(object.list = obj.list, anchor.features = anchor_features)rsc.anchors <- FindIntegrationAnchors(object.list = obj.list, normalization.method = "SCT",anchor.features = anchor_features) # Takes time!
## Warning in CheckDuplicateCellNames(object.list = object.list): Some cell names
## are duplicated across objects provided. Renaming to enforce unique cell names.
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Warning: Adding image data that isn't associated with any assays
## Warning: Adding image data that isn't associated with any assays
## Warning: Key 'slice1_' taken, using 'ctr4_' instead
## Finding neighborhoods
## Finding anchors
## Found 6525 anchors
## Filtering anchors
## Retained 3041 anchors
## Running CCA
## Merging objects
## Warning: Adding image data that isn't associated with any assays
## Warning: Adding image data that isn't associated with any assays
## Warning: Key 'slice1_' taken, using 'rif3_' instead
## Finding neighborhoods
## Finding anchors
## Found 3985 anchors
## Filtering anchors
## Retained 2596 anchors
## Running CCA
## Merging objects
## Warning: Adding image data that isn't associated with any assays
## Warning: Adding image data that isn't associated with any assays
## Warning: Key 'slice1_' taken, using 'rif3_' instead
## Finding neighborhoods
## Finding anchors
## Found 4029 anchors
## Filtering anchors
## Retained 2574 anchors
## Running CCA
## Merging objects
## Warning: Adding image data that isn't associated with any assays
## Warning: Adding image data that isn't associated with any assays
## Warning: Key 'slice1_' taken, using 'rif4_' instead
## Finding neighborhoods
## Finding anchors
## Found 4811 anchors
## Filtering anchors
## Retained 2700 anchors
## Running CCA
## Merging objects
## Warning: Adding image data that isn't associated with any assays
## Warning: Adding image data that isn't associated with any assays
## Warning: Key 'slice1_' taken, using 'rif4_' instead
## Finding neighborhoods
## Finding anchors
## Found 4952 anchors
## Filtering anchors
## Retained 2907 anchors
## Running CCA
## Merging objects
## Warning: Adding image data that isn't associated with any assays
## Warning: Adding image data that isn't associated with any assays
## Warning: Key 'slice1_' taken, using 'rif4_' instead
## Finding neighborhoods
## Finding anchors
## Found 3520 anchors
## Filtering anchors
## Retained 2196 anchors
merged <- IntegrateData(anchorset = rsc.anchors, normalization.method = "SCT", k.weight = 40)
merged[["RNA"]] <- JoinLayers(merged[["RNA"]])
降维聚类步骤与scRNA无异merged <- RunPCA(merged, npcs = 30, verbose = FALSE)merged <- RunUMAP(merged, reduction = "pca", dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 18:54:53 UMAP embedding parameters a = 0.9922 b = 1.112
## 18:54:53 Read 5883 rows and found 30 numeric columns
## 18:54:53 Using Annoy for neighbor search, n_neighbors = 30
## 18:54:53 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:54:55 Writing NN index file to temp file /tmp/RtmptDpooK/file81ac7575aa159
## 18:54:55 Searching Annoy index using 1 thread, search_k = 3000
## 18:54:57 Annoy recall = 100%
## 18:54:59 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 18:55:01 Initializing from normalized Laplacian + noise (using RSpectra)
## 18:55:01 Commencing optimization for 500 epochs, with 289038 positive edges
## 18:55:01 Using rng type: pcg
## 18:55:12 Optimization finished
merged <- FindNeighbors(merged, reduction = "pca", dims = 1:30)merged <- FindClusters(merged, resolution = 0.6)
6、一些基本结果可视化
color_pal<-c("#11A579","#F2B701","#66C5CC","#80BA5A","#F6CF71","#7F3C8D","#CF1C90","#3969AC","#f97b72","#E73F74","#4b4b8f","#ACA4E2","#31C53F","#B95FBB","#D4D915","#28CECA")DimPlot(merged, reduction = "umap", label = F, pt.size = 0.5,cols = color_pal)+DimPlot(merged, reduction = "umap", group.by = 'orig.ident',label = F, pt.size = 0.5)
SpatialDimPlot(merged, stroke=0.1,ncol = 2)& #如果spots不需要边框,设置stroke=NAscale_fill_manual(values = color_pal)
DefaultAssay(merged) <- 'SCT'SpatialFeaturePlot(merged, 'PAEP', ncol = 2, min.cutoff = 0)
SpatialFeaturePlot(merged, 'SPP1', ncol = 2, min.cutoff = 0, stroke=0.1,pt.size.factor = 2)
精彩内容
如感兴趣/需要详情请联系作者: