前言

使用工具:R 

一.下载数据

到NCBI网站下载数据。我这次选择了两个样本GSM4138111和GSM4138110进行下载,每个样本需要下载下面的三个文件。下载的时候要将文件归类:数据是哪个样本的,就放到以该样本命名的文件夹中。

R语言转录组测序分析 r语言基因id转symbol_数据

二.文件改名

在使用之前需要对文件名进行更改,改成如下名字(把文件名中的genes改成features即可):

R语言转录组测序分析 r语言基因id转symbol_文件名_02

如果样本很多,那么改名字就很麻烦了,可以使用以下R语言代码进行批量更改名字: 

library(tidyverse)

# Directory that holds the downloaded GSM*_barcodes / _genes / _matrix files.
# Change this to wherever the raw files were saved.
raw_dir <- "GSE139324_RAW"

# List the files in the same directory they are later renamed from.
fs <- list.files(raw_dir, pattern = "^GSM")
# The sample ID is the part of the file name before the first underscore.
samples <- str_split(fs, "_", simplify = TRUE)[, 1]

# Pattern found in the downloaded file name -> file name used after renaming.
targets <- c(
  "barcodes" = "barcodes.tsv.gz",
  "genes|features" = "features.tsv.gz",
  "matrix" = "matrix.mtx.gz"
)

invisible(lapply(unique(samples), function(x) {
  # Exact match on the sample ID, so one ID cannot pick up the files of
  # another ID that merely contains it as a substring.
  y <- fs[samples == x]
  # Create one sub-folder per sample.
  folder <- file.path(raw_dir, x)
  dir.create(folder, recursive = TRUE, showWarnings = FALSE)
  # Rename each file and move it into the sample's sub-folder. Files are
  # picked by keyword instead of by position, so the result does not depend
  # on the order in which list.files() returned them.
  for (pattern in names(targets)) {
    hit <- grep(pattern, y, value = TRUE)
    if (length(hit) != 1) {
      warning(
        "Sample ", x, ": expected exactly one file matching '", pattern,
        "', found ", length(hit), "; skipped.",
        call. = FALSE
      )
      next
    }
    file.rename(file.path(raw_dir, hit), file.path(folder, targets[[pattern]]))
  }
}))

三.样本合并

每个样本的文件名已经改好了,接下来就需要进行合并了。使用下面的R语言代码进行数据合并:

library(Seurat)
library(tidyverse)

library(patchwork)

# Output folders used by the analysis steps.
invisible(lapply(c("cluster1", "cluster2", "cluster3"), function(p) dir.create(p)))
set.seed(123)

# One 10X folder per sample; the vector names are passed to Read10X together
# with the paths.
dir <- c(
  HNC01PBMC = "D://shiyan//GSM4138110",
  HNC01TIL = "D://shiyan//GSM4138111"
)

# Read both samples in one call and build a single Seurat object from them.
counts <- Read10X(data.dir = dir)
scRNA1 <- CreateSeuratObject(counts = counts, min.cells = 1)
dim(scRNA1)
# Number of cells per sample.
table(scRNA1@meta.data$orig.ident)

这样就可以对样本数据进行合并分析了

四.聚类分析

library(Seurat)
library(tidyverse)

library(patchwork)

# Normalise, pick variable features, scale them and run PCA on them.
scRNA1 <- NormalizeData(scRNA1)
scRNA1 <- FindVariableFeatures(scRNA1, selection.method = "vst")
scRNA1 <- ScaleData(scRNA1, features = VariableFeatures(scRNA1))
scRNA1 <- RunPCA(scRNA1, features = VariableFeatures(scRNA1))
# PCA scatter coloured by sample, next to an elbow plot of the first 30 PCs.
plot1 <- DimPlot(scRNA1, reduction = "pca", group.by = "orig.ident")
plot2 <- ElbowPlot(scRNA1, ndims = 30, reduction = "pca")
plotc <- plot1 + plot2
ggsave("cluster1/pca.png", plot = plotc, width = 8, height = 4)
# Principal components used by every downstream step.
pc.num <- 1:30

## Cell clustering
scRNA1 <- FindNeighbors(scRNA1, dims = pc.num)
scRNA1 <- FindClusters(scRNA1, resolution = 0.5)
table(scRNA1@meta.data$seurat_clusters)
# Export the cell -> cluster assignment.
metadata <- scRNA1@meta.data
cell_cluster <- data.frame(
  cell_ID = rownames(metadata),
  cluster_ID = metadata$seurat_clusters
)
write.csv(cell_cluster, "cluster1/cell_cluster.csv", row.names = FALSE)

## Non-linear dimensionality reduction
# tSNE
scRNA1 <- RunTSNE(scRNA1, dims = pc.num)
embed_tsne <- Embeddings(scRNA1, "tsne") # extract the tSNE coordinates
write.csv(embed_tsne, "cluster1/embed_tsne.csv")
# Grouped by cluster
plot1 <- DimPlot(scRNA1, reduction = "tsne", label = TRUE)
ggsave("cluster1/tSNE.png", plot = plot1, width = 8, height = 7)
# Grouped by sample
plot2 <- DimPlot(scRNA1, reduction = "tsne", group.by = "orig.ident")
ggsave("cluster1/tSNE_sample.png", plot = plot2, width = 8, height = 7)
# Combined
plotc <- plot1 + plot2
ggsave("cluster1/tSNE_cluster_sample.png", plot = plotc, width = 10, height = 5)

# UMAP
scRNA1 <- RunUMAP(scRNA1, dims = pc.num)
embed_umap <- Embeddings(scRNA1, "umap") # extract the UMAP coordinates
write.csv(embed_umap, "cluster1/embed_umap.csv")
# Grouped by cluster
plot3 <- DimPlot(scRNA1, reduction = "umap", label = TRUE)
ggsave("cluster1/UMAP.png", plot = plot3, width = 8, height = 7)
# Grouped by sample. Saved under its own name (mirroring tSNE_sample.png) so
# it no longer overwrites the per-cluster UMAP.png written just above.
plot4 <- DimPlot(scRNA1, reduction = "umap", group.by = "orig.ident")
ggsave("cluster1/UMAP_sample.png", plot = plot4, width = 8, height = 7)
# Combined
plotc <- plot3 + plot4
ggsave("cluster1/UMAP_cluster_sample.png", plot = plotc, width = 10, height = 5)

# NOTE(review): changing the working directory mid-script is fragile; it is
# kept so later code still sees the same working directory as before.
setwd("D://shiyan")
# The relative path "cluster1" now resolves under the new working directory,
# so make sure the folder exists there before saving into it.
dir.create("cluster1", showWarnings = FALSE)
# tSNE and UMAP (both grouped by sample) side by side with a shared legend.
plotc <- plot2 + plot4 + plot_layout(guides = "collect")
ggsave("cluster1/tSNE_UMAP.png", plot = plotc, width = 10, height = 5)

R语言转录组测序分析 r语言基因id转symbol_R语言转录组测序分析_03

这是两个样本降维之后的聚类分析结果。因为样本太少了,所以分群效果不是很好,接下来对样本数据进行整合:

# Split the merged object back into one Seurat object per sample, because
# FindIntegrationAnchors() takes a list of objects.
scRNAlist <- SplitObject(scRNA1, split.by = "orig.ident")

# Normalise each sample and select its variable features independently.
scRNAlist <- lapply(scRNAlist, function(obj) {
  obj <- NormalizeData(obj)
  FindVariableFeatures(obj, selection.method = "vst")
})
## Find integration anchors based on the variable features (slow)
scRNA.anchors <- FindIntegrationAnchors(object.list = scRNAlist)
## Integrate the data using the anchors (slow)
scRNA3 <- IntegrateData(anchorset = scRNA.anchors)

可能是我的电脑配置比较低,所以这一步运行得很慢,配置好一些的电脑可能会更快。剩下的内容我下篇文章继续写:下一章我会进行数据质控,并用数据绘制小提琴图来观察数据结果,然后预测样本中的细胞类型。