geo 读取单细胞csv表达矩阵单细胞改列名 seurat




getwd()
path="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin" #空间转录组
dir.create(path)
setwd(path)
getwd()
list.files()



raw_counts=read.csv("G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/GSE104154_d0_d21_sma_tm_Expr_raw/GSE104154_d0_d21_sma_tm_Expr_raw.csv"
                      )
head(raw_counts)[1:4,1:4]
counts=raw_counts[,-1]
head(counts)[1:4,1:4]
rownames(counts)=counts$symbol

head(raw_counts)[1:4,1:4]
counts=raw_counts[,-2]
head(counts)[1:4,1:4]
rownames(counts)=counts$id
counts=counts[,-1]

library(Seurat)
#https://zhuanlan.zhihu.com/p/385206713
rawdata=CreateSeuratObject(counts = counts,project = "blem",assay = "RNA")

ids=raw_counts[,1:2]
head(ids)
colnames(ids)= c('ENSEMBL','SYMBOL')
head(ids)
dim(ids) # [1] 16428 
ids=na.omit(ids)
dim(ids) # [1] 15504 
length(unique(ids$SYMBOL)) # [1] 15494 
# 这里的关系超级乱，互相之间都不是一对一 
# 凡是混乱的ID一律删除即可
ids=ids[!duplicated(ids$SYMBOL),]
ids=ids[!duplicated(ids$ENSEMBL),]
dim(ids)
pos=match(ids$ENSEMBL,rownames(rawdata) )
hp_sce=rawdata[pos,]
hp_sce
#rownames(hp_sce) = ids$SYMBOL
# RenameGenesSeurat  -----------------------------------------------
#创建函数 改名字
RenameGenesSeurat <- function(obj , 
                              newnames ) { 
  # Replace gene names in different slots of a Seurat object. Run this before integration. Run this before integration. 
  # It only changes obj@assays$RNA@counts, @data and @scale.data.
  print("Run this before integration. It only changes obj@assays$RNA@counts, @data and @scale.data.")
  RNA <- obj@assays$RNA
  
  if (nrow(RNA) == length(newnames)) {
    if (length(RNA@counts)) RNA@counts@Dimnames[[1]]            <- newnames
    if (length(RNA@data)) RNA@data@Dimnames[[1]]                <- newnames
    if (length(RNA@scale.data)) RNA@scale.data@Dimnames[[1]]    <- newnames
  } else {"Unequal gene sets: nrow(RNA) != nrow(newnames)"}
  obj@assays$RNA <- RNA
  return(obj)
}

hp_sce=RenameGenesSeurat(obj = hp_sce, 
                      newnames = ids$SYMBOL)
getwd()
#save(hp_sce,file = 'first_sce.Rdata')



hp_sce
rownames(hp_sce)[grepl('^mt-',rownames(hp_sce))]
rownames(hp_sce)[grepl('^Rp[sl]',rownames(hp_sce))]

hp_sce[["percent.mt"]] <- PercentageFeatureSet(hp_sce, pattern = "^mt-")
fivenum(hp_sce[["percent.mt"]][,1])
rb.genes <- rownames(hp_sce)[grep("^Rp[sl]",rownames(hp_sce))]
C<-GetAssayData(object = hp_sce, slot = "counts")
percent.ribo <- Matrix::colSums(C[rb.genes,])/Matrix::colSums(C)*100
hp_sce <- AddMetaData(hp_sce, percent.ribo, col.name = "percent.ribo")

getwd()




plot1 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))

VlnPlot(hp_sce, features = c("percent.ribo", "percent.mt"), ncol = 2)
VlnPlot(hp_sce, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)
VlnPlot(hp_sce, features = c("percent.ribo", "nCount_RNA"), ncol = 2)
hp_sce

hp_sce1 <- subset(hp_sce, subset = nFeature_RNA > 200 & nCount_RNA > 1000 & percent.mt < 20)
hp_sce1


sce=hp_sce1
sce
colnames(sce)
grep(colnames(sce),pattern = ".1")
grep(colnames(sce),pattern = ".2")
sce@meta.data$stim <-c(rep("PBS", length(grep("1$", sce@assays$RNA@counts@Dimnames[[2]]))), 
                       rep("PBS", length(grep("2$", sce@assays$RNA@counts@Dimnames[[2]]))),
                       rep("PBS", length(grep("3$", sce@assays$RNA@counts@Dimnames[[2]]))),
                       rep("Bleomycin", length(grep("4$", sce@assays$RNA@counts@Dimnames[[2]]))),
                       rep("Bleomycin", length(grep("5$", sce@assays$RNA@counts@Dimnames[[2]]))),
                       rep("Bleomycin", length(grep("6$", sce@assays$RNA@counts@Dimnames[[2]])))
                        ) ## 8186,7947;

table(sce$stim)

library(dplyr)
sce[["RNA"]]@meta.features <- data.frame(row.names = rownames(sce[["RNA"]]))

All = sce%>%Seurat::NormalizeData(verbose = FALSE) %>%  
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(verbose = FALSE)
All = RunPCA(All, npcs = 50, verbose = FALSE)

pdf("2_ElbowPlot.pdf")
ElbowPlot(All, ndims = 50)
dev.off()

library(cowplot)
#All@meta.data$stim <- c(rep("case", length(grep("1$", All@assays$RNA@counts@Dimnames[[2]]))), rep("ctrl", length(grep("2$", All@assays$RNA@counts@Dimnames[[2]])))) ## 8186,7947;
pdf("2_pre_harmony_harmony_plot.pdf")
options(repr.plot.height = 5, repr.plot.width = 12)
p1 <- DimPlot(object = All, reduction = "pca", pt.size = .1, group.by = "stim")
p2 <- VlnPlot(object = All, features = "PC_1", group.by = "stim", pt.size = .1)
plot_grid(p1, p2)
dev.off()
##########################run harmony

All <- All %>% RunHarmony("stim", plot_convergence = TRUE)
harmony_embeddings <- Embeddings(All, 'harmony') 


pdf("2_after_harmony_harmony_plot.pdf")
options(repr.plot.height = 5, repr.plot.width = 12)
p3 <- DimPlot(object = All, reduction = "harmony", pt.size = .1, group.by = "stim")
p4 <- VlnPlot(object = All, features = "harmony_1", group.by = "stim", pt.size = .1)
plot_grid(p3, p4)
dev.off()
#############cluster
#library(harmony)
All <- All %>% 
  RunUMAP(reduction = "harmony", dims = 1:30) %>% 
  RunTSNE(reduction = "harmony", dims = 1:30) %>% 
  FindNeighbors(reduction = "harmony", dims = 1:30)
All<-All%>% FindClusters(resolution = 3) %>% identity()


options(repr.plot.height = 4, repr.plot.width = 10)
pdf("3_after_harmony_umap_two_group.pdf")
DimPlot(All, reduction = "umap", group.by = "stim", pt.size = .1)
dev.off()

pdf("3_after_harmony_cluster_UMAP.pdf")
DimPlot(All, reduction = "umap", label = TRUE, pt.size = .1)
dev.off()

pdf("3_umap_samples_split.pdf")
DimPlot(All, reduction = "umap", pt.size = .1, split.by = "stim", label = T)
dev.off()

pdf("3_after_harmony_tsne_two_group.pdf")
DimPlot(All, reduction = "tsne", group.by = "stim", pt.size = .1)
dev.off()

pdf("3_after_harmony_cluster_tSNE.pdf")
DimPlot(All, reduction = "tsne", label = TRUE, pt.size = .1)
dev.off()

pdf("3_tSNE_samples_split.pdf")
DimPlot(All, reduction = "tsne", pt.size = .1, split.by = "stim", label = T)
dev.off()
getwd()
#save(All,file ="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds" )
load("G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds")

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184

相关阅读:
软件测试项目该如何规避风险?
为什么重写Equals方法要重写HashCode方法
 计算机毕设源码网站springboot毕业设计管理系统
 UE5 C++报错：is not currently enabled for Live Coding
华为认证 | 这门HCIA认证正式发布！
产品需求交付质量保证的“七重门”
【蓝桥杯集训100题】scratch售票找零蓝桥杯scratch比赛专项预测编程题集训模拟练习题第23题
 代码随想录算法训练营DAY36|C++贪心算法Part.5|435.无重叠区间、763.划分字母区间、56. 合并区间
 【AI语言模型】阿里推出音视频转文字引擎
 一大波开源小抄来袭
原文地址：https://blog.csdn.net/qq_52813185/article/details/125499748

geo 读取单细胞csv表达矩阵 单细胞 改列名 seurat

geo 读取单细胞csv表达矩阵单细胞改列名 seurat