• geo 读取单细胞csv表达矩阵 单细胞 改列名 seurat


    
    
    
    getwd()
    path="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin" #空间转录组
    dir.create(path)
    setwd(path)
    getwd()
    list.files()
    
    
    
    raw_counts=read.csv("G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/GSE104154_d0_d21_sma_tm_Expr_raw/GSE104154_d0_d21_sma_tm_Expr_raw.csv"
                          )
    head(raw_counts)[1:4,1:4]
    counts=raw_counts[,-1]
    head(counts)[1:4,1:4]
    rownames(counts)=counts$symbol
    
    head(raw_counts)[1:4,1:4]
    counts=raw_counts[,-2]
    head(counts)[1:4,1:4]
    rownames(counts)=counts$id
    counts=counts[,-1]
    
    library(Seurat)
    #https://zhuanlan.zhihu.com/p/385206713
    rawdata=CreateSeuratObject(counts = counts,project = "blem",assay = "RNA")
    
    ids=raw_counts[,1:2]
    head(ids)
    colnames(ids)= c('ENSEMBL','SYMBOL')
    head(ids)
    dim(ids) # [1] 16428 
    ids=na.omit(ids)
    dim(ids) # [1] 15504 
    length(unique(ids$SYMBOL)) # [1] 15494 
    # 这里的关系超级乱,互相之间都不是一对一 
    # 凡是混乱的ID一律删除即可
    ids=ids[!duplicated(ids$SYMBOL),]
    ids=ids[!duplicated(ids$ENSEMBL),]
    dim(ids)
    pos=match(ids$ENSEMBL,rownames(rawdata) )
    hp_sce=rawdata[pos,]
    hp_sce
    #rownames(hp_sce) = ids$SYMBOL
    # RenameGenesSeurat  -----------------------------------------------
    #创建函数 改名字
    RenameGenesSeurat <- function(obj , 
                                  newnames ) { 
      # Replace gene names in different slots of a Seurat object. Run this before integration. Run this before integration. 
      # It only changes obj@assays$RNA@counts, @data and @scale.data.
      print("Run this before integration. It only changes obj@assays$RNA@counts, @data and @scale.data.")
      RNA <- obj@assays$RNA
      
      if (nrow(RNA) == length(newnames)) {
        if (length(RNA@counts)) RNA@counts@Dimnames[[1]]            <- newnames
        if (length(RNA@data)) RNA@data@Dimnames[[1]]                <- newnames
        if (length(RNA@scale.data)) RNA@scale.data@Dimnames[[1]]    <- newnames
      } else {"Unequal gene sets: nrow(RNA) != nrow(newnames)"}
      obj@assays$RNA <- RNA
      return(obj)
    }
    
    hp_sce=RenameGenesSeurat(obj = hp_sce, 
                          newnames = ids$SYMBOL)
    getwd()
    #save(hp_sce,file = 'first_sce.Rdata')
    
    
    
    hp_sce
    rownames(hp_sce)[grepl('^mt-',rownames(hp_sce))]
    rownames(hp_sce)[grepl('^Rp[sl]',rownames(hp_sce))]
    
    hp_sce[["percent.mt"]] <- PercentageFeatureSet(hp_sce, pattern = "^mt-")
    fivenum(hp_sce[["percent.mt"]][,1])
    rb.genes <- rownames(hp_sce)[grep("^Rp[sl]",rownames(hp_sce))]
    C<-GetAssayData(object = hp_sce, slot = "counts")
    percent.ribo <- Matrix::colSums(C[rb.genes,])/Matrix::colSums(C)*100
    hp_sce <- AddMetaData(hp_sce, percent.ribo, col.name = "percent.ribo")
    
    getwd()
    
    
    
    
    plot1 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "percent.mt")
    plot2 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
    CombinePlots(plots = list(plot1, plot2))
    
    VlnPlot(hp_sce, features = c("percent.ribo", "percent.mt"), ncol = 2)
    VlnPlot(hp_sce, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)
    VlnPlot(hp_sce, features = c("percent.ribo", "nCount_RNA"), ncol = 2)
    hp_sce
    
    hp_sce1 <- subset(hp_sce, subset = nFeature_RNA > 200 & nCount_RNA > 1000 & percent.mt < 20)
    hp_sce1
    
    
    sce=hp_sce1
    sce
    colnames(sce)
    grep(colnames(sce),pattern = ".1")
    grep(colnames(sce),pattern = ".2")
    sce@meta.data$stim <-c(rep("PBS", length(grep("1$", sce@assays$RNA@counts@Dimnames[[2]]))), 
                           rep("PBS", length(grep("2$", sce@assays$RNA@counts@Dimnames[[2]]))),
                           rep("PBS", length(grep("3$", sce@assays$RNA@counts@Dimnames[[2]]))),
                           rep("Bleomycin", length(grep("4$", sce@assays$RNA@counts@Dimnames[[2]]))),
                           rep("Bleomycin", length(grep("5$", sce@assays$RNA@counts@Dimnames[[2]]))),
                           rep("Bleomycin", length(grep("6$", sce@assays$RNA@counts@Dimnames[[2]])))
                            ) ## 8186,7947;
    
    table(sce$stim)
    
    library(dplyr)
    sce[["RNA"]]@meta.features <- data.frame(row.names = rownames(sce[["RNA"]]))
    
    All = sce%>%Seurat::NormalizeData(verbose = FALSE) %>%  
      FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
      ScaleData(verbose = FALSE)
    All = RunPCA(All, npcs = 50, verbose = FALSE)
    
    pdf("2_ElbowPlot.pdf")
    ElbowPlot(All, ndims = 50)
    dev.off()
    
    library(cowplot)
    #All@meta.data$stim <- c(rep("case", length(grep("1$", All@assays$RNA@counts@Dimnames[[2]]))), rep("ctrl", length(grep("2$", All@assays$RNA@counts@Dimnames[[2]])))) ## 8186,7947;
    pdf("2_pre_harmony_harmony_plot.pdf")
    options(repr.plot.height = 5, repr.plot.width = 12)
    p1 <- DimPlot(object = All, reduction = "pca", pt.size = .1, group.by = "stim")
    p2 <- VlnPlot(object = All, features = "PC_1", group.by = "stim", pt.size = .1)
    plot_grid(p1, p2)
    dev.off()
    ##########################run harmony
    
    All <- All %>% RunHarmony("stim", plot_convergence = TRUE)
    harmony_embeddings <- Embeddings(All, 'harmony') 
    
    
    pdf("2_after_harmony_harmony_plot.pdf")
    options(repr.plot.height = 5, repr.plot.width = 12)
    p3 <- DimPlot(object = All, reduction = "harmony", pt.size = .1, group.by = "stim")
    p4 <- VlnPlot(object = All, features = "harmony_1", group.by = "stim", pt.size = .1)
    plot_grid(p3, p4)
    dev.off()
    #############cluster
    #library(harmony)
    All <- All %>% 
      RunUMAP(reduction = "harmony", dims = 1:30) %>% 
      RunTSNE(reduction = "harmony", dims = 1:30) %>% 
      FindNeighbors(reduction = "harmony", dims = 1:30)
    All<-All%>% FindClusters(resolution = 3) %>% identity()
    
    
    options(repr.plot.height = 4, repr.plot.width = 10)
    pdf("3_after_harmony_umap_two_group.pdf")
    DimPlot(All, reduction = "umap", group.by = "stim", pt.size = .1)
    dev.off()
    
    pdf("3_after_harmony_cluster_UMAP.pdf")
    DimPlot(All, reduction = "umap", label = TRUE, pt.size = .1)
    dev.off()
    
    pdf("3_umap_samples_split.pdf")
    DimPlot(All, reduction = "umap", pt.size = .1, split.by = "stim", label = T)
    dev.off()
    
    pdf("3_after_harmony_tsne_two_group.pdf")
    DimPlot(All, reduction = "tsne", group.by = "stim", pt.size = .1)
    dev.off()
    
    pdf("3_after_harmony_cluster_tSNE.pdf")
    DimPlot(All, reduction = "tsne", label = TRUE, pt.size = .1)
    dev.off()
    
    pdf("3_tSNE_samples_split.pdf")
    DimPlot(All, reduction = "tsne", pt.size = .1, split.by = "stim", label = T)
    dev.off()
    getwd()
    #save(All,file ="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds" )
    load("G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds")
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150
    • 151
    • 152
    • 153
    • 154
    • 155
    • 156
    • 157
    • 158
    • 159
    • 160
    • 161
    • 162
    • 163
    • 164
    • 165
    • 166
    • 167
    • 168
    • 169
    • 170
    • 171
    • 172
    • 173
    • 174
    • 175
    • 176
    • 177
    • 178
    • 179
    • 180
    • 181
    • 182
    • 183
    • 184
  • 相关阅读:
    软件测试项目该如何规避风险?
    为什么重写Equals方法要重写HashCode方法
    计算机毕设源码网站springboot毕业设计管理系统
    UE5 C++报错:is not currently enabled for Live Coding
    华为认证 | 这门HCIA认证正式发布!
    产品需求交付质量保证的“七重门”
    【蓝桥杯集训100题】scratch售票找零 蓝桥杯scratch比赛专项预测编程题 集训模拟练习题第23题
    代码随想录算法训练营DAY36|C++贪心算法Part.5|435.无重叠区间、763.划分字母区间、56. 合并区间
    【AI语言模型】阿里推出音视频转文字引擎
    一大波开源小抄来袭
  • 原文地址:https://blog.csdn.net/qq_52813185/article/details/125499748