关注微信:生信小博士
#BiocManager::install("msigdb")
#https://www.gsea-msigdb.org/gsea/msigdb
#https://cran.r-project.org/web/packages/msigdbr/vignettes/msigdbr-intro.html
#https://bioconductor.org/packages/release/data/experiment/vignettes/msigdb/inst/doc/msigdb.html#the-molecular-signatures-database-msigdb
#https://www.gsea-msigdb.org/gsea/msigdb/collections.jsp
library(msigdb)library(ExperimentHub)
library(GSEABase)
- #6提取并制备人的hallmarks列表---------
- all_gene_sets_hs = msigdbr::msigdbr(species = "Homo sapiens") #Mus musculus
-
- all_gene_sets_hs
- all_gene_sets_hs$gs_name %>%table()
- all_gene_sets_hs$gs_cat %>%table()
- all_gene_sets_hs$gs_subcat %>%table()
- all_gene_sets_hs$gs_id %>%table() %>%tail()
-
- all_gene_sets_hs_list=split(x = all_gene_sets_hs$gene_symbol,f=all_gene_sets_hs$gs_name )
-
- all_gene_sets_hs_list
- length(all_gene_sets_hs_list)
-
- MSIGDB_CANONICAL= all_gene_sets_hs %>% dplyr::filter(gs_cat=="H")
- MSIGDB_CANONICAL
-
- MSIGDB_CANONICAL_list=split(x=MSIGDB_CANONICAL$gene_symbol,f = MSIGDB_CANONICAL$gs_name)
-
- length(MSIGDB_CANONICAL_list)
-
- .libPaths(c("/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2",
- "/home/data/t040413/R/yll/usr/local/lib/R/site-library",
- "/usr/local/lib/R/library",
- "/home/data/refdir/Rlib/"))
-
-
- #BiocManager::install("msigdb")
- #https://www.gsea-msigdb.org/gsea/msigdb
- #https://cran.r-project.org/web/packages/msigdbr/vignettes/msigdbr-intro.html
- #https://bioconductor.org/packages/release/data/experiment/vignettes/msigdb/inst/doc/msigdb.html#the-molecular-signatures-database-msigdb
- #https://www.gsea-msigdb.org/gsea/msigdb/collections.jsp
- library(msigdb)
-
- library(ExperimentHub)
- library(GSEABase)
-
- #To download the data, we first need to get a list of the data available in the msigdb package and determine the unique identifiers for each data. The query() function assists in getting this list.
-
- 1#1 获取总的基因集合的名字--------
- eh = ExperimentHub()
- all_gene_sets_name=query(eh , 'msigdb')
- all_gene_sets_name
-
- #Data can then be downloaded using the unique identifier.
- eh[['EH5421']]
-
-
- #use the custom accessor to select a specific version of MSigDB
- msigdb.hs = getMsigdb(org = 'hs', id = 'SYM', version = '7.4')
- msigdb.hs
-
-
-
-
- #Each signature is stored in a GeneSet object and can be processed using functions in the GSEABase R/Bioconductor package.
-
- gs = msigdb.hs[[1000]]
- gs
- geneIds(gs)
-
-
- #get collection type
- collectionType(gs)
-
-
- #get MSigDB category
- bcCategory(collectionType(gs))
-
-
- #get MSigDB subcategory
- bcSubCategory(collectionType(gs))
-
-
- #get description
- description(gs)
-
-
-
- #calculate the number of signatures in each category
- table(sapply(lapply(msigdb.hs, collectionType), bcCategory))
-
-
-
- #calculate the number of signatures in each subcategory
- table(sapply(lapply(msigdb.hs, collectionType), bcSubCategory))
-
- #plot the distribution of sizes
- hist(sapply(lapply(msigdb.hs, geneIds), length),
- main = 'MSigDB signature size distribution',
- xlab = 'Signature size')
-
-
- listCollections(msigdb.hs)
- #> [1] "c1" "c2" "c3" "c4" "c5" "c6" "c7" "c8" "h"
- listSubCollections(msigdb.hs)
-
-
- #retrieeve the hallmarks gene sets
- subsetCollection(msigdb.hs, 'h')
-
-
-
- #retrieve the biological processes category of gene ontology
- subsetCollection(msigdb.hs, 'c5', 'GO:BP')
-
-
-
- #4提取所有人类基因集合数据----------
- all_species=msigdbr::msigdbr_species()
- all_species
- all_gene_sets_hs = msigdbr::msigdbr(species = "Homo sapiens") #Mus musculus
-
- head(all_gene_set_hs)
-
- #查看所有的collections------
- all_collections=msigdbr::msigdbr_collections()
- all_collections
-
-
- #5 提取人 鼠 特定的ollection------
- #You can retrieve data for a specific collection, such as the hallmark gene sets.
-
- m_gene_sets = msigdbr::msigdbr(species = "mouse", category = "H")
- head(m_gene_sets)
-
- h_gene_sets = msigdbr::msigdbr(species = "human", category = "H")
- head(h_gene_sets)
-
-
-
- h_gene_sets$gs_name
-
- #6提取并制备人的hallmarks列表---------
- all_gene_sets_hs = msigdbr::msigdbr(species = "Homo sapiens") #Mus musculus
- #saveRDS(all_gene_sets_hs,file="~/datasets/all_gene_sets_hs_msigdb.rds")
-
- all_gene_sets_hs
- all_gene_sets_hs$gs_name %>%table()
- all_gene_sets_hs$gs_cat %>%table()
- all_gene_sets_hs$gs_subcat %>%table()
- all_gene_sets_hs$gs_id %>%table() %>%tail()
- all_gene_sets_hs_list=split(x = all_gene_sets_hs$gene_symbol,f=all_gene_sets_hs$gs_name )
- all_gene_sets_hs_list
- length(all_gene_sets_hs_list)
- MSIGDB_CANONICAL= all_gene_sets_hs %>% dplyr::filter(gs_cat=="H")
- MSIGDB_CANONICAL
- MSIGDB_CANONICAL_list=split(x=MSIGDB_CANONICAL$gene_symbol,f = MSIGDB_CANONICAL$gs_name)
- length(MSIGDB_CANONICAL_list)
- names(MSIGDB_CANONICAL_list)
-
- #saveRDS(MSIGDB_CANONICAL_list,file = "~/datasets/Genesets_Dec19.rds")
-
-
-