• msigdbr hallmarks gsea broad研究所


    关注微信:生信小博士 

     使用 msigdbr R 包

    #BiocManager::install("msigdb")
    #https://www.gsea-msigdb.org/gsea/msigdb
    #https://cran.r-project.org/web/packages/msigdbr/vignettes/msigdbr-intro.html
    #https://bioconductor.org/packages/release/data/experiment/vignettes/msigdb/inst/doc/msigdb.html#the-molecular-signatures-database-msigdb
    #https://www.gsea-msigdb.org/gsea/msigdb/collections.jsp
    library(msigdb)

    library(ExperimentHub)
    library(GSEABase)

    # 6. Extract and build the human hallmark gene-set list ----
    # Fetch every MSigDB gene set for human (use "Mus musculus" for mouse).
    all_gene_sets_hs <- msigdbr::msigdbr(species = "Homo sapiens")
    all_gene_sets_hs

    # Tabulate rows per gene-set name, category, sub-category and gene-set ID.
    # Plain function calls are used instead of `%>%`: the setup shown for this
    # snippet does not attach magrittr or dplyr, so the pipe may be undefined.
    table(all_gene_sets_hs$gs_name)
    table(all_gene_sets_hs$gs_cat)
    table(all_gene_sets_hs$gs_subcat)
    tail(table(all_gene_sets_hs$gs_id))

    # Named list: one vector of gene symbols per gene-set name.
    all_gene_sets_hs_list <- split(
      x = all_gene_sets_hs$gene_symbol,
      f = all_gene_sets_hs$gs_name
    )
    all_gene_sets_hs_list
    length(all_gene_sets_hs_list)

    # Keep only the rows whose category is "H", then build the same kind of
    # named list (gene symbols split by gene-set name) for that subset.
    # NOTE(review): the `gs_cat` / `gs_subcat` column names depend on the
    # installed msigdbr version -- confirm against your installation.
    MSIGDB_CANONICAL <- dplyr::filter(all_gene_sets_hs, gs_cat == "H")
    MSIGDB_CANONICAL
    MSIGDB_CANONICAL_list <- split(
      x = MSIGDB_CANONICAL$gene_symbol,
      f = MSIGDB_CANONICAL$gs_name
    )
    length(MSIGDB_CANONICAL_list)

    完整代码如下

    # Full walkthrough: browse MSigDB through the Bioconductor `msigdb` data
    # package (ExperimentHub), then build plain named lists of gene symbols
    # with the CRAN `msigdbr` package.

    # Site-specific library search path.
    # NOTE(review): these directories are hard-coded for the author's machine;
    # adjust or remove this call when running elsewhere.
    .libPaths(c(
      "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2",
      "/home/data/t040413/R/yll/usr/local/lib/R/site-library",
      "/usr/local/lib/R/library",
      "/home/data/refdir/Rlib/"
    ))

    # BiocManager::install("msigdb")
    # https://www.gsea-msigdb.org/gsea/msigdb
    # https://cran.r-project.org/web/packages/msigdbr/vignettes/msigdbr-intro.html
    # https://bioconductor.org/packages/release/data/experiment/vignettes/msigdb/inst/doc/msigdb.html#the-molecular-signatures-database-msigdb
    # https://www.gsea-msigdb.org/gsea/msigdb/collections.jsp
    library(msigdb)
    library(ExperimentHub)
    library(GSEABase)

    # To download the data, we first need to get a list of the data available
    # in the msigdb package and determine the unique identifiers for each data.
    # The query() function assists in getting this list.

    # 1. Get the names of all available gene-set resources ----
    eh <- ExperimentHub()
    all_gene_sets_name <- query(eh, "msigdb")
    all_gene_sets_name

    # Data can then be downloaded using the unique identifier.
    eh[["EH5421"]]

    # Use the custom accessor to select a specific version of MSigDB.
    msigdb.hs <- getMsigdb(org = "hs", id = "SYM", version = "7.4")
    msigdb.hs

    # Each signature is stored in a GeneSet object and can be processed using
    # functions in the GSEABase R/Bioconductor package.
    gs <- msigdb.hs[[1000]]
    gs
    geneIds(gs)
    # Get collection type.
    collectionType(gs)
    # Get MSigDB category.
    bcCategory(collectionType(gs))
    # Get MSigDB subcategory.
    bcSubCategory(collectionType(gs))
    # Get description.
    description(gs)

    # Calculate the number of signatures in each category.
    table(sapply(lapply(msigdb.hs, collectionType), bcCategory))
    # Calculate the number of signatures in each subcategory.
    table(sapply(lapply(msigdb.hs, collectionType), bcSubCategory))

    # Plot the distribution of signature sizes.
    hist(
      sapply(lapply(msigdb.hs, geneIds), length),
      main = "MSigDB signature size distribution",
      xlab = "Signature size"
    )

    listCollections(msigdb.hs)
    #> [1] "c1" "c2" "c3" "c4" "c5" "c6" "c7" "c8" "h"
    listSubCollections(msigdb.hs)
    # Retrieve the hallmarks gene sets.
    subsetCollection(msigdb.hs, "h")
    # Retrieve the biological processes category of gene ontology.
    subsetCollection(msigdb.hs, "c5", "GO:BP")

    # 4. Fetch all human gene sets ----
    all_species <- msigdbr::msigdbr_species()
    all_species
    # Use "Mus musculus" instead for mouse.
    all_gene_sets_hs <- msigdbr::msigdbr(species = "Homo sapiens")
    # Fixed: the original called head() on the undefined `all_gene_set_hs`.
    head(all_gene_sets_hs)

    # List all available collections ----
    all_collections <- msigdbr::msigdbr_collections()
    all_collections

    # 5. Retrieve a specific collection for human / mouse ----
    # You can retrieve data for a specific collection, such as the hallmark
    # gene sets.
    # NOTE(review): newer msigdbr releases appear to rename `category` to
    # `collection` -- confirm against the installed version.
    m_gene_sets <- msigdbr::msigdbr(species = "mouse", category = "H")
    head(m_gene_sets)
    h_gene_sets <- msigdbr::msigdbr(species = "human", category = "H")
    head(h_gene_sets)
    h_gene_sets$gs_name

    # 6. Extract and build the human hallmark gene-set list ----
    # Same call as in section 4; repeated here, presumably so that this section
    # can be run on its own.
    all_gene_sets_hs <- msigdbr::msigdbr(species = "Homo sapiens")
    # saveRDS(all_gene_sets_hs, file = "~/datasets/all_gene_sets_hs_msigdb.rds")
    all_gene_sets_hs

    # Tabulate rows per gene-set name, category, sub-category and gene-set ID.
    # Plain function calls are used instead of `%>%`: this script never attaches
    # magrittr or dplyr explicitly, so the pipe may be undefined.
    table(all_gene_sets_hs$gs_name)
    table(all_gene_sets_hs$gs_cat)
    table(all_gene_sets_hs$gs_subcat)
    tail(table(all_gene_sets_hs$gs_id))

    # Named list: one vector of gene symbols per gene-set name.
    all_gene_sets_hs_list <- split(
      x = all_gene_sets_hs$gene_symbol,
      f = all_gene_sets_hs$gs_name
    )
    all_gene_sets_hs_list
    length(all_gene_sets_hs_list)

    # Keep only the rows whose category is "H" (the hallmark collection used in
    # section 5) and build the per-gene-set list for that subset.
    MSIGDB_CANONICAL <- dplyr::filter(all_gene_sets_hs, gs_cat == "H")
    MSIGDB_CANONICAL
    MSIGDB_CANONICAL_list <- split(
      x = MSIGDB_CANONICAL$gene_symbol,
      f = MSIGDB_CANONICAL$gs_name
    )
    length(MSIGDB_CANONICAL_list)
    names(MSIGDB_CANONICAL_list)
    # saveRDS(MSIGDB_CANONICAL_list, file = "~/datasets/Genesets_Dec19.rds")

  • 相关阅读:
    【YOLO】拾遗(五)
    【校招VIP】前端操作系统之进程通信
    Junit单元测试
    Kotlin 协程 Select:看我如何多路复用
    基于springboot+jsp+Mysql的商务安全邮箱邮件收发
    Oracle-Autoupgrade方式升级19c
    【uvm】Wait for Interface Signals in UVM
    1700亿烧光,利润暴跌78%!外媒:中芯国际不是麒麟9000S的代工厂
    云原生 黑马Kubernetes教程(K8S教程)笔记——第一章 kubernetes介绍——Master集群控制节点、Node工作负载节点、Pod控制单元
    风控模型中特征重要度的两种筛选方法
  • 原文地址:https://blog.csdn.net/qq_52813185/article/details/134063222