• easyPubMed


    # Section 3: query PubMed with easyPubMed ----
    # install.packages("easyPubMed")
    library(easyPubMed)
    # dplyr supplies %>% and count(); ggplot2 supplies the plotting layers.
    # Both must be attached before the pipeline at the end of this section.
    library(dplyr)
    library(ggplot2)

    # Search for "silicosis" in the title or abstract.
    my_query <- "silicosis[Title/Abstract]"
    entrez_id <- get_pubmed_ids(my_query)
    # NOTE(review): fetch_pubmed_data() downloads a limited number of records
    # per call (see its `retmax` argument), so large result sets are truncated
    # here -- confirm against the easyPubMed documentation.
    abstracts_txt <- fetch_pubmed_data(entrez_id, format = "xml")
    my_abstracts_xml <- abstracts_txt
    # print(abstracts_txt[1:16])
    # https://cran.r-project.org/web/packages/easyPubMed/vignettes/getting_started_with_easyPubMed.html

    # Extract the article titles from the XML.
    my_titles <- custom_grep(my_abstracts_xml, "ArticleTitle", "char")
    # Shorten titles longer than 75 characters to their first 70 plus "...".
    TTM <- nchar(my_titles) > 75
    my_titles[TTM] <- paste0(substr(my_titles[TTM], 1, 70), "...")
    head(my_titles)

    # One row per article (last author only); max_chars controls how much of
    # the abstract is kept.
    new_PM_df2 <- table_articles_byAuth(
      pubmed_data = abstracts_txt,
      included_authors = "last",
      max_chars = 0
    )
    new_PM_df <- table_articles_byAuth(
      pubmed_data = abstracts_txt,
      included_authors = "last",
      max_chars = 50000
    )
    write.table(new_PM_df, file = "pubmed_result.txt")
    dim(new_PM_df)

    # Articles per publication year.
    new_PM_df %>%
      # geom_line() needs a continuous x; a text year would put every point in
      # its own group and no line would be drawn.
      mutate(year = as.integer(as.character(year))) %>%
      count(year) %>%
      # filter(year >= 2020) %>%
      ggplot(aes(year, n)) +
      geom_point() +
      geom_line() +
      labs(
        title = "PubMed articles with silicosis in title/abstract",
        y = "Articles"
      ) +
      # Centre the title; hjust has no effect when passed to labs().
      theme(plot.title = element_text(hjust = 0.5))
    # https://mp.weixin.qq.com/s/JBjTJWt6dabjfmogsYYhXw
    # Number of PubMed records with "silicosis" in the title or abstract.
    pubmed.name <- get_pubmed_ids("silicosis[Title/Abstract]")
    print(pubmed.name$Count)

    # Number of records with publication date 2024. A keyword and a date can be
    # combined, e.g. "parkinson[TI] AND 2019[PDAT]".
    title.date <- get_pubmed_ids("2024[PDAT]")
    print(title.date$Count)

    # Title/abstract mentions silicosis or silica-induced, limited by
    # publication date. To search a date range instead of a single year use:
    # new_query <- "(silicosis[Title/Abstract] OR silica-induced[Title/Abstract]) AND (1858[PDAT]:2024[PDAT])"
    new_query <- "(silicosis[Title/Abstract] OR silica-induced[Title/Abstract]) AND (1858[PDAT])"
    new_entrez_id <- get_pubmed_ids(new_query)
    new_entrez_id

    # Pause before the per-year queries that follow in the script.
    Sys.sleep(5)
    # Download the matching records one publication year at a time and collect
    # one data frame per year in `all_df`, keyed by the year as a string.
    all_df <- list()
    for (eachyear in 1923:2024) {
      # Parentheses are balanced around the date term.
      each_query <- paste0(
        "(silicosis[Title/Abstract] OR silica-induced[Title/Abstract]) AND (",
        eachyear, "[PDAT])"
      )
      each_entrz_id <- get_pubmed_ids(each_query)
      print(paste0("=======done===", eachyear))
      # Pause after every search, including years without hits, so that
      # consecutive requests are always spaced out.
      Sys.sleep(1)
      if (as.integer(each_entrz_id$Count) == 0) {
        next # nothing published this year; move on to the next one
      }
      each_abstracts_txt <- fetch_pubmed_data(each_entrz_id, format = "xml")
      print(paste0("=======done===", eachyear))
      each_new_PM_df <- table_articles_byAuth(
        pubmed_data = each_abstracts_txt,
        included_authors = "last"
      )
      # A character key appends a named element; an integer key such as 1923
      # would pad the list with NULL placeholders up to that position.
      all_df[[as.character(eachyear)]] <- each_new_PM_df
      print(paste0("=======done===", eachyear))
      print(head(each_new_PM_df))
    }
    length(all_df)
    names(all_df)
    lapply(all_df, names)

    # Keep only the non-empty data frames.
    has_rows <- vapply(
      all_df,
      function(df) is.data.frame(df) && nrow(df) > 0,
      logical(1)
    )
    all_list <- all_df[has_rows]
    # Number of data frames that remain.
    length(all_list)

    # Stack all yearly data frames row-wise. The list names are dropped so the
    # combined data frame keeps plain sequential row names.
    combined_df <- do.call(rbind, unname(all_list))
    # Inspect the combined result.
    head(combined_df)
    dim(combined_df)
    # Articles per publication year across all downloaded records.
    # dplyr (%>%, count) and ggplot2 must be attached before this pipeline.
    library(dplyr)
    library(ggplot2)
    combined_df %>%
      # geom_line() needs a continuous x; a text year would put every point in
      # its own group and no line would be drawn.
      mutate(year = as.integer(as.character(year))) %>%
      count(year) %>%
      # Adjust the year cut-off here if only recent years are of interest.
      # filter(year >= 2020) %>%
      ggplot(aes(year, n)) +
      geom_point() +
      geom_line() +
      labs(
        title = "PubMed articles on silicosis or silica-induced disease per year",
        y = "Articles"
      ) +
      # Centre the title; hjust has no effect when passed to labs().
      theme(plot.title = element_text(hjust = 0.5))
    ## Publications per year as bar charts, in several visual styles.
    y <- combined_df$year

    library(ggplot2)
    # brewer.pal() comes from RColorBrewer and must be attached before use.
    library(RColorBrewer)
    library(ggsci)
    date()

    # Tabulate the number of articles per year.
    count <- table(y)
    count <- as.data.frame(count)
    names(count) <- c("Year", "Counts")

    # Brewer qualitative palettes have at most 8 colours, so interpolate them
    # to one colour per year instead of requesting more than the palette has.
    n_years <- nrow(count)
    dark2_fill <- colorRampPalette(brewer.pal(8, "Dark2"))(n_years)
    accent_fill <- colorRampPalette(brewer.pal(8, "Accent"))(n_years)
    chart_title <- "PubMed articles on silicosis or silica-induced disease"

    # Style 1: Dark2-based colours.
    p <- ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
      geom_bar(stat = "identity", width = 0.5) +
      labs(y = "Number of articles", title = chart_title) +
      scale_fill_manual(values = dark2_fill)
    p

    # Style 2: viridis colours, legend at the bottom.
    ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
      geom_bar(stat = "identity", width = 0.5) +
      scale_fill_viridis_d() +
      labs(y = "Number of articles", title = chart_title) +
      theme_bw() +
      theme(legend.position = "bottom")

    # Style 3: wider bars, x-axis labels rotated and right-aligned.
    ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
      geom_bar(stat = "identity", width = 0.9) +
      scale_fill_viridis_d() +
      labs(y = "Number of articles", title = chart_title) +
      theme_bw() +
      theme(
        legend.position = "bottom",
        axis.text.x = element_text(angle = 45, hjust = 1)
      )

    # Style 4: no background, axes or grid.
    ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
      geom_bar(stat = "identity", width = 0.8) +
      scale_fill_viridis_d() +
      labs(y = "Number of articles", title = chart_title) +
      theme_void() +
      theme(legend.position = "bottom")

    # Style 5: Accent-based colours. Only `fill` is mapped, so no colour scale
    # is needed.
    ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
      geom_bar(stat = "identity", width = 0.5) +
      labs(y = "Number of articles", title = chart_title) +
      theme_bw() +
      scale_fill_manual(values = accent_fill) +
      theme(legend.position = "bottom")
    # request 2
    # NOTE(review): these library paths are specific to one machine; adjust or
    # remove them when running elsewhere.
    .libPaths(c(
      "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2",
      "/home/data/t040413/R/yll/usr/local/lib/R/site-library",
      "/home/data/refdir/Rlib/", "/usr/local/lib/R/library"
    ))
    ## Load the required packages
    library(Seurat)
    # Section 1: query PubMed with RISmed ----
    # install.packages("RISmed") # https://www.jingege.wang/2020/06/03/pubmed%E6%95%B0%E6%8D%AE%E6%8C%96%E6%8E%98%EF%BC%9Arismed%E5%8C%85/
    library(RISmed) # https://mp.weixin.qq.com/s/a6XfKFXzVWHd52TKeCDrIQ
    library(dplyr)
    library(ggplot2)
    library(tidytext)
    # install.packages("tidytext")
    library(wordcloud)

    # Edit the search terms here.
    # NOTE(review): this query requires BOTH terms (AND), whereas the
    # easyPubMed queries in this post combine them with OR -- confirm intent.
    query <- "(silicosis[Title/Abstract]) AND (silica-induced[Title/Abstract])"
    result <- EUtilsSummary(
      query,
      type = "esearch",
      db = "pubmed",
      datetype = "pdat",
      retmax = 10000,
      mindate = 1000,
      maxdate = 2024
    )
    fetch <- EUtilsGet(result, type = "efetch", db = "pubmed")

    # Use the RISmed accessor functions rather than reading S4 slots directly.
    abstracts <- data.frame(
      title = ArticleTitle(fetch),
      abstract = AbstractText(fetch),
      journal = Title(fetch),
      # This column holds PubMed IDs, not DOIs; the `DOI` name is kept only so
      # existing code that refers to it keeps working. Prefer `PMID`.
      DOI = PMID(fetch),
      PMID = PMID(fetch),
      year = YearPubmed(fetch)
    )
    abstracts <- abstracts %>% mutate(abstract = as.character(abstract))

    # Articles per year from 2020 onwards; adjust the cut-off as needed.
    abstracts %>%
      count(year) %>%
      filter(year >= 2020) %>%
      ggplot(aes(year, n)) +
      geom_point() +
      geom_line() +
      labs(
        title = "PubMed articles matching silicosis AND silica-induced\n2020 onwards",
        y = "Articles"
      ) +
      # Centre the title; hjust has no effect when passed to labs().
      theme(plot.title = element_text(hjust = 0.5))
    # Section 2: topic search and per-year counts with RISmed ----
    # Restrict the search topic.
    search_topic <- c("silicosis") # ,"silica-induced"
    search_query <- EUtilsSummary(
      search_topic,
      db = "pubmed",
      retmax = 10000,
      datetype = "pdat",
      mindate = 2020,
      maxdate = 2023
    )
    # Summary of the search.
    summary(search_query)
    # IDs of the matching records.
    QueryId(search_query)
    # Download the matching records.
    records <- EUtilsGet(search_query, db = "pubmed")
    class(records)
    str(records)

    # Extract title, acceptance year and journal abbreviation.
    pubmed_data <- data.frame(
      "Title" = ArticleTitle(records),
      "Year" = YearAccepted(records),
      "journal" = ISOAbbreviation(records)
    )
    head(pubmed_data, 1)
    pubmed_data[1:3, 1]
    write.csv(pubmed_data, file = 'PAH-CHD.csv')

    # Publications per year. Reuse the records already downloaded instead of
    # fetching the same result set from PubMed again.
    y <- YearPubmed(records)

    # Plot the yearly counts.
    library(ggplot2)
    library(RColorBrewer)
    library(ggsci)
    date()
    count <- table(y)
    count <- as.data.frame(count)
    names(count) <- c("Year", "Counts")

    # The "Accent" palette has at most 8 colours; interpolate it to one colour
    # per year rather than requesting more colours than the palette provides.
    year_fill <- colorRampPalette(brewer.pal(8, "Accent"))(nrow(count))

    ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
      geom_bar(stat = "identity", width = 0.5) +
      labs(
        y = "Number of articles",
        title = "PubMed articles on silicosis, 2020-2023"
      ) +
      theme_bw() +
      scale_fill_manual(values = year_fill) +
      theme(legend.position = "bottom")

  • 相关阅读:
    自动化测试在 Kubernetes Operator 开发中的应用:以 OpenTelemetry 为例
    mysql-5:多表关系
    【python】len()、str()、int()和float()函数
    gd32 USB HOST 接口
    多旅行商问题——公式和求解过程概述
    不习惯的Vue3起步二 の alias别名、ref和reactive
    Spring Cloud Consul
    Flutter 没有完整的生命周期?
    chromedriver下载地址
    重磅!flink-table-store 将作为独立数据湖项目重新加入 Apache
  • 原文地址:https://blog.csdn.net/qq_52813185/article/details/133688823