# easyPubMed


################3----
#install.packages("easyPubMed")
 
library(easyPubMed)
library(dplyr)
library(ggplot2)
# Search PubMed for records whose title or abstract mentions "silicosis",
# then download the matching records as XML.
# Vignette:
# https://cran.r-project.org/web/packages/easyPubMed/vignettes/getting_started_with_easyPubMed.html
my_query <- "silicosis[Title/Abstract]"
entrez_id <- get_pubmed_ids(my_query)
abstracts_txt <- fetch_pubmed_data(entrez_id, format = "xml")
my_abstracts_xml <- abstracts_txt
# print(abstracts_txt[1:16])

# Extract the content of every ArticleTitle tag as a character vector.
my_titles <- custom_grep(my_abstracts_xml, "ArticleTitle", "char")

# Titles longer than 75 characters are shortened to their first 70
# characters followed by "...".
TTM <- nchar(my_titles) > 75
my_titles[TTM] <- paste0(substr(my_titles[TTM], 1, 70), "...")

# Preview the first few (possibly shortened) titles.
head(my_titles)
 
 
 
 
# Convert the downloaded XML into data frames, keeping only the last author
# of each article. The two calls differ only in `max_chars`, which limits
# how much abstract text is kept (0 is presumably "no abstract" -- confirm
# against the easyPubMed manual).
new_PM_df2 <- table_articles_byAuth(
  pubmed_data = abstracts_txt,
  included_authors = "last",
  max_chars = 0
)

new_PM_df <- table_articles_byAuth(
  pubmed_data = abstracts_txt,
  included_authors = "last",
  max_chars = 50000
)
write.table(new_PM_df, file = "pubmed_result.txt")

dim(new_PM_df)

# Number of articles per publication year.
# `year` is converted to integer so that geom_line() connects the points;
# with a character/factor x axis every year is its own group and no line is
# drawn. Rows without a usable year are dropped.
new_PM_df %>%
  mutate(year = as.integer(as.character(year))) %>%
  filter(!is.na(year)) %>%
  count(year) %>%
  ggplot(aes(year, n)) +
  geom_point() +
  geom_line() +
  labs(
    title = "PubMed articles matching silicosis[Title/Abstract]\nby publication year",
    y = "Articles"
  ) +
  # Centre the title (hjust has no effect when passed to labs()).
  theme(plot.title = element_text(hjust = 0.5))
 
 
 
# Source: https://mp.weixin.qq.com/s/JBjTJWt6dabjfmogsYYhXw

# Number of PubMed records with "silicosis" in the title or abstract.
pubmed.name <- get_pubmed_ids("silicosis[Title/Abstract]")
print(pubmed.name$Count)

# Number of PubMed records published in 2024. A keyword can be combined
# with a date, e.g. "parkinson[TI] AND 2019[PDAT]".
title.date <- get_pubmed_ids("2024[PDAT]")
print(title.date$Count)

# Records mentioning either term in the title/abstract, restricted by
# publication date. Only the single-year query below was ever effective in
# this script; for a date range use "(1858[PDAT]:2024[PDAT])" instead.
new_query <- "(silicosis[Title/Abstract] OR silica-induced[Title/Abstract]) AND  (1858[PDAT])"

new_entrez_id <- get_pubmed_ids(new_query)
new_entrez_id

# Pause before the next batch of requests to the NCBI servers.
Sys.sleep(5)
 
 
 
# Download silicosis-related PubMed records one publication year at a time
# and collect one data frame per year in `all_df`. Elements are stored under
# the year as a character name, so the list holds only years that returned
# records and names(all_df) reports which years those are.
# NOTE(review): fetch_pubmed_data() is called with its default record limit;
# confirm that no single year exceeds it.
all_df <- list()
for (eachyear in 1923:2024) {
  # The year filter gets its own balanced pair of parentheses.
  each_query <- paste0(
    "(silicosis[Title/Abstract] OR silica-induced[Title/Abstract]) AND (",
    eachyear, "[PDAT])"
  )

  each_entrz_id <- get_pubmed_ids(each_query)
  print(paste0("=======done===", eachyear))

  # Skip years with no hits. The count is coerced to integer so the check
  # does not depend on how the reply encodes it; a missing or unparsable
  # count is treated as "nothing to fetch".
  n_hits <- suppressWarnings(as.integer(each_entrz_id$Count))
  if (length(n_hits) != 1 || is.na(n_hits) || n_hits == 0) {
    next
  }

  each_abstracts_txt <- fetch_pubmed_data(each_entrz_id, format = "xml")
  print(paste0("=======done===", eachyear))
  # Short pause between downloads to avoid hammering the NCBI servers.
  Sys.sleep(1)

  each_new_PM_df <- table_articles_byAuth(
    pubmed_data = each_abstracts_txt,
    included_authors = "last"
  )

  # Index by name, not by integer: all_df[[1923]] would pad the list with
  # 1922 leading NULL elements.
  all_df[[as.character(eachyear)]] <- each_new_PM_df
  print(paste0("=======done===", eachyear))
  print(head(each_new_PM_df))
}
# Inspect what the download loop collected.
length(all_df)
names(all_df)
lapply(all_df, names)

# Keep only the elements that are data frames with at least one row.
# vapply() always returns a logical vector (also for an empty list, where
# sapply() would return list() and break the subsetting), and the
# is.data.frame() guard keeps nrow() from returning NULL inside `&&`.
has_rows <- vapply(
  all_df,
  function(df) is.data.frame(df) && nrow(df) > 0,
  logical(1)
)
all_list <- all_df[has_rows]

# Number of data frames that survived the filter.
length(all_list)

# Stack all per-year data frames row-wise into a single data frame.
combined_df <- do.call(rbind, all_list)

# Quick look at the combined result.
head(combined_df)
dim(combined_df)
# Number of articles per publication year across all downloaded years.
# `year` is converted to integer so that geom_line() connects the points;
# with a character/factor x axis every year is its own group and no line is
# drawn. Rows without a usable year are dropped.
# To restrict the range, add e.g. filter(year >= 2020) before count().
combined_df %>%
  mutate(year = as.integer(as.character(year))) %>%
  filter(!is.na(year)) %>%
  count(year) %>%
  ggplot(aes(year, n)) +
  geom_point() +
  geom_line() +
  labs(
    title = "PubMed articles matching silicosis OR silica-induced\nby publication year",
    y = "Articles"
  ) +
  # Centre the title (hjust has no effect when passed to labs()).
  theme(plot.title = element_text(hjust = 0.5))
## Tabulate the number of articles per publication year.
y <- combined_df$year
## Visualise the counts.
library(ggplot2)
date()
count <- table(y)
count <- as.data.frame(count)
names(count) <- c("Year", "Counts")

# One bar per year, filled by year. A discrete viridis scale is used because
# the "Dark2" brewer palette only provides 8 colours, which leaves most bars
# without a fill colour when there are more than 8 years.
p <- ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(
    y = "Number of articles",
    title = "PubMed articles containing silicosis"
  ) +
  scale_fill_viridis_d()
p
# Three styling variants of the same per-year bar chart built from `count`.
# The titles name the topic that was actually searched (silicosis).
library(ggsci)

# Variant 1: black-and-white theme, legend below the plot.
ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.5) +
  scale_fill_viridis_d() + # viridis palette
  labs(y = "Number of articles", title = "PubMed articles containing silicosis") +
  theme_bw() +
  theme(legend.position = "bottom")

# Variant 2: wider bars and x-axis labels rotated by 45 degrees.
library(ggplot2)
ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.9) +
  scale_fill_viridis_d() +
  labs(y = "Number of articles", title = "PubMed articles containing silicosis") +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1) # rotate and right-align
  )

# Variant 3: no background (theme_void also removes axes and grid lines).
library(ggplot2)
ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.8) + # bar width
  scale_fill_viridis_d() + # viridis palette
  labs(y = "Number of articles", title = "PubMed articles containing silicosis") +
  theme_void() + # drop the background
  theme(legend.position = "bottom")
# Per-year bar chart with a manual fill palette interpolated from the
# "Accent" brewer colours.
# - "Accent" has 8 base colours, so 8 (not 10) are requested.
# - The ramp yields exactly one colour per row of `count`; a fixed number of
#   colours fails as soon as there are more years than colours.
# - brewer.pal() is called through its namespace because RColorBrewer is not
#   attached at this point in the script.
# - The former scale_color_manual() call is dropped: it passed the palette
#   positionally instead of as `values`, and no layer maps `colour` anyway.
year_fill <- colorRampPalette(RColorBrewer::brewer.pal(8, "Accent"))(nrow(count))
ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.5) +
  scale_fill_manual(values = year_fill) +
  labs(y = "Number of articles", title = "PubMed articles containing silicosis") +
  theme_bw() +
  theme(legend.position = "bottom")


 
 
#request 2
# Point R at the machine-specific package library directories used for the
# packages loaded below.
.libPaths(
  c(
    "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2",
    "/home/data/t040413/R/yll/usr/local/lib/R/site-library",
    "/home/data/refdir/Rlib/",
    "/usr/local/lib/R/library"
  )
)
 
## We load the required packages
library(Seurat)
 
 
 
##############1------------------
#install.packages("RISmed") #https://www.jingege.wang/2020/06/03/pubmed%E6%95%B0%E6%8D%AE%E6%8C%96%E6%8E%98%EF%BC%9Arismed%E5%8C%85/
library(RISmed) #https://mp.weixin.qq.com/s/a6XfKFXzVWHd52TKeCDrIQ
 
library(dplyr)
library(ggplot2)
library(tidytext)
#install.packages("tidytext")
library(wordcloud)
# Search terms (edit as needed): both terms must occur in the title/abstract.
query <- "(silicosis[Title/Abstract]) AND (silica-induced[Title/Abstract])"
result <- EUtilsSummary(
  query,
  type = "esearch",
  db = "pubmed",
  datetype = "pdat",
  retmax = 10000,
  mindate = 1000,
  maxdate = 2024
)

# Download the matching records and keep the fields of interest.
fetch <- EUtilsGet(result, type = "efetch", db = "pubmed")
abstracts <- data.frame(
  title = fetch@ArticleTitle,
  abstract = fetch@AbstractText,
  journal = fetch@Title,
  # This column holds PubMed IDs; it was previously mislabelled "DOI".
  PMID = fetch@PMID,
  year = fetch@YearPubmed
)
abstracts <- abstracts %>% mutate(abstract = as.character(abstract))

# Articles per year from 2020 onwards (adjust the cut-off year as needed).
abstracts %>%
  filter(year >= 2020) %>%
  count(year) %>%
  ggplot(aes(year, n)) +
  geom_point() +
  geom_line() +
  labs(
    title = "PubMed articles matching silicosis AND silica-induced\n2020-2024",
    y = "Articles"
  ) +
  # Centre the title (hjust has no effect when passed to labs()).
  theme(plot.title = element_text(hjust = 0.5))
 
 
 
 
 
 
 
##################2------
 
 
# Restrict the search topic.
search_topic <- c('silicosis') # ,"silica-induced"
search_query <- EUtilsSummary(
  search_topic,
  db = "pubmed",
  retmax = 10000,
  datetype = 'pdat',
  mindate = 2020,
  maxdate = 2023
)
# Summary of the search.
summary(search_query)
# IDs of the matching articles.
QueryId(search_query)
# Download the matching records.
records <- EUtilsGet(search_query, db = "pubmed")
class(records)
str(records)
# Extract title, year and journal abbreviation of every record.
pubmed_data <- data.frame(
  'Title' = ArticleTitle(records),
  'Year' = YearAccepted(records),
  'journal' = ISOAbbreviation(records)
)
head(pubmed_data, 1)
pubmed_data[1:3, 1]
# NOTE(review): the file name does not match the searched topic (silicosis);
# it is left unchanged because other tooling may expect it.
write.csv(pubmed_data, file = 'PAH-CHD.csv')

# Analyse the number of articles per year. The records downloaded above are
# reused instead of fetching the same result set a second time.
y <- YearPubmed(records)
# Visualise the counts.
library(ggplot2)
date()
count <- table(y)
count <- as.data.frame(count)
names(count) <- c("Year", "Counts")
library(RColorBrewer)
library(ggsci)
# "Accent" has 8 base colours, so 8 are requested; the ramp then yields one
# colour per year in `count`.
ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(
    y = "Number of articles",
    title = "PubMed articles containing silicosis"
  ) +
  theme_bw() +
  scale_fill_manual(
    values = colorRampPalette(brewer.pal(8, "Accent"))(nrow(count))
  ) +
  theme(legend.position = "bottom")
 
 
# Repeat of the per-year bar chart for the `silicosis` search. The records
# already downloaded into `records` are reused rather than fetched again.
y <- YearPubmed(records)
## Visualise the counts.
library(ggplot2)
date()
count <- table(y)
count <- as.data.frame(count)
names(count) <- c("Year", "Counts")
library(ggsci)
# "Accent" has 8 base colours (requesting 19 only triggers a warning); the
# ramp yields one colour per year in `count`. brewer.pal() is called through
# its namespace so this section does not depend on an earlier library() call.
ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(
    y = "Number of articles",
    title = "PubMed articles containing silicosis"
  ) +
  theme_bw() +
  scale_fill_manual(
    values = colorRampPalette(RColorBrewer::brewer.pal(8, "Accent"))(nrow(count))
  ) +
  theme(legend.position = "bottom")

# Related reading:
# 自动化测试在 Kubernetes Operator 开发中的应用：以 OpenTelemetry 为例
# mysql-5:多表关系
# 【python】len()、str()、int()和float()函数
# gd32 USB HOST 接口
# 多旅行商问题——公式和求解过程概述
# 不习惯的Vue3起步二の alias别名、ref和reactive
# Spring Cloud Consul
# Flutter 没有完整的生命周期？
# chromedriver下载地址
# 重磅！flink-table-store 将作为独立数据湖项目重新加入 Apache
# Original article: https://blog.csdn.net/qq_52813185/article/details/133688823