
# install.packages("easyPubMed")
# Reference:
# https://cran.r-project.org/web/packages/easyPubMed/vignettes/getting_started_with_easyPubMed.html

# Query PubMed for "silicosis" in the title/abstract and fetch the hits as XML.
my_query <- "silicosis[Title/Abstract]"
entrez_id <- get_pubmed_ids(my_query)
abstracts_txt <- fetch_pubmed_data(entrez_id, format = "xml")
my_abstracts_xml <- abstracts_txt
# print(abstracts_txt[1:16])

# Pull the ArticleTitle entries out of the XML, then shorten every title longer
# than 75 characters to its first 70 characters followed by "...".
my_titles <- custom_grep(my_abstracts_xml, "ArticleTitle", "char")
TTM <- nchar(my_titles) > 75
my_titles[TTM] <- paste0(substr(my_titles[TTM], start = 1, stop = 70), "...")

# Build the per-article tables (last author only) with two max_chars settings,
# and write the max_chars = 50000 table to disk (use kable to print it nicely).
new_PM_df2 <- table_articles_byAuth(
  pubmed_data = abstracts_txt,
  included_authors = "last",
  max_chars = 0
)
new_PM_df <- table_articles_byAuth(
  pubmed_data = abstracts_txt,
  included_authors = "last",
  max_chars = 50000
)
write.table(new_PM_df, file = "pubmed_result.txt")
# filter(year >= 2020) %>%
# NOTE(review): the labs() call opened on the next line has no closing
# parenthesis anywhere in this file, so everything after it is swallowed into
# the call and the script fails to parse. It looks like a leftover from a
# plotting pipeline whose other lines are missing -- restore or delete it.
labs(title = "Pubmed articles with search terms lung cancer AND quercetin \n2020-2023", hjust = 0.5,
#https://mp.weixin.qq.com/s/JBjTJWt6dabjfmogsYYhXw
# Look up PubMed IDs for two standalone queries: a title/abstract term and a
# publication-date filter.
pubmed.name <- get_pubmed_ids("silicosis[Title/Abstract]") # original note: "restrict by author" (the query itself filters on Title/Abstract)
title.date <- get_pubmed_ids("2024[PDAT]") # e.g. ("parkinson[TI] AND 2019[PDAT]") restricts by keyword and date
## Original note (stale): search for articles whose title mentions the genes
## APE1 or OGG1, published 2012-2016. The queries below are about silicosis.
# The first assignment (date range 1858-2024) is immediately overwritten by the
# second (1858 only), so only the second query is sent to PubMed.
new_query<-"(silicosis[Title/Abstract] OR silica-induced[Title/Abstract]) AND (1858[PDAT]:2024[PDAT])"
new_query<-"(silicosis[Title/Abstract] OR silica-induced[Title/Abstract]) AND (1858[PDAT])"
new_entrez_id <- get_pubmed_ids(new_query)
# Query PubMed once per year from 1923 to 2024, turn each year's records into
# a table, store it in all_df under the year value, then row-bind all tables.
# NOTE(review): this block is incomplete as written:
#   - the paste0() call is never closed (the per-year date clause that
#     presumably followed the trailing comma is missing);
#   - neither the `for` nor the `if` has a closing brace;
#   - all_df is not initialised anywhere in the visible code.
for (eachyear in 1923:2024) {
each_query <- paste0('(silicosis[Title/Abstract] OR silica-induced[Title/Abstract]) AND ',
each_entrz_id <- get_pubmed_ids(each_query)
print(paste0("=======done===", eachyear))
# NOTE(review): as written, the fetch below sits inside the Count == 0 branch;
# presumably the branch originally skipped years without hits -- confirm.
if (each_entrz_id$Count == 0) {
each_abstracts_txt <- fetch_pubmed_data(each_entrz_id, format = "xml")
print(paste0("=======done===", eachyear))
each_new_PM_df <- table_articles_byAuth(pubmed_data = each_abstracts_txt, included_authors = "last")
# The list is indexed by the year itself, so positions below 1923 stay NULL.
all_df[[eachyear]] <- each_new_PM_df
print(paste0("=======done===", eachyear))
print(head(each_new_PM_df))
# Keep only the entries that are non-NULL and have at least one row, then bind
# them by row into a single data frame.
all_list <- all_df[sapply(all_df, function(df) !is.null(df) && nrow(df) > 0)]
combined_df <- do.call(rbind, all_list)
# combined_df now holds the contents of all the data frames, bound by row.
#filter(year >= 2020) %>%
# NOTE(review): orphaned, unclosed labs() call -- see the missing closing
# parenthesis; restore the surrounding plot code or delete this line.
labs(title = "Pubmed articles with search terms lung cancer AND quercetin \n2020-2023", hjust = 0.5,
# Convert `count` to a data frame with columns Year/Counts and start a bar
# chart of article counts per year, stored in `p`.
# NOTE(review): `count` is not created anywhere in the visible code before this
# point -- confirm where it comes from.
count<-as.data.frame(count)
names(count)<-c("Year", "Counts")
p<-ggplot(data=count, aes(x=Year, y=Counts,fill=Year)) +
geom_bar(stat="identity", width=0.5)+
# NOTE(review): the labs() call below is missing its closing parenthesis and
# the `+` that would chain it to scale_fill_brewer().
labs(y = "Number of articles",title="PubMed articles containing lncRNA"
scale_fill_brewer(palette="Dark2")
# Bar chart of article counts per year: viridis discrete fill, legend below.
ggplot(count, aes(Year, Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.5) +
  scale_fill_viridis_d() +
  ylab("Number of articles") +
  ggtitle("PubMed articles containing PAH-CHD") +
  theme(legend.position = "bottom")
# Bar chart of article counts per year with wide bars, legend below, and the
# x-axis labels rotated so long year lists stay readable.
ggplot(count, aes(Year, Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.9) +
  ylab("Number of articles") +
  ggtitle("PubMed articles containing PAH-CHD") +
  theme(
    legend.position = "bottom",
    # Rotate the x-axis text by 45 degrees and right-align it.
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# Bar chart of article counts per year: bar width 0.8, viridis discrete fill,
# legend below.
# Alternative fill kept from the original for reference:
#   scale_fill_manual(values = colorRampPalette(brewer.pal(10, "Accent"))(10)[1:10])
ggplot(count, aes(Year, Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.8) +
  scale_fill_viridis_d() +
  ylab("Number of articles") +
  ggtitle("PubMed articles containing PAH-CHD") +
  theme(legend.position = "bottom")
# Bar chart of article counts per year, filled with colours interpolated from
# the RColorBrewer "Accent" palette, black-and-white theme, legend below.
# - The former scale_color_manual() layer was removed: aes() maps only x, y and
#   fill, and the palette was passed positionally instead of via `values =`.
# - "Accent" is requested with 8 colours (10 were asked for before), and the
#   ramp yields at least 10 colours but never fewer than the number of
#   distinct Year values, so scale_fill_manual() always has enough values.
ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(y = "Number of articles", title = "PubMed articles containing PAH-CHD") +
  theme_bw() +
  scale_fill_manual(
    values = colorRampPalette(brewer.pal(8, "Accent"))(
      max(10L, length(unique(count$Year)))
    )
  ) +
  theme(legend.position = "bottom")

# Set the package library search path to these site-specific directories.
.libPaths(
  new = c(
    "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2",
    "/home/data/t040413/R/yll/usr/local/lib/R/site-library",
    "/home/data/refdir/Rlib/",
    "/usr/local/lib/R/library"
  )
)
## We load the required packages
##############1------------------
# Section 1: fetch records with RISmed and collect titles/abstracts.
#install.packages("RISmed") #https://www.jingege.wang/2020/06/03/pubmed%E6%95%B0%E6%8D%AE%E6%8C%96%E6%8E%98%EF%BC%9Arismed%E5%8C%85/
library(RISmed) #https://mp.weixin.qq.com/s/a6XfKFXzVWHd52TKeCDrIQ
#install.packages("tidytext")
# Both terms must appear in the title/abstract (joined with AND).
query <- "(silicosis[Title/Abstract]) AND (silica-induced[Title/Abstract])"
# NOTE(review): the EUtilsSummary() call below is never closed; its remaining
# arguments are missing from this file.
result <- EUtilsSummary(query,
fetch <- EUtilsGet(result, type = "efetch", db = "pubmed")
# NOTE(review): the data.frame() call below is also left open after the
# `abstract` column (trailing comma, no closing parenthesis).
abstracts <- data.frame(title = fetch@ArticleTitle,
abstract = fetch@AbstractText,
# NOTE(review): %>% and mutate() are used here but no package providing them
# is loaded in the visible code -- confirm dplyr/magrittr is attached.
abstracts <- abstracts %>% mutate(abstract = as.character(abstract))
# NOTE(review): orphaned, unclosed labs() call; the plot it belonged to is
# missing. Restore the surrounding code or delete this line.
labs(title = "Pubmed articles with search terms lung cancer AND quercetin \n2020-2023", hjust = 0.5,
##################2------
# Section 2: search PubMed for the topic within 2020-2023 (publication date),
# fetch the records, and export title / year accepted / journal to CSV.
search_topic <- "silicosis" # ,"silica-induced"
search_query <- EUtilsSummary(
  search_topic,
  db = "pubmed",
  retmax = 10000,
  datetype = "pdat",
  mindate = 2020,
  maxdate = 2023
)
records <- EUtilsGet(search_query, db = "pubmed")

pubmed_data <- data.frame(
  Title = ArticleTitle(records),
  Year = YearAccepted(records),
  journal = ISOAbbreviation(records)
)
write.csv(pubmed_data, file = "PAH-CHD.csv")
# Count the fetched records per PubMed year and draw the counts as a bar chart.
y <- YearPubmed(EUtilsGet(search_query))
# Tabulate the years first: previously `count` was passed to as.data.frame()
# without ever being computed from `y`.
count <- as.data.frame(table(y))
names(count) <- c("Year", "Counts")
# "Accent" is requested with 8 colours (10 were asked for before); the ramp
# yields at least 10 colours and never fewer than the number of years, so
# scale_fill_manual() always has enough values.
ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(y = "Number of articles", title = "PubMed articles containing PAH-CHD") +
  theme_bw() +
  scale_fill_manual(
    values = colorRampPalette(brewer.pal(8, "Accent"))(max(10L, nrow(count)))
  ) +
  theme(legend.position = "bottom")
# Count the fetched records per PubMed year and draw the counts as a bar chart
# (same recipe as the previous plot, different title and palette size).
y <- YearPubmed(EUtilsGet(search_query))
# Tabulate the years first: previously `count` was passed to as.data.frame()
# without ever being computed from `y`.
count <- as.data.frame(table(y))
names(count) <- c("Year", "Counts")
# "Accent" is requested with 8 colours (19 were asked for before); the ramp
# yields at least 19 colours and never fewer than the number of years, so
# scale_fill_manual() always has enough values.
ggplot(data = count, aes(x = Year, y = Counts, fill = Year)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(
    y = "Number of articles",
    title = "PubMed articles containing DNA methylation in plant"
  ) +
  theme_bw() +
  scale_fill_manual(
    values = colorRampPalette(brewer.pal(8, "Accent"))(max(19L, nrow(count)))
  ) +
  theme(legend.position = "bottom")
