1、原论文数据双标图
代码:
- # Working directory that holds the input file; change it to your own path.
- setwd("D:/Desktop/0000/R")
-
- # Import the data (comma-separated, first row holds the column names).
- df <- read.table("Input data.csv", header = TRUE, sep = ",")
-
- # -----------------------------------
- # Required packages. FactoMineR is listed as well because the PCA further
- # down calls FactoMineR::PCA(), so it has to be installed too.
- packages <- c(
-   "ggplot2", "tidyr", "dplyr", "readr", "ggrepel", "cowplot", "factoextra",
-   "FactoMineR"
- )
- # Install the packages that are not installed yet.
- installed_packages <- packages %in% rownames(installed.packages())
- if (!all(installed_packages)) {
-   install.packages(packages[!installed_packages])
- }
- # Attach every package; invisible() hides the list returned by lapply().
- invisible(lapply(packages, library, character.only = TRUE))
-
- # -----------------------------------
- # Basic colour and text settings.
- # Colours: one named colour per category of the IGBP column (used as the
- # biplot palette).
- CatCol <- c(
-   CSH = "#586158",
-   DBF = "#C46B39",
-   EBF = "#4DD8C0",
-   ENF = "#3885AB",
-   GRA = "#9C4DC4",
-   MF = "#C4AA4D",
-   OSH = "#443396",
-   SAV = "#CC99CC",
-   WET = "#88C44D",
-   WSA = "#AB3232"
- )
- # Three-colour palette; alternative: c("#809844", "#4f85b0", "#b07495")
- Three_colorblind <- c("#A8AD6F", "#AD6FA8", "#6FA8AD")
- graph_elements_dark <- "black"
- plot_elements_light <- "gray75"
- plot_elements_dark <- "gray25"
-
- # Transparency:
- boot_alpha_main <- 0.9
- boot_alpha_small <- 0.05
-
- # Text:
- # Option to change the orientation of the x axis text:
- # if (n_pcs > 3) {x_angle <- 270; x_adjust <- 0.25} else {x_angle <- 0; x_adjust <- 0}
- x_angle <- 0
- x_adjust <- 0
- # Nature Communications: max 7 pt; cowplot multiplier: 1/1.618;
- # 7 pt : 1/1.618 = x pt : 1; x = 7 / 1/1.618; x = 11.326 (round up to integer)
- title_text <- 9
- subtitle_text <- 9
- # Nature Communications: min 5 pt; cowplot multiplier: 1/1.618;
- # 5 pt : 1/1.618 = x pt : 1; x = 5 / 1/1.618; x = 8.09 (round up to integer)
- normal_text <- 9
-
- # Element dimensions:
- plot_linewidth <- 0.33
- point_shape <- 18
- point_size <- 1.5
-
- # Initialize figure lists:
- p_biplot <- list()
- p_r2 <- list()
- p_load <- list()
- p_contr <- list()
- col_ii <- list()
-
- # Labels:
- veg_sub_labels <- c("All Sites", "All Forests", "Evergreen Needle-Forests")
-
- # -----------------------------------
- # Columns needed for the PCA step (two label columns plus the variables).
- codes_4_PCA <- c("SITE_ID", "IGBP", "GPPsat", "wLL", "wNmass", "wLMA", "RECOmax")
- # Keep only those columns.
- df_subset <- df %>%
-   dplyr::select(all_of(codes_4_PCA))
- # Run the PCA. SITE_ID and IGBP are removed from the PCA input; IGBP is
- # used afterwards to colour the individuals in the biplot.
- # TRUE/FALSE are spelled out because T and F are ordinary, reassignable
- # variables.
- pca_result <- FactoMineR::PCA(
-   df_subset %>% dplyr::select(-SITE_ID, -IGBP),
-   scale.unit = TRUE,
-   ncp = 10,
-   graph = FALSE
- )
-
- # -----------------------------------
- # Draw the PCA biplot for the first two components.
- p1 <- fviz_pca_biplot(
-   pca_result,
-   axes = c(1, 2),
-   # Individuals: drawn as points, coloured by the IGBP column with the
-   # CatCol palette (alternatives tried: a single colour such as "grey50",
-   # or the 'futurama' palette).
-   geom.ind = "point",
-   col.ind = df_subset$IGBP,
-   palette = CatCol,
-   pointshape = 16,
-   pointsize = 2,
-   alpha.ind = 0.67,
-   # Variables: dark arrows; only the variables get text labels.
-   label = "var",
-   col.var = plot_elements_dark,
-   labelsize = 3,
-   arrowsize = 0.5,
-   repel = TRUE
- )
-
- # -----------------------------------
- # fviz_pca_biplot() returned a ggplot2 object, so labels and theme can be
- # adjusted with the usual ggplot2 layers.
- p1 <- p1 +
-   labs(title = "", x = "PC1", y = "PC2", fill = "IGBP") +
-   guides(fill = guide_legend(title = "")) +
-   theme(
-     title = element_blank(),
-     text = element_text(size = normal_text),
-     # Axes: no axis lines or ticks, bold axis titles.
-     axis.line = element_blank(),
-     axis.ticks = element_blank(),
-     axis.title = element_text(size = title_text, face = "bold"),
-     axis.text = element_text(size = normal_text),
-     # Legend: text size and key dimensions.
-     # Optional extras: plot.margin = unit(c(0, 0, 0, 0), "cm"),
-     #                  legend.position = "none"
-     legend.text = element_text(size = subtitle_text),
-     legend.key.height = unit(5, "mm"),
-     legend.key.width = unit(2, "mm")
-   )
- # Display the figure.
- p1
参考:Leaf-level coordination principles propagate to the ecosystem scale (https://doi.org/10.1038/s41467-023-39572-5)、主成分分析图。
2、我选用iris数据进行重新绘制测试双标图
代码:
- # Working directory that holds the input file; change it to your own path.
- setwd("D:/Desktop/0000/R")
-
- # Import the data (comma-separated, first row holds the column names).
- df <- read.table("iris1.csv", header = TRUE, sep = ",")
-
- # -----------------------------------
- # Required packages. FactoMineR is listed as well because the PCA further
- # down calls FactoMineR::PCA(), so it has to be installed too.
- packages <- c(
-   "ggplot2", "tidyr", "dplyr", "readr", "ggrepel", "cowplot", "factoextra",
-   "FactoMineR"
- )
- # Install the packages that are not installed yet.
- installed_packages <- packages %in% rownames(installed.packages())
- if (!all(installed_packages)) {
-   install.packages(packages[!installed_packages])
- }
- # Attach every package; invisible() hides the list returned by lapply().
- invisible(lapply(packages, library, character.only = TRUE))
-
- # -----------------------------------
- # Basic colour and text settings.
- # Colours: one named colour per category (used as the biplot palette).
- CatCol <- c(
-   setosa = "#586158",
-   versicolor = "#C46B39",
-   virginica = "#4DD8C0"
- )
- # Three-colour palette; alternative: c("#809844", "#4f85b0", "#b07495")
- Three_colorblind <- c("#A8AD6F", "#AD6FA8", "#6FA8AD")
- graph_elements_dark <- "black"
- plot_elements_light <- "gray75"
- plot_elements_dark <- "gray25"
-
- # Transparency:
- boot_alpha_main <- 0.9
- boot_alpha_small <- 0.05
-
- # Text:
- # Option to change the orientation of the x axis text:
- # if (n_pcs > 3) {x_angle <- 270; x_adjust <- 0.25} else {x_angle <- 0; x_adjust <- 0}
- x_angle <- 0
- x_adjust <- 0
- # Nature Communications: max 7 pt; cowplot multiplier: 1/1.618;
- # 7 pt : 1/1.618 = x pt : 1; x = 7 / 1/1.618; x = 11.326 (round up to integer)
- title_text <- 9
- subtitle_text <- 9
- # Nature Communications: min 5 pt; cowplot multiplier: 1/1.618;
- # 5 pt : 1/1.618 = x pt : 1; x = 5 / 1/1.618; x = 8.09 (round up to integer)
- normal_text <- 9
-
- # Element dimensions:
- plot_linewidth <- 0.33
- point_shape <- 18
- point_size <- 1.5
-
- # Initialize figure lists:
- p_biplot <- list()
- p_r2 <- list()
- p_load <- list()
- p_contr <- list()
- col_ii <- list()
-
- # Labels:
- veg_sub_labels <- c("All Sites", "All Forests", "Evergreen Needle-Forests")
-
- # -----------------------------------
- # Columns needed for the PCA step (the four measurements plus the label).
- codes_4_PCA <- c("sepal_length", "sepal_width", "petal_length", "petal_width", "species")
- # Keep only those columns.
- df_subset <- df %>%
-   dplyr::select(all_of(codes_4_PCA))
- # Run the PCA. dplyr::select(-species) removes the label column from the
- # PCA input; species is used afterwards to colour the individuals.
- # TRUE/FALSE are spelled out because T and F are ordinary, reassignable
- # variables.
- pca_result <- FactoMineR::PCA(
-   df_subset %>% dplyr::select(-species),
-   scale.unit = TRUE,
-   ncp = 10,
-   graph = FALSE
- )
-
- # -----------------------------------
- # Draw the PCA biplot for the first two components.
- p1 <- fviz_pca_biplot(
-   pca_result,
-   axes = c(1, 2),
-   # Individuals: drawn as points, coloured by the species column with the
-   # CatCol palette (alternatives tried: a single colour such as "grey50",
-   # or the 'futurama' palette).
-   geom.ind = "point",
-   col.ind = df_subset$species,
-   palette = CatCol,
-   pointshape = 16,
-   pointsize = 2,
-   alpha.ind = 0.67,
-   # Variables: dark arrows; only the variables get text labels.
-   label = "var",
-   col.var = plot_elements_dark,
-   labelsize = 3,
-   arrowsize = 0.5,
-   repel = TRUE
- )
-
- # -----------------------------------
- # fviz_pca_biplot() returned a ggplot2 object, so labels and theme can be
- # adjusted with the usual ggplot2 layers.
- p1 <- p1 +
-   labs(
-     title = "",
-     x = "PC1",
-     y = "PC2",
-     # The grouping column of this data set is `species`; the previous label
-     # "IGBP" was carried over from the script for the original paper data.
-     fill = "species"
-   ) +
-   guides(fill = guide_legend(title = "")) +
-   theme(
-     title = element_blank(),
-     text = element_text(size = normal_text),
-     # Axes: no axis lines or ticks, bold axis titles.
-     axis.line = element_blank(),
-     axis.ticks = element_blank(),
-     axis.title = element_text(size = title_text, face = "bold"),
-     axis.text = element_text(size = normal_text),
-     # Legend: text size and key dimensions.
-     # Optional extras: plot.margin = unit(c(0, 0, 0, 0), "cm"),
-     #                  legend.position = "none"
-     legend.text = element_text(size = subtitle_text),
-     legend.key.height = unit(5, "mm"),
-     legend.key.width = unit(2, "mm")
-   )
- # Display the figure.
- p1
3、iris数据进行绘制碎石图、变量载荷图、变量贡献图
代码:
- # Packages used by this script, listed in the order they are attached:
- #   dplyr, tidyr      - data preparation
- #   stringr           - string handling
- #   modelr            - bootstrap resampling
- #   FactoMineR, ade4  - PCA
- #   factoextra        - extracting and plotting PCA results
- #   ggplot2, readr, ggrepel, cowplot, patchwork - plotting / I/O helpers
- # Every package that is attached is also in the install check, so a missing
- # package is installed instead of making library() fail.
- packages <- c(
-   "dplyr", "tidyr", "stringr", "modelr", "FactoMineR", "ade4", "factoextra",
-   "ggplot2", "readr", "ggrepel", "cowplot", "patchwork"
- )
- # Install the packages that are not installed yet.
- installed_packages <- packages %in% rownames(installed.packages())
- if (!all(installed_packages)) {
-   install.packages(packages[!installed_packages])
- }
- # Attach every package; invisible() hides the list returned by lapply().
- invisible(lapply(packages, library, character.only = TRUE))
-
- # Plot settings used by the scree, loading and contribution plots below.
- # They carry the same values as in the biplot scripts and are defined here
- # so that this script also runs in a fresh R session.
- Three_colorblind <- c("#A8AD6F", "#AD6FA8", "#6FA8AD")
- graph_elements_dark <- "black"
- plot_elements_dark <- "gray25"
- boot_alpha_main <- 0.9
- boot_alpha_small <- 0.05
- x_angle <- 0
- x_adjust <- 0
- title_text <- 9
- subtitle_text <- 9
- normal_text <- 9
- plot_linewidth <- 0.33
- point_shape <- 18
- point_size <- 1.5
-
- # Working directory that holds the input file; change it to your own path.
- setwd("D:/Desktop/0000/R")
- # Load the data. row.names = 1 uses the first column as row labels, so that
- # column is not part of df afterwards.
- # NOTE(review): the bootstrap and the PCA below use the built-in `iris`
- # data set rather than `df` - confirm which one is intended.
- df <- read.csv("iris.csv", header = TRUE, row.names = 1)
-
- # Bootstrap resampling
- set.seed(123) # fix the random seed
- tt <- 99 # number of bootstrap resamples
- # iris[, -5] drops the 5th column (the class label). The PCA loop reads
- # resample j from row j of the `strap` column of df_boot.
- df_boot <- iris[, -5] %>% modelr::bootstrap(n = tt)
-
- # Accumulators for the per-resample PCA results (initialised once, all as
- # empty tibbles):
- N_PCS <- tibble() # number of PCs retained by the dimensionality test
- pca_stats <- tibble() # variable contributions and loadings
- R2 <- tibble() # share of explained variance
-
- # Run a PCA on every bootstrap resample.
- # The per-iteration results are stored in preallocated lists and bound to
- # the accumulators once after the loop, instead of re-copying the growing
- # tibbles on every iteration.
- boot_n_pcs <- vector("list", tt)
- boot_stats <- vector("list", tt)
- boot_r2 <- vector("list", tt)
-
- # seq_len(tt) is an empty sequence when tt is 0, whereas 1:tt would give
- # c(1, 0).
- for (j in seq_len(tt)) {
-   # Extract the data of the j-th bootstrap resample.
-   dat <- df_boot %>%
-     slice(j) %>% # row j
-     pull(strap) %>% # the resample stored in the list column
-     as.data.frame() # materialise it as a data frame
-
-   # PCA with FactoMineR; ncp = 4 is the number of components kept in the
-   # result (set it to the maximum to keep all of them).
-   pca_result <- FactoMineR::PCA(dat, scale.unit = TRUE, ncp = 4, graph = FALSE)
-
-   # PCA with ade4:
-   #   center  - centre each variable (subtract its mean)
-   #   scale   - scale each variable (divide by its standard deviation)
-   #   scannf  - whether the scree plot is displayed; FALSE keeps the call
-   #             non-interactive (see the ade4 documentation)
-   #   nf      - number of axes kept (set it to the maximum to keep all)
-   pca1 <- ade4::dudi.pca(dat, center = TRUE, scale = TRUE, scannf = FALSE, nf = 4)
-
-   # Dimensionality test with 999 repetitions; its nb.cor element is stored
-   # below as the number of retained PCs.
-   pc_tested <- ade4::testdim(pca1, nrepet = 999)
-
-   # Number of retained PCs in run j.
-   boot_n_pcs[[j]] <- tibble(strap = j, n_pcs = pc_tested$nb.cor)
-
-   # Variable contributions and loadings in long format, one row per
-   # variable and PC.
-   boot_stats[[j]] <- pca_result$var$contrib %>% # contributions
-     as_tibble(rownames = "var") %>%
-     pivot_longer(cols = !var, names_to = "PC", values_to = "contrib") %>%
-     left_join(
-       pca_result$var$coord %>% # loadings
-         as_tibble(rownames = "var") %>%
-         pivot_longer(cols = !var, names_to = "PC", values_to = "loading"),
-       by = c("var", "PC")
-     ) %>%
-     mutate(
-       # keep the text from the 5th character on, i.e. the PC number
-       # (assumes column names of the form "Dim.1")
-       PC = str_sub(PC, start = 5),
-       strap = j # bootstrap run number
-     )
-
-   # Explained variance (second column of the eigenvalue table) per PC.
-   boot_r2[[j]] <- tibble(
-     PC = rownames(pca_result[["eig"]]),
-     exp_var = pca_result[["eig"]][, 2],
-     strap = j
-   ) %>%
-     # keep the text from the 6th character on, i.e. the PC number
-     # (assumes row names of the form "comp 1")
-     mutate(PC = str_sub(PC, start = 6))
- }
-
- # Append the collected results to the accumulators.
- N_PCS <- bind_rows(N_PCS, boot_n_pcs)
- pca_stats <- bind_rows(pca_stats, boot_stats)
- R2 <- bind_rows(R2, boot_r2)
-
- # Number of retained PCs: how often each number was selected.
- N_PCS <- N_PCS %>%
-   group_by(n_pcs) %>%
-   summarise(n_rep = n()) %>% # count the runs per value
-   mutate(retained = n_rep / tt * 100) # share of runs in percent
-
- # Most frequently retained number of PCs. If several values tie for the
- # maximum, only the first row is kept so that pc_ret (and n_pcs below) is a
- # single value; the plots further down only read n_pcs[1].
- pc_ret <- N_PCS %>%
-   dplyr::filter(retained == max(retained)) %>%
-   dplyr::slice(1)
- # Print a summary of the result.
- print(paste0("Number of statistical significant components according to Dray method (Dray et al., 2008) was ", pc_ret$n_pcs[1], " in ", round(pc_ret$retained[1], digits = 1), "% of runs."))
-
-
- n_pcs <- NA # initial setting for the number of retained PCs
- # n_pcs <- 2 # the number of retained PCs can also be set by hand
- if (is.na(n_pcs)) {
-   # No manual value: use the most frequent result of the Dray et al. test.
-   n_pcs <- pc_ret$n_pcs[1]
- }
-
- # Variable contributions and loadings: add the bootstrap mean, median and
- # standard deviation per PC and variable to every row.
- pca_stats <- pca_stats %>%
-   group_by(PC, var) %>%
-   mutate(
-     contrib_mean = mean(contrib),
-     contrib_median = median(contrib),
-     contrib_std = sd(contrib),
-     # optional: contrib_q25 = quantile(contrib, 0.25), contrib_q75 = quantile(contrib, 0.75)
-     loading_mean = mean(loading),
-     loading_median = median(loading),
-     loading_std = sd(loading)
-     # optional: loading_q25 = quantile(loading, 0.25), loading_q75 = quantile(loading, 0.75)
-   ) %>%
-   ungroup() %>%
-   # rename the per-run columns so they do not clash with the columns joined
-   # in later
-   dplyr::rename(contrib_boot = contrib, loading_boot = loading) %>%
-   # readable PC name, e.g. "PC1"
-   mutate(PC_name = paste0("PC", PC))
-
- # Explained variance: the same summary statistics per PC.
- R2 <- R2 %>%
-   group_by(PC) %>%
-   mutate(
-     R2_mean = mean(exp_var),
-     R2_median = median(exp_var),
-     R2_std = sd(exp_var)
-     # optional: R2_q25 = quantile(exp_var, 0.25), R2_q75 = quantile(exp_var, 0.75)
-   ) %>%
-   ungroup() %>%
-   # rename the per-run column so it does not clash with the column joined
-   # in later
-   dplyr::rename(R2_boot = exp_var)
-
- # Attach the explained-variance columns to pca_stats (matched on PC and
- # bootstrap run).
- pca_stats <- pca_stats %>%
-   left_join(R2, by = c("PC", "strap"))
-
- # PCA on the original (non-resampled) data.
- # TRUE/FALSE are spelled out because T and F are ordinary, reassignable
- # variables.
- pca_result <- FactoMineR::PCA(iris[, -5], scale.unit = TRUE, ncp = 4, graph = FALSE)
-
- # Add the actual values computed from the original data (as opposed to the
- # bootstrap summaries).
- pca_stats <- pca_stats %>%
-   dplyr::left_join( # explained variance of the original data
-     tibble(
-       PC = rownames(pca_result[["eig"]]),
-       R2 = pca_result[["eig"]][, 2]
-     ) %>%
-       mutate(PC = str_sub(PC, start = 6)), # keep the PC number
-     by = "PC"
-   ) %>%
-   dplyr::left_join( # variable contributions of the original data
-     pca_result$var$contrib %>%
-       as_tibble(rownames = "var") %>%
-       pivot_longer(cols = !var, names_to = "PC", values_to = "contrib") %>%
-       mutate(PC = str_sub(PC, start = 5)), # keep the PC number
-     by = c("PC", "var")
-   ) %>%
-   dplyr::left_join( # variable loadings of the original data
-     pca_result$var$coord %>%
-       as_tibble(rownames = "var") %>%
-       pivot_longer(cols = !var, names_to = "PC", values_to = "loading") %>%
-       mutate(PC = str_sub(PC, start = 5)), # keep the PC number
-     by = c("PC", "var")
-   )
-
- # Add the retention percentage (how often each number of PCs was retained
- # across the bootstrap runs), matched on the PC number.
- pca_stats <- pca_stats %>%
-   dplyr::left_join(
-     N_PCS %>%
-       dplyr::mutate(PC = as.character(n_pcs)) %>%
-       dplyr::select(PC, retained),
-     by = "PC"
-   )
-
- # -----------------------------------
- # Scree plot
- # Explained variance of the individual bootstrap runs.
- dat_boot <- pca_stats %>%
-   dplyr::select(PC_name, PC, R2_boot) %>%
-   unique() %>% # drop duplicated rows
-   dplyr::mutate(PC = as.character(PC))
-
- # Actual explained variance plus bootstrap median and standard deviation.
- dat_true <- pca_stats %>%
-   dplyr::select(PC_name, PC, R2, R2_median, R2_std) %>%
-   unique() %>% # drop duplicated rows
-   dplyr::mutate(PC = as.character(PC))
-
- # x = PC would put only the numbers on the axis; x = PC_name can cause
- # problems with PC10 being ordered before PC2.
- # group = 1 is there to avoid certain warnings/errors.
- p2 <- ggplot(data = dat_true, aes(x = PC_name, y = R2, group = 1)) +
-   # actual value +/- bootstrap standard deviation
-   geom_errorbar(
-     aes(ymin = R2 - R2_std, ymax = R2 + R2_std),
-     color = Three_colorblind[1],
-     linewidth = plot_linewidth,
-     width = 0.4
-   ) +
-   # alternative to the line: geom_bar(stat = "identity", position = position_dodge(), fill = Three_colorblind[1], width = 0.61)
-   geom_line(color = Three_colorblind[1]) +
-   # actual values
-   geom_point(color = Three_colorblind[1], size = point_size) +
-   # value of every bootstrap run
-   geom_jitter(
-     data = dat_boot,
-     aes(x = PC_name, y = R2_boot, group = 1),
-     alpha = 0.1,
-     color = "black",
-     shape = point_shape,
-     size = 0.5,
-     width = 0.1
-   ) +
-   # bootstrap median
-   geom_point(
-     aes(x = PC_name, y = R2_median),
-     color = plot_elements_dark,
-     alpha = boot_alpha_main,
-     shape = point_shape,
-     size = point_size
-   ) +
-   # numeric label placed above each error bar
-   geom_text(
-     aes(
-       x = PC_name,
-       y = R2 + R2_std + 2,
-       label = paste0(R2 %>% round(digits = 1), "%")
-     ),
-     nudge_x = 0.33,
-     size = 2
-   ) +
-   labs(title = "", x = "", y = "Explained variance") +
-   theme_classic() +
-   theme(
-     title = element_blank(),
-     text = element_text(size = normal_text),
-     axis.line = element_line(color = graph_elements_dark),
-     axis.ticks.x = element_line(color = graph_elements_dark),
-     axis.ticks.y = element_blank(),
-     axis.title = element_text(size = title_text, face = "bold"),
-     # axis.title.x = element_blank(), # already specified in labs()
-     axis.text = element_text(size = normal_text),
-     axis.text.y = element_blank(),
-     plot.margin = unit(c(0, 1, 0, 1), "cm"),
-     legend.position = "none"
-   )
- p2
-
- # -----------------------------------
- # Variable loading plot
- # PC holds the component number as text, so it is converted to an integer
- # before it is compared with n_pcs. Comparing the text directly would be a
- # string comparison, in which "10" sorts before "2".
- # Loadings of the individual bootstrap runs, retained PCs only.
- dat_boot <- pca_stats %>%
-   dplyr::filter(as.integer(PC) <= n_pcs[1]) %>% # drop the extra PCs
-   dplyr::select(PC_name, var, loading_boot) %>%
-   unique() # drop duplicated rows
-
- # Actual loadings plus bootstrap median and standard deviation.
- dat_true <- pca_stats %>%
-   dplyr::filter(as.integer(PC) <= n_pcs[1]) %>% # drop the extra PCs
-   dplyr::select(PC_name, var, loading, loading_median, loading_std) %>%
-   unique() # drop duplicated rows
-
- p3 <- ggplot(data = dat_true, aes(x = var, y = loading)) +
-   facet_grid(. ~ PC_name, scales = "free_y") +
-   # actual value +/- bootstrap standard deviation
-   # (alternative: ymin = loading_q25, ymax = loading_q75)
-   geom_errorbar(
-     aes(ymin = loading - loading_std, ymax = loading + loading_std),
-     color = Three_colorblind[2],
-     linewidth = plot_linewidth,
-     width = 0.9
-   ) +
-   geom_bar(stat = "identity", position = position_dodge(), fill = Three_colorblind[2]) +
-   geom_hline(yintercept = 0, color = graph_elements_dark) +
-   # value of every bootstrap run
-   geom_jitter(
-     data = dat_boot,
-     aes(x = var, y = loading_boot),
-     alpha = boot_alpha_small,
-     color = plot_elements_dark,
-     shape = point_shape,
-     size = 0.2,
-     width = 0.1
-   ) +
-   # bootstrap median
-   geom_point(
-     aes(x = var, y = loading_median),
-     alpha = boot_alpha_main,
-     shape = point_shape,
-     size = point_size,
-     color = plot_elements_dark
-   ) +
-   coord_flip() + # flip the axes for better readability
-   # axis breaks of the value axis (drawn horizontally after the flip)
-   scale_y_continuous(breaks = waiver(), n.breaks = 4) +
-   labs(y = "Loadings", x = "", title = "") +
-   theme_classic() +
-   theme(
-     title = element_text(size = normal_text, face = "bold"),
-     text = element_text(size = normal_text),
-     axis.line.x = element_line(color = graph_elements_dark),
-     axis.line.y = element_blank(),
-     axis.ticks.x = element_line(color = graph_elements_dark),
-     axis.ticks.y = element_blank(),
-     axis.title = element_text(size = title_text),
-     axis.text = element_text(size = normal_text),
-     axis.text.x = element_text(angle = x_angle, vjust = x_adjust),
-     legend.position = "none",
-     legend.title = element_text(size = title_text),
-     legend.text = element_text(size = subtitle_text),
-     legend.key.height = unit(1.0, "mm"),
-     legend.key.width = unit(1.0, "mm"),
-     plot.margin = unit(c(0, 0, 0, 0), "cm"),
-     strip.text = element_text(face = "bold", size = title_text),
-     strip.background = element_blank()
-   ) +
-   NULL
- p3
-
- # -----------------------------------
- # Variable contribution plot
- # PC holds the component number as text, so it is converted to an integer
- # before it is compared with n_pcs. Comparing the text directly would be a
- # string comparison, in which "10" sorts before "2".
- # Contributions of the individual bootstrap runs, retained PCs only.
- dat_boot <- pca_stats %>%
-   dplyr::filter(as.integer(PC) <= n_pcs[1]) %>% # drop the extra PCs
-   dplyr::select(PC_name, var, contrib_boot) %>%
-   unique() # drop duplicated rows
-
- # Actual contributions plus bootstrap median and standard deviation.
- dat_true <- pca_stats %>%
-   dplyr::filter(as.integer(PC) <= n_pcs[1]) %>% # drop the extra PCs
-   dplyr::select(PC_name, var, contrib, contrib_median, contrib_std) %>%
-   unique() # drop duplicated rows
-
- p4 <- ggplot(data = dat_true, aes(x = var, y = contrib)) +
-   facet_grid(. ~ PC_name, scales = "free_y") +
-   # bootstrap median +/- bootstrap standard deviation
-   # (alternative: ymin = contrib_q25, ymax = contrib_q75)
-   # NOTE(review): the error bars are centred on the bootstrap median here,
-   # while the loading plot centres them on the actual value - confirm that
-   # this difference is intended.
-   geom_errorbar(
-     aes(ymin = contrib_median - contrib_std, ymax = contrib_median + contrib_std),
-     color = Three_colorblind[3],
-     linewidth = plot_linewidth,
-     width = 0.9
-   ) +
-   geom_bar(stat = "identity", position = position_dodge(), fill = Three_colorblind[3]) +
-   geom_hline(yintercept = 0, color = graph_elements_dark) +
-   # value of every bootstrap run
-   geom_jitter(
-     data = dat_boot,
-     aes(x = var, y = contrib_boot),
-     alpha = boot_alpha_small,
-     color = plot_elements_dark,
-     shape = point_shape,
-     size = 0.2,
-     width = 0.1
-   ) +
-   # bootstrap median
-   geom_point(
-     aes(x = var, y = contrib_median),
-     alpha = boot_alpha_main,
-     shape = point_shape,
-     size = point_size,
-     color = plot_elements_dark
-   ) +
-   coord_flip() + # flip the axes for better readability
-   # axis breaks of the value axis (drawn horizontally after the flip)
-   scale_y_continuous(breaks = waiver(), n.breaks = 4) +
-   labs(y = "Contribution [%]", x = "", title = "") +
-   theme_classic() +
-   theme(
-     title = element_text(size = normal_text, face = "bold"),
-     text = element_text(size = normal_text),
-     axis.line.x = element_line(color = graph_elements_dark),
-     axis.line.y = element_blank(),
-     axis.ticks.x = element_line(color = graph_elements_dark),
-     axis.ticks.y = element_blank(),
-     axis.title = element_text(size = title_text),
-     axis.text = element_text(size = normal_text),
-     axis.text.x = element_text(angle = x_angle, vjust = x_adjust),
-     legend.position = "none",
-     legend.title = element_text(size = title_text),
-     legend.text = element_text(size = subtitle_text),
-     legend.key.height = unit(1.0, "mm"),
-     legend.key.width = unit(1.0, "mm"),
-     # plot.margin = unit(c(0, 0, 0, 0), "cm"),
-     strip.text = element_text(face = "bold", size = title_text),
-     strip.background = element_blank()
-   ) +
-   NULL
- p4
-
- # -----------------------------------
- # Combine the panels with patchwork. `/` binds tighter than `+`, so p3 and
- # p4 are combined first and the result is then added to p2; the parentheses
- # only make that grouping explicit.
- library(patchwork)
- p2 + (p3 / p4)