页面说明
本页汇总 Figures 图形分析的核心脚本与下载入口,便于按步骤学习与复现。
页面仅展示学习所需内容,不再展示中间处理统计数据。
模块统计看板
自动读取本页代码区块与下载入口0
脚本区块
可展开阅读
0
下载入口
一键下载学习代码
0
可展开代码
便于分段阅读
0
导航入口
快速切换相关页面
页面类型-
可展开代码块0个
下载按钮0个
快速导航0项
Code_Figure1.Rmd 下载学习代码
点击展开完整代码
---
title: "Map_prevalence_ASR_2019"
output: html_document
date: "2023-08-03"
---
#1.下载并加载R包
```{R}
#install.packages("ggmap")
#install.packages("rgdal")
#install.packages("maps")
#install.packages("cowplot")
#install.packages("patchwork")
#install.packages("sf")
#install.packages("maptools")
#install.packages("tmaptools")
library(ggmap)
library(rgdal)
library(maps)
library(dplyr)
library(cowplot)
library(patchwork)
library(purrr)
library(stringr)
library(sf)
library(maptools)
library(tmaptools)
``` #2.导入数据(2大数据:GBD数据库 & 世界地图数据)
```{R}
Countries_1990to2019 <- read.csv('./data/204counties_1990to2019.csv', header=T) load('./data/GBD_maps.RData')
``` #3.大地图数据清洗_Prevalence_ASR_2019
```{R}
##3.1 数据筛选(目的:最终选出204个location的val)####
Prevalence2019_ASR <- Countries_1990to2019 %>% filter(year == "2019", sex_name == "Both", age_name == "Age-standardized", measure_name == "Prevalence", metric_name == "Rate") %>% select("location_id", "location_name","val") #注意:location选id和name,id用于大地图,name用于小地图 ##3.2 确定色阶 (A.确定节点breaks;B.根据节点形成区间break_labels; C.给不同的区间加上不同的颜色 pal)####
###3.2.1 确定节点breaks####
breaks <- c(668,1000,1500,2000,2500,3000,3500,4000,4500)
###3.2.2 根据节点形成区间break_labels####
breaks_labels <- imap_chr(breaks, function(., idx){ return(paste0(breaks[idx], " to ", breaks[idx+1]))
})
breaks_labels <- breaks_labels[1:length(breaks)-1] breaks_labels[length(breaks_labels)] <- paste0('>=', str_split(breaks_labels[length(breaks_labels)],' ',simplify = T)[1])
breaks_labels
###3.2.3 给不同的区间加上不同的颜色 pal####
pal <- tmaptools::get_brewer_pal(palette = "Spectral",n = length(breaks_labels)) ##3.3 整理画图数据 (该数据是用于可视化的基础,在本情景下,主要是生成两列变量:1.每个国家的地理边界经纬度信息;2.每个国家的色阶labels)####
Prevalence2019_ASR_map <- left_join(Prevalence2019_ASR, world_GBD, by = c('location_id' = 'Location.ID')) %>% mutate(val2 = cut(val, breaks = breaks, labels = breaks_labels, include.lowest = T,right = F)) #cut函数:目的将连续型变量转化为分类型数据;使用时包含的数据有:变量、breaks、labels、逻辑变量(是否包含左右端点) ##3.4 数据可视化;(基于ggplot函数,需要了解其设计理念:基于图层来构建图形。图层由三部分组成:数据、映射、几何对象)####
Prevalence2019_ASR_map_plot <- ggplot(data = Prevalence2019_ASR_map,aes(fill = val2)) + geom_sf(aes(geometry = geometry)) + #geom_sf函数用于绘制空间几何图形 scale_fill_manual( values = rev(pal), breaks = breaks_labels, labels = breaks_labels) + theme_void+ labs(x="", y="")+ guides(fill = guide_legend(title='ASPR in 2019 per 100,000 persons', ncol =2))+ theme(text = element_text(size = 6), legend.position = c(0.06,0.1), legend.key.size = unit(0.4, "cm") )+ coord_sf(xlim = c(-180,208), expand = FALSE)
``` #4 小地图数据清洗_Prevalence_ASR_2019
```{R}
##4.1 Caribbean and central America#### (每个小地图,涉及2大步:1.筛选作图数据;2.利用ggplot作图,在开始第二步前,需要先准备主题设置以及X和Y边界)
###4.1.1 准备作图数据####
a <- Prevalence2019_ASR_map[Prevalence2019_ASR_map$location_name %in% subregions_shp[['Caribbean and central America']]$Location.Name,] ###4.1.2 ggplot前准备####
####准备1:主题theme (适用于所有小地图)
theme_map_sub <- theme_void+labs(x="", y="")+theme_bw+ theme(text = element_text(size = 4), panel.grid.major = element_blank, panel.grid.minor = element_blank, legend.position = 'none', axis.text = element_blank, axis.ticks = element_blank, plot.title = element_text(vjust = 1, hjust = 0.5)) ####准备2:确定小地图的X和Y边界(每个小地图不同)
x_location = c(-90,-59) #经度区间
y_location = c(7,28) #纬度区间 ###4.1.3 可视化####
sub1 <- ggplot(data = a, aes(fill = val2)) + #创建ggplot对象“名为sub1”,并设置了数据源为a。fill = val2指定了在图中使用val2变量的值作为填充颜色 geom_sf(aes(geometry = geometry)) + scale_fill_manual( values = rev(pal), breaks = breaks_labels, labels = breaks_labels) + coord_sf(xlim = x_location, ylim = y_location, expand = FALSE)+ ggtitle('Caribbean and central America')+ theme_map_sub ## 4.2 Persian Gulf ####
###4.2.1 准备作图数据
a <- Prevalence2019_ASR_map[Prevalence2019_ASR_map$location_name %in% subregions_shp[['Persian Gulf']]$Location.Name,] ###4.2.2 ggplot前准备
x_location = c(45,55)
y_location = c(18.5,31.5) ###4.2.3 可视化
sub2 <- ggplot(data = a, aes(fill = val2)) + geom_sf(aes(geometry = geometry)) + scale_fill_manual( values = rev(pal), breaks = breaks_labels, labels = breaks_labels) + coord_sf(xlim = x_location, ylim = y_location, expand = FALSE)+ ggtitle('Persian Gulf')+ theme_map_sub ## 4.3 Balkan Peninsula #### a <- Prevalence2019_ASR_map[Prevalence2019_ASR_map$location_name %in% subregions_shp[['Balkan Peninsula']]$Location.Name,] x_location = c(12.5,32)
y_location = c(35,53) sub3 <- ggplot(data = a, aes(fill = val2)) + geom_sf(aes(geometry = geometry)) + scale_fill_manual( values = rev(pal), breaks = breaks_labels, labels = breaks_labels) + coord_sf(xlim = x_location, ylim = y_location, expand = FALSE)+ ggtitle('Balkan Peninsula')+ theme_map_sub ## 4.4 Southeast Asia ####
a <- Prevalence2019_ASR_map[Prevalence2019_ASR_map$location_name %in% subregions_shp[['Southeast Asia']]$Location.Name,] x_location = c(97.5,119.7)
y_location = c(-9.2,9) sub4 <- ggplot(data = a, aes(fill = val2)) + geom_sf(aes(geometry = geometry)) + scale_fill_manual( values = rev(pal), breaks = breaks_labels, labels = breaks_labels) + coord_sf(xlim = x_location, ylim = y_location, expand = FALSE)+ ggtitle('Southeast Asia')+ theme_map_sub ## 4.5.West Africa ####
a <- Prevalence2019_ASR_map[Prevalence2019_ASR_map$location_name %in% subregions_shp[['West Africa']]$Location.Name,] x_location = c(-17.5,-7)
y_location = c(6.8,16.7) sub5 <- ggplot(data = a, aes(fill = val2)) + geom_sf(aes(geometry = geometry)) + scale_fill_manual( values = rev(pal), breaks = breaks_labels, labels = breaks_labels) + coord_sf(xlim = x_location, ylim = y_location, expand = FALSE)+ ggtitle('West Africa')+ theme_map_sub ## 4.6.Eastern Mediterranean ####
a <- Prevalence2019_ASR_map[Prevalence2019_ASR_map$location_name %in% subregions_shp[['Eastern Mediterranean']]$Location.Name,] x_location = c(30.5,38.5)
y_location = c(29.1,34.9) sub6 <- ggplot(data = a, aes(fill = val2)) + geom_sf(aes(geometry = geometry)) + scale_fill_manual( values = rev(pal), breaks = breaks_labels, labels = breaks_labels) + coord_sf(xlim = x_location, ylim = y_location, expand = FALSE)+ ggtitle('Eastern Mediterranean')+ theme_map_sub ## 4.7.Northern Europe ####
a <- Prevalence2019_ASR_map[Prevalence2019_ASR_map$location_name %in% subregions_shp[['Nothern Europe']]$Location.Name,] x_location = c(2.5,27)
y_location = c(48,59)
sub7 <- ggplot(data = a, aes(fill = val2)) + geom_sf(aes(geometry = geometry)) + scale_fill_manual( values = rev(pal), breaks = breaks_labels, labels = breaks_labels) + coord_sf(xlim = x_location, ylim = y_location, expand = FALSE)+ ggtitle('Northern Europe')+ theme_map_sub
``` #5.拼图
```{R}
plot1 <- (sub1 + sub2 + sub3 + sub4) + plot_layout(nrow = 1) plot2 <- (sub5 | sub6) / sub7 + plot_layout(height = c(1, 1.2)) plot3 <- plot1|plot2 + plot_layout(widths = c(1, 15)) Prevalence2019_ASR_map_plot <- Prevalence2019_ASR_map_plot / plot3 + plot_layout(height = c(2,1),widths = c(2,1))
``` #6.保存
```{R}
ggsave(Prevalence2019_ASR_map_plot, file = './output/Prevalence2019_ASR_map.pdf', units = 'cm', width = 24, height = 17.8)
``` Code_Figure3.Rmd 下载学习代码
点击展开完整代码
= ---
title: "Code_Figure3.Rmd"
output: html_document
date: "2023-08-07"
--- #1.安装并加载R包
```{r}
#install.packages("tidyverse")
#install.packages("ggplot2")
#install.packages("scales")
library(tidyverse)
library(ggplot2)
library(scales)
``` #2.加载数据
```{r}
Global_2019_AgeandSex <- read.csv('./data/Global_2019_AgeandSex.csv') ``` #3.数据清洗(3.1字符串;3.2筛选行与列;3.3确定用于作图的变量内部的顺序)
```{R}
##3.1 字符串(如1-4 year)预处理
Global_2019_AgeandSex$age_name <- str_split(Global_2019_AgeandSex$age_name, ' ', simplify = T)[,1] ##3.2 筛选行与列 (20age_groups * 2sex * 2metric = 80)
Global_2019_AgeandSex_Prevalance <- Global_2019_AgeandSex %>% select("location_name","year","sex_name","age_name","measure_name","metric_name","val","upper","lower") %>% filter(location_name == "Global", year == "2019", age_name %in% c("1-4","5-9","10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49","50-54","55-59","60-64","65-69","70-74","75-79","80-84","85-89","90-94","95+"), sex_name %in% c("Male","Female"), measure_name == 'Prevalence', metric_name %in% c('Number', 'Rate')) %>% mutate(val = if_else(metric_name == 'Number', val/1000000,val/(1000*10/3)), #条件表达式if_else upper = if_else(metric_name == 'Number', upper/1000000,upper/(1000*10/3)), lower = if_else(metric_name == 'Number', lower/1000000,lower/(1000*10/3))) ##3.3 确定作图相关的变量(年龄、性别)内部信息的顺序(如 男性、女性 vs 女性、男性)
Global_2019_AgeandSex_Prevalance$age_name <- factor(Global_2019_AgeandSex_Prevalance$age_name, levels = c('1-4','5-9','10-14','15-19','20-24','25-29','30-34','35-39','40-44','45-49','50-54','55-59','60-64','65-69','70-74','75-79','80-84','85-89','90-94','95+'))
Global_2019_AgeandSex_Prevalance$sex_name <- factor(Global_2019_AgeandSex_Prevalance$sex_name, levels = c('Male', 'Female')) ``` #4.画图(bar + errorbar + line*3 + 坐标轴[xlab, scale_y_continous] +图例 + 主题)
```{r}
plot <- ggplot + # 添加左边纵坐标轴和柱状图 geom_col(data = subset(Global_2019_AgeandSex_Prevalance, metric_name == "Number"), aes(x = age_name, y = val, fill = sex_name), width = 0.6,position = position_dodge(width = 0.6)) + # 添加柱状图的误差线 geom_errorbar(data = subset(Global_2019_AgeandSex_Prevalance, metric_name == "Number"), aes(x = age_name, ymin = lower, ymax = upper, group = sex_name),width = 0.3,position = position_dodge(width = 0.6)) + # 添加折线图(3组,val, lower, upper) geom_line(data = subset(Global_2019_AgeandSex_Prevalance, metric_name == "Rate"), aes(x = age_name, y = val, group = sex_name, color = sex_name),linetype = "dashed",size = 1) + geom_line(data = subset(Global_2019_AgeandSex_Prevalance, metric_name == "Rate"), aes(x = age_name, y = upper, group = sex_name, color = sex_name), linetype = "dashed") + geom_line(data = subset(Global_2019_AgeandSex_Prevalance, metric_name == "Rate"), aes(x = age_name, y = lower, group = sex_name, color = sex_name),linetype = "dashed") + # 手动设置图例,针对左边的纵坐标轴,scale_fill_manual里面的fill对应柱状图,所以图例名称设置为Number scale_fill_manual(name = "Number", values = c("Male" = "#6B58A6", "Female" = "#FCAF17"), guide = "legend") + # 手动设置图例,针对右边的纵坐标轴,scale_color_manual里面的color对应线图,所以图例名称设置为Rate scale_color_manual(name = "Rate", values = c("Male" = "#D30F8C", "Female" = "#0871B9"), guide = "legend") + # 设置主题(背景网格、图例[位置、标题、大小]、坐标轴) theme_bw + theme(panel.grid.minor = element_blank, panel.grid.major.x = element_blank, panel.grid.minor.x = element_blank, legend.position = 'top', legend.title = element_blank, legend.key.size = unit(0.3, "cm"), axis.text = element_text(size = 6), axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1), axis.text.y.left = element_text(margin = margin(r = 0)), axis.text.y.right = element_text(margin = margin(r = 0)), axis.title = element_text(size = 8), axis.ticks = element_blank) + # 设置两个纵坐标轴(主 + 辅助),主纵坐标轴(左边)辅助纵坐标轴(右边)的name、数值关系、范围 scale_y_continuous( name = "Total prevalent cases (millions)", sec.axis = sec_axis(~ .*10/3, name = "Prevalence per 100,000 \n population (000s)"), breaks = seq(0, 18, by = 3), limits = c(0, 18),expand = c(0,0)) + xlab('Age group (years)')
``` #5.文件保存
```{R}
ggsave(plot, file = './output/Prevalence_age_and_sex_2019.pdf',units = 'cm', height = 6, width = 10) ``` Correlation_SDI_regions.Rmd 下载学习代码
点击展开完整代码
---
title: "Correlation_SDI_地区层面"
output: html_document
date: "2023-08-7"
--- #1.安装并加载相关R包
```{r}
#install.packages("tidyverse")
#install.packages("readxl")
#install.packages("ggplot2") library(readxl)
library(tidyverse)
library(ggplot2)
``` #2.加载数据(主要有三个:1.全球和21个地区从1990到2019年的SDI数据;2.全球和21个地区在2019年的DALYs ASR;3.全球和21个地区的list)
```{r}
##获取SDI_value文件(这个文件是location × year的SDI矩阵)
SDI_value <- readxl::read_xlsx('./data/SDI_value_GBD2019_1990_2019.XLSX',sheet=2,skip=1) #读入xlsx文件的第二个sheet,跳过第一行 ##获取Global和21regions的COPD疾病负担数据
Global_21Regions_1990to2019 <- read.csv('./data/Global_21Regions_1990to2019.csv', header=T) ##获取Global和21regions的list文件
order_globalandregions <- read.csv("./data/order_globalandregions.csv", header = F)
``` #3.数据清洗(即需要整理出3列数据,分别是Global和21个region的location_year pair,SDI值,疾病负担指标如ASR)
```{r}
##3.1 制作第一列数据:Global和21个regions的"location_year"组合,一共22*30=660行,在整理第二列和第三列数据的时候,整理 ##3.2 制作第二列数据:Global和21个regions在1990-2019年的SDI数据
SDI_value_Global_21Regions <- SDI_value %>% filter(Location %in% order_globalandregions$V1) SDI_value_Global_21Regions_long <- pivot_longer(SDI_value_Global_21Regions, # 输入的数据框(wide format) cols = colnames(SDI_value_Global_21Regions)[-1], # 指定要进行转换的列,排除第一列 names_to = 'Year', # 转换后的列名将存储在名为'Year'的新列中 values_to = 'SDI') %>% # 转换后的数值将存储在名为'SDI'的新列中 mutate(ID = paste0(Location,"_", Year)) #用于后续数据框合并 ##3.3 制作第三列数据:全球和21个地区,1990-2019年,年龄标准化的DALYs rate
Global_21regions_DALYs_ASR <- Global_21Regions_1990to2019 %>% select("location_name","year","sex_name","age_name","measure_name","metric_name","val") %>% filter(location_name %in% order_globalandregions$V1, sex_name == "Both", age_name == "Age-standardized", measure_name == "DALYs (Disability-Adjusted Life Years)", metric_name == "Rate") %>% rename("DALYs_ASR" = "val") %>% #列名重命名 mutate(DALYs_ASR = round(DALYs_ASR/1000,5), ID = paste0(location_name,"_",year)) #用于后续数据框合并 ##3.4 将上述数据框合并(基于location_time组合)
Global_21regions_DALYs_ASR_LocationandYear <- left_join(SDI_value_Global_21Regions_long,Global_21regions_DALYs_ASR, by = 'ID') %>% mutate(SDI = as.numeric(str_replace(SDI,'\\·','.'))) #因为用于相关性分析的变量需要是数值型变量,而不能是字符串型。明确变量类型用class函数
``` #4.计算相关系数并可视化
```{R}
##4.1 计算相关系数(包括置信区间和p值)
cor_test <- cor.test(Global_21regions_DALYs_ASR_LocationandYear[,"SDI",drop = T], Global_21regions_DALYs_ASR_LocationandYear[, "DALYs_ASR", drop = TRUE]) cor_r <- cor_test$estimate
cor_int<- cor_test$conf.int
cor_p <- cor_test$p.value ##4.2 基于ggplot函数进行可视化(数据、映射【x轴 + y轴】、几何图形【点 + 线 + 文本】、坐标轴信息、主题)
Corr_Global_Regions_SDI <- ggplot(Global_21regions_DALYs_ASR_LocationandYear, # 数据框,用于绘图 aes(Global_21regions_DALYs_ASR_LocationandYear[,"SDI",drop = T], # x轴变量,从数据框中获取"SDI"列 Global_21regions_DALYs_ASR_LocationandYear[,"DALYs_ASR",drop = T])) + # y轴变量,从数据框中获取"DALYs_ASR"列 geom_point(aes(color = Location, shape= Location))+ #添加图形几何图层以及该图层中颜色/形状映射 scale_shape_manual(values = 1:22) + #手动设置点的形状,一共22种 geom_smooth(colour='black',stat = "smooth",method='loess',se=F,span=0.5) + #添加平滑曲线的图形几何图层 geom_text(x = min(Global_21regions_DALYs_ASR_LocationandYear$SDI + 0.2), #添加文本标签的图层,设置该文本的位置(x和y)、内容(label)、大小 y = max(Global_21regions_DALYs_ASR_LocationandYear[,"DALYs_ASR",drop = T])*0.8, label = paste("R =", round(cor_r, 2),"(", round(cor_int[1],2), "to", round(cor_int[2],2), ")","\n", "p =", ifelse(cor_p<0.001,"< 0.001",round(cor_p,2))), hjust = 1, vjust = 0, size = 4)+ ylab(paste0("DALYs_ASR",'\n (100,000 persons)'))+ # 设置x和y轴的标签 xlab("Sociodemographic index")+ theme_bw(base_size = 8)+ #设置主题样式和基本字体大小,theme_bw 函数用于设置图表的主题样式为黑白风格。通过 base_size = 14 参数,设置图表的基本字体大小为 14 theme(panel.background = element_rect(fill = "transparent"), # 设置绘图区域的背景为透明 plot.background = element_rect(fill = "transparent"), # 设置整个绘图的背景为透明 legend.position = 'top', # 设置legend的位置和大小 legend.key.size = unit(0.3, "cm"), legend.title = element_blank, axis.text.x = element_text(face = "bold",size = 8), # 设置坐标轴文本的外观和大小 axis.text.y = element_text(face = "bold",size = 8), axis.title.x = element_text(face = "bold",size = 10),# 设置坐标轴标题的外观和大小 axis.title.y = element_text(face = "bold",size = 10)) Corr_Global_Regions_SDI
``` #5.文件储存
```{R}
ggsave(Corr_Global_Regions_SDI, file = './output/DALYs_rate_22region_SDI.pdf', width = 7, height = 4)
```
Correlation_SDI_countries.Rmd 下载学习代码
点击展开完整代码
---
title: "Correlation_SDI_国家层面"
output: html_document
date: "2023-08-7"
--- #1.安装并加载相关R包
```{r}
#install.packages("readxl")
#install.packages("tidyverse")
#install.packages("ggplot2")
library(readxl)
library(tidyverse)
library(ggplot2)
``` #2.加载数据
```{r}
##获取SDI_value文件(这个文件是location × year的SDI矩阵)
SDI_value <- readxl::read_xlsx('./data/SDI_value_GBD2019_1990_2019_modified.XLSX',sheet=1) #读入xlsx文件的第二个sheet,跳过第一行 ##获取204个国家的COPD疾病负担数据(我们直接利用TableS4的数据)
Global_21Regions_204counties_1990to2019 <- read.csv('./data/Global_21Regions_204counties_1990to2019.csv', header=T)
Global_21Regions_204counties_1990to2019[Global_21Regions_204counties_1990to2019$location_name == "C么te d'Ivoire","location_name"] = "Côte d'Ivoire" ##获取204个国家的list文件(我们直接利用TableS4的数据)
list_global_21regions_204countries <- read_xlsx("./data/list_global_21regions_204countries.xlsx", sheet=1)
``` #3.数据清洗(即需要整理出3列数据,分别是204个国家的location list,SDI值,疾病负担指标DALYs ASR) ```{r}
##3.1 制作第一列数据:204个国家的location list
list_204countries <- list_global_21regions_204countries %>% filter(!subregion %in% c("Global","region21")) %>% #匹配符号,常常与向量一起使用 rename("Location" = "Country") ##3.2 制作第二列数据:204个国家的在2019年的SDI数据
SDI_value_204countries <- SDI_value %>% select("Location","2019") %>% filter(Location %in% list_204countries$Location) %>% rename("SDI_2019"="2019") ##3.3 制作第三列数据:204个国家,2019年,年龄标准化的DALYs rate
Countries204_DALYs_ASR <- Global_21Regions_204counties_1990to2019 %>% select("location_name","year","sex_name","age_name","measure_name","metric_name","val") %>% filter(location_name %in% list_204countries$Location, year == "2019", sex_name == "Both", age_name == "Age-standardized", measure_name == "DALYs (Disability-Adjusted Life Years)", metric_name == "Rate") %>% rename("DALYs_ASR_2019" = "val", "Location" = "location_name") #列名重命名 ##3.4 将上述数据框合并(基于location)
Countries204_DALYs_ASR_SDI_1 <- left_join(SDI_value_204countries,Countries204_DALYs_ASR, by = 'Location') Countries204_DALYs_ASR_SDI_2 <- left_join(Countries204_DALYs_ASR_SDI_1, list_204countries, by = 'Location') %>% mutate(SDI_2019 = as.numeric(str_replace(SDI_2019,'\\·','.'))) #因为用于相关性分析的变量需要是数值型变量,而不能是字符串型。明确变量类型用class函数
``` #4.计算相关系数并可视化
```{R}
##4.1 计算相关系数
cor_test <- cor.test(Countries204_DALYs_ASR_SDI_2[,"SDI_2019",drop = T], Countries204_DALYs_ASR_SDI_2[, "DALYs_ASR_2019", drop = TRUE]) cor_r <- cor_test$estimate
cor_int<- cor_test$conf.int
cor_p <- cor_test$p.value ##4.2 基于ggplot函数进行可视化
Corr_204countries_SDI <- ggplot(Countries204_DALYs_ASR_SDI_2, # 数据框,用于绘图 aes(Countries204_DALYs_ASR_SDI_2[,"SDI_2019",drop = T], # x轴变量,从数据框中获取"SDI"列 Countries204_DALYs_ASR_SDI_2[,"DALYs_ASR_2019",drop = T])) + # y轴变量,从数据框中获取"DALYs_ASR"列 geom_point(aes(color = subregion),size = 0.5)+ #添加图形几何图层以及该图层中颜色/形状映射 scale_shape_manual(values = 1:22) + #手动设置点的形状,一共22种 geom_smooth(colour='black',stat = "smooth",method='loess',se=F,span=0.5) + #添加平滑曲线的图形几何图层 geom_text(x = min(Countries204_DALYs_ASR_SDI_2$SDI_2019 + 0.25), #添加文本标签的图层,设置该文本的位置(x和y)、内容(label)、大小 y = max(Countries204_DALYs_ASR_SDI_2[,"DALYs_ASR_2019",drop = T])*0.8, label = paste("R =", round(cor_r, 2),"(", round(cor_int[1],2), "to", round(cor_int[2],2), ")","\n", "p =", ifelse(cor_p<0.001,"< 0.001",round(cor_p,2))), hjust = 1, vjust = 0, size = 4)+ geom_text(aes(label = Location,color = subregion), # 将Location列的值作为标签 hjust = 0.7, vjust = 0, size = 1) + ylab(paste0("DALYs_ASR",'\n (100,000 persons)'))+ # 设置x和y轴的标签 xlab("Socio-demographic index")+ theme_bw(base_size = 8)+ #设置主题样式和基本字体大小,theme_bw 函数用于设置图表的主题样式为黑白风格。通过 base_size = 14 参数,设置图表的基本字体大小为 14 theme(panel.background = element_rect(fill = "transparent"), # 设置绘图区域的背景为透明 plot.background = element_rect(fill = "transparent"), # 设置整个绘图的背景为透明 legend.position = 'top', # 设置legend的位置和大小 legend.key.size = unit(0.3, "cm"), legend.title = element_blank, axis.text.x = element_text(face = "bold",size = 8), # 设置坐标轴文本的外观和大小 axis.text.y = element_text(face = "bold",size = 8), axis.title.x = element_text(face = "bold",size = 10),# 设置坐标轴标题的外观和大小 axis.title.y = element_text(face = "bold",size = 10)) ``` #5.文件导出
```{R}
ggsave(Corr_204countries_SDI, file = './output/DALYs_rate_204countries_SDI.pdf', width = 8, height = 4)
``` risk factor.Rmd 下载学习代码
点击展开完整代码
---
title: "risk factor"
output: html_document
date: "2023-08-12"
--- #1.下载并加载R包
```{r}
#install.packages("tidyverse")
#install.packages("ggplot2")
library(tidyverse)
library(ggplot2)
``` #2.数据加载
```{R}
Risk_factor_2019 <- read.csv("./data/Risk_factor_2019.csv",header = T)
order_globalandregions <- read.csv("./data/order_globalandregions.csv",header = F)
``` #3.数据清洗(0.了解数据;1.行与列的筛选;2.数据单位转化;3.与可视化相关的变量内顺序调整)
```{r}
##确定数据清洗的目标:通过对比发现需要进行3个清洗步骤,分别是列行筛选(非常7+1、数值格式、行排序 ##3.0初步了解本数据的risk factor信息
unique(Risk_factor_2019$rei_name) ##3.1基于select和filter函数分别筛选列与行 (一共10列,22*8=176行)
Risk_factor_2019_DALYs_Male <- Risk_factor_2019 %>% select("location_name","year","sex_name","age_name","measure_name","metric_name","rei_name","val","upper", "lower" ) %>% #管道符 filter(location_name %in% order_globalandregions$V1,#匹配符 year == "2019", sex_name == "Male", age_name == "All ages", measure_name == "DALYs (Disability-Adjusted Life Years)", metric_name == "Percent", rei_name %in% c("Ambient particulate matter pollution", "Household air pollution from solid fuels","Ambient ozone pollution","Smoking" ,"Secondhand smoke" , "Occupational particulate matter, gases, and fumes","High temperature" ,"Low temperature")) ##3.2数据单位转化(单位、小数点):使用mutate函数
Risk_factor_2019_DALYs_Male <- Risk_factor_2019_DALYs_Male %>% mutate(val = round(val*100,1), upper_1 = round(upper*100,1), lower = round(lower*100,1), rei_name = str_wrap(Risk_factor_2019_DALYs_Male$rei_name, width = 30)) ##3.3与可视化相关的变量内顺序调整(Location_name, rei_name)
Risk_factor_2019_DALYs_Male <- Risk_factor_2019_DALYs_Male %>% mutate(location_name = fct_relevel(location_name,rev(c('Global','High-income Asia Pacific','High-income North America', 'Western Europe','Australasia', 'Andean Latin America','Tropical Latin America','Central Latin America','Southern Latin America', 'Caribbean', 'Central Europe', 'Eastern Europe','Central Asia', 'North Africa and Middle East', 'South Asia', 'Southeast Asia', 'East Asia','Oceania', 'Western Sub-Saharan Africa', 'Eastern Sub-Saharan Africa', 'Central Sub-Saharan Africa', 'Southern Sub-Saharan Africa'))), rei_name = fct_relevel(rei_name, c('Smoking', 'Ambient particulate matter\npollution', 'Occupational particulate\nmatter, gases, and fumes', 'Household air pollution from\nsolid fuels', 'Secondhand smoke', 'Ambient ozone pollution', 'Low temperature', 'High temperature'))) #以下为Table1中的老方法,不适用于目前的情景
###对行进行排序
####第一步是新建一个因子(将向量转变为因子),用于第二步
#Order_location_factor <- factor(Risk_factor_2019_DALYs_Male$"location_name", levels = order_globalandregions$"V1") ####第二步是应用order函数,对数据框的行按照既定的因子进行排序
#Risk_factor_2019_DALYs_Male <- Risk_factor_2019_DALYs_Male[order(Order_location_factor), ] ###对行进行排序
####第一步是新建一个因子(将向量转变为因子),用于第二步
#Order_RF_factor <- factor(Risk_factor_2019_DALYs_Male$"rei_name", levels = c('Smoking','Ambient particulate matter pollution','Occupational particulate matter, gases, and fumes','Household air pollution from solid fuels', 'Secondhand smoke','Ambient ozone pollution','Low temperature', 'High temperature'))
####第二步是应用order函数,对数据框的行按照既定的因子进行排序
#Risk_factor_2019_DALYs_Male <- Risk_factor_2019_DALYs_Male[order(Order_RF_factor), ]
``` #4.可视化(Male)
```{R}
##4.1 针对目标图形的准备工作(手动设置颜色)
color_risk <- c('Smoking' = '#2B24D6', 'Ambient particulate matter\npollution' = '#AD1B2A', 'Occupational particulate\nmatter, gases, and fumes' = '#006660', 'Household air pollution from\nsolid fuels' = '#523104', 'Secondhand smoke' = '#7E8E2B', 'Ambient ozone pollution' = '#2396FD', 'Low temperature' = '#B16184', 'High temperature' = '#FBD9CD') ##4.2 可视化(柱状图、文本、坐标轴互换、小面板分割、坐标轴设置、主题设置[背景、网格线、坐标轴text和title、分面的背景和文本、图例])
p1 <- ggplot+ geom_col(data = Risk_factor_2019_DALYs_Male,aes(x = location_name,y = val, fill = rei_name),color = 'black',width = .7,position = 'dodge',size = .3)+ #geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, position = position_dodge(0.7))+ 用于加误差线 scale_fill_manual(values = color_risk)+ #手动添加颜色 scale_y_continuous(breaks = c(0,20,40,60,80),limits = c(0, 90),expand = c(0,0))+ #手动调整J纵坐标轴 coord_flip + #将 X 轴和 Y 轴交换位置,从而实现横向显示的效果(coord的全称是coordinate) geom_text(data = Risk_factor_2019_DALYs_Male, aes(x= location_name, label=val, y=val+1), position=position_dodge(width=0.7), vjust=0.5,hjust = -0.1, size = 2) + #添加文本图层 facet_wrap(~rei_name, scales = "free_x",nrow = 1)+ #将数据分割成小块或小面板 ylab('DALYs attributable to risk factors (%)')+ xlab("GBD regions")+ theme_light+ #多种预设的主题,如theme_classic, theme_minimal(简洁样式), theme_bw(黑白样式),theme_light(浅色) theme(panel.background = element_rect(fill = "transparent"),#elemet_rect函数是用于对矩形元素的相关参数设置(如背景background) panel.grid.major = element_blank, #grid是指网络,panel.grid.major是一个参数,用于设置主要网络线的颜色、类型、粗细,而element_blank是指隐藏 panel.grid.minor = element_blank, axis.text = element_text(size = 6, colour = 'black'), axis.title.x = element_text(size = 8, colour = 'black'), axis.title.y = element_text(size = 8, colour = 'black'), strip.background = element_rect(fill = '#B6D1FA'), #strip.background和strip.text都是针对分面facet的设置(背景和文本) strip.text = element_text(colour = 'black',size = 6,lineheight = 1), legend.position = 'none')
``` #5.文件导出
```{R}
ggsave(p1,file = "./output/risk_factor_male.pdf", units = 'cm', height = 15, width = 25 ) ```