APC模型
读取相关R包
library(magrittr)
library(dplyr)
library(tidyverse)
library(data.table)
source('source_apc.R')读取人口学数据
dir <- "path/to/GBD_Population"
files <- list.files(dir,
pattern = ".CSV",
full.names = T)
GBD_population <- map_dfr(files,fread) %>% as.data.frame()## 对人口学数据进行整理
## 取我们需要的列名
GBD_population <- GBD_population %>%
select(location_name,sex_name,age_group_name,year_id,metric_name,val)
## 采用case_when函数 对sex的标签进行重新命名
GBD_population <- GBD_population %>% mutate(sex_name = case_when(
sex_name == "both" ~ "Both",
sex_name == "male" ~ "Male",
sex_name == "female" ~ "Female"))
names(GBD_population) <- c("location","sex","age","year","metric","val")整理疾病数据
## 对疾病数据进行整理
case <- read.csv('糖尿病.csv')
# 按照下面条件筛选数据
DM <- case %>%
filter(measure=="Prevalence") %>%
filter(cause!="Diabetes mellitus") %>%
filter(metric=="Number") %>%
filter(age!="All Ages")匹配筛选人口数据
## 合并疾病和人口学数据
df <- left_join(DM,GBD_population,by=c("location","sex","age","metric","year"))
colnames(df)[8] <- "prevalence"
colnames(df)[11] <- "population"
df <- df %>%
mutate(population=round(population,digits = 0)) %>%
mutate(prevalence=round(prevalence,digits = 0)) %>%
select(location,sex,age,cause,year,prevalence,population) %>%
arrange(location,sex,age,cause,year) # 筛选不同年龄组的数据
ages <- c("15 to 19","20 to 24","25 to 29","30 to 34", "35 to 39", "40 to 44", "45 to 49", "50 to 54", "55 to 59",
"60 to 64", "65 to 69", "70 to 74", "75 to 79", "80 to 84", "85 to 89", "90 to 94",'95 plus')
df <- df %>% filter(prevalence > 0 &
age %in% ages) %>% #删除缺失值
mutate(age=sub('95 plus',replacement = '95 to 99', age)) %>%
mutate(age=factor(age,levels = ages,ordered = T))
## 对year重新命名
df <- df %>% mutate(year=case_when(
year<1995 ~ "1990~1994",
year<2000 & year>=1995 ~ "1995~1999",
year<2005 & year>=2000 ~ "2000~2004",
year<2010 & year>=1995 ~ "2005~2009",
year<2015 & year>=2000 ~ "2010~2014",
year<2020 & year>=2015 ~ "2015~2019"))
df2 <- df %>%
group_by(location,sex,cause,age,year) %>%
mutate(case=round(mean(prevalence),digits = 0)) %>%
mutate(population2=round(mean(population),digits = 0)) %>%
select(location,sex,age,cause,year,case,population2) %>%
unique()## 挑选除全球、男性、不同年龄段的、不同年份的数据
## 将数据转换成行为年龄,列为年的数据
apc_case <- df2 %>% filter(location=="Global" & sex== "Male")
apc_case_preval <- reshape2::dcast(apc_case,age~year,value.var = 'case') %>%
arrange(age) %>%
select(-1)
## 将数据转换成行为年龄,列为年的数据
apc_case_pop <- reshape2::dcast(apc_case,age~year,value.var = 'population2') %>%
arrange(age) %>%
select(-1)
head(apc_case_preval)课程示例来源 1990~1994 1995~1999 2000~2004 2005~2009 2010~2014 2015~2019
课程示例来源 1 446019 511722 617877 742345 834629 918286
课程示例来源 2 1333600 1415518 1621238 2155945 2696779 2807725
课程示例来源 3 2524464 2863499 3085007 3632505 4723582 5665627
课程示例来源 4 3566148 4384714 4999932 5591028 6546156 8364953
课程示例来源 5 5095827 5781665 7123508 8411145 9453526 11195830
课程示例来源 6 6395007 7649305 8821923 11244421 13335295 15163028
head(apc_case_pop)课程示例来源 1990~1994 1995~1999 2000~2004 2005~2009 2010~2014 2015~2019
课程示例来源 1 263489077 275964699 305287790 320884895 312624286 314526879
课程示例来源 2 252444621 254270673 266409630 295459239 311510043 304807073
课程示例来源 3 234080057 249773178 251850669 263859000 291488843 307325657
课程示例来源 4 203974876 233248699 248120178 252430916 264438465 291326984
课程示例来源 5 186252870 202377891 231761665 247151662 252004962 263678889
课程示例来源 6 158058040 182004469 197196451 226075241 241663285 246950961
## 将数据整理成 APC网页版需要的数据格式:疾病和人口数据交替排列
for (i in 1:(ncol(apc_case_preval)+ncol(apc_case_pop))){
if(i == 1){
result <-apc_case_preval[,i] %>% as.data.frame()}
else{
if(i%%2==0){
result <- cbind(result,apc_case_pop[,ceiling(i/2)])}
else{
result <- cbind(result,apc_case_preval[,ceiling(i/2)])}
}
}
head(result)课程示例来源 . apc_case_pop[, ceiling(i/2)] apc_case_preval[, ceiling(i/2)]
课程示例来源 1 446019 263489077 511722
课程示例来源 2 1333600 252444621 1415518
课程示例来源 3 2524464 234080057 2863499
课程示例来源 4 3566148 203974876 4384714
课程示例来源 5 5095827 186252870 5781665
课程示例来源 6 6395007 158058040 7649305
课程示例来源 apc_case_pop[, ceiling(i/2)] apc_case_preval[, ceiling(i/2)]
课程示例来源 1 275964699 617877
课程示例来源 2 254270673 1621238
课程示例来源 3 249773178 3085007
课程示例来源 4 233248699 4999932
课程示例来源 5 202377891 7123508
课程示例来源 6 182004469 8821923
课程示例来源 apc_case_pop[, ceiling(i/2)] apc_case_preval[, ceiling(i/2)]
课程示例来源 1 305287790 742345
课程示例来源 2 266409630 2155945
课程示例来源 3 251850669 3632505
课程示例来源 4 248120178 5591028
课程示例来源 5 231761665 8411145
课程示例来源 6 197196451 11244421
课程示例来源 apc_case_pop[, ceiling(i/2)] apc_case_preval[, ceiling(i/2)]
课程示例来源 1 320884895 834629
课程示例来源 2 295459239 2696779
课程示例来源 3 263859000 4723582
课程示例来源 4 252430916 6546156
课程示例来源 5 247151662 9453526
课程示例来源 6 226075241 13335295
课程示例来源 apc_case_pop[, ceiling(i/2)] apc_case_preval[, ceiling(i/2)]
课程示例来源 1 312624286 918286
课程示例来源 2 311510043 2807725
课程示例来源 3 291488843 5665627
课程示例来源 4 264438465 8364953
课程示例来源 5 252004962 11195830
课程示例来源 6 241663285 15163028
课程示例来源 apc_case_pop[, ceiling(i/2)]
课程示例来源 1 314526879
课程示例来源 2 304807073
课程示例来源 3 307325657
课程示例来源 4 291326984
课程示例来源 5 263678889
课程示例来源 6 246950961
## APC模型进一步处理
R <- prepare_rates(result,
StartYear=1990,StartAge=15,Interval=5,
fullname='',description='')
## APC模型计算
M <- apc2fit(R)
## 画图
plot.apc1(M)decomposition analysis
将疾病发病数/死亡数/DALY转分解成3个因素:人口数变化、年龄结构变化、疾病本身流行病学改变
R包读取
library(dplyr)
library(data.table)
library(purrr)
library(tidyr)
library(ggplot2)
library(ggsci)读取疾病数据
case <- read.csv('EC_predict.csv')
ages <- c("20 to 24", "25 to 29",
"30 to 34", "35 to 39", "40 to 44", "45 to 49", "50 to 54", "55 to 59",
"60 to 64", "65 to 69", "70 to 74", "75 to 79", "80 to 84", "85 to 89",
"90 to 94", "95 plus")
## sub函数:将年龄的label,比如"15-19 years"转换成“15 to 19”,以匹配人口学数据和疾病数据
case <- case %>% mutate(age=sub('-',replacement = ' to ', age)) %>%
mutate(age=sub(' years',replacement = '', age)) %>%
mutate(age=sub(' year',replacement = '', age)) %>%
mutate(age=sub('95\\+',replacement = '95 plus', age)) %>%
mutate(age=sub('Age to standardized',replacement = 'Age-standardized', age)) %>%
mutate(age=sub('<5',replacement = 'Under 5', age)) %>%
filter(val>0) %>%
filter(age %in% ages) %>%
mutate(age = factor(age,levels = ages, ordered = T))
str(case)课程示例来源 'data.frame': 34560 obs. of 10 variables:
课程示例来源 $ measure : chr "Deaths" "Deaths" "Deaths" "Deaths" ...
课程示例来源 $ location: chr "Global" "Global" "Global" "Global" ...
课程示例来源 $ sex : chr "Male" "Female" "Both" "Male" ...
课程示例来源 $ age : Ord.factor w/ 16 levels "20 to 24"<"25 to 29"<..: 1 1 1 1 1 1 2 2 2 2 ...
课程示例来源 $ cause : chr "Esophageal cancer" "Esophageal cancer" "Esophageal cancer" "Esophageal cancer" ...
课程示例来源 $ metric : chr "Number" "Number" "Number" "Rate" ...
课程示例来源 $ year : int 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
课程示例来源 $ val : num 204.0673 211.5666 415.6338 0.0822 0.0866 ...
课程示例来源 $ upper : num 233.1789 251.6184 473.4528 0.0939 0.103 ...
课程示例来源 $ lower : num 147.8799 148.4676 308.4978 0.0596 0.0608 ...
读取人口学数据
dir <- "path/to/GBD_Population"
files <- list.files(dir,
pattern = ".CSV",
full.names = T)
## 这里我们只需要2年:2019和1990年,我们需要比较2019年和1990的情况
GBD_population <- map_dfr(files[c(1,30)],fread) %>% as.data.frame()
GBD_population <- GBD_population %>% mutate(sex_name = case_when(
sex_name == "both" ~ "Both",
sex_name == "male" ~ "Male",
sex_name == "female" ~ "Female"))
population <- GBD_population %>%
select("location_name","sex_name","year_id","age_group_name","val") %>%
filter(age_group_name %in% ages) %>%
mutate(age_group_name = factor(age_group_name, levels = ages,ordered = T))
str(population)课程示例来源 'data.frame': 64800 obs. of 5 variables:
课程示例来源 $ location_name : chr "Global" "Global" "Global" "Global" ...
课程示例来源 $ sex_name : chr "Male" "Male" "Male" "Male" ...
课程示例来源 $ year_id : int 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
课程示例来源 $ age_group_name: Ord.factor w/ 16 levels "20 to 24"<"25 to 29"<..: 1 2 3 4 5 6 7 8 9 10 ...
课程示例来源 $ val : num 2.48e+08 2.23e+08 1.95e+08 1.79e+08 1.46e+08 ...
我们以单个国家给大家进行讲解
分解分析数据准备
## 这里,我们以全球男性发病数据为例,进行decomposition的讲解
## 获取1990年不同年龄段的占比
Global_population_1990 <- population %>%
filter(location_name == 'Global' &
year_id == 1990 &
sex_name == 'Male') %>%
arrange(age_group_name) ### 按年龄从小到大排序
Global_1990 <- sum(Global_population_1990$val)
Global_population_1990$percent <- Global_population_1990$val/Global_1990 ### 获取1990年不同年龄段的占比
## 获取2019年不同年龄段的占比
Global_population_2019 <- population %>%
filter(location_name == 'Global' &
year_id == 2019 &
sex_name == 'Male') %>%
arrange(age_group_name) ### 按年龄从小到大排序
Global_2019 <- sum(Global_population_2019$val)
Global_population_2019$percent <- Global_population_2019$val/Global_2019 ### 获取2019年不同年龄段的占比
### a代表年龄占比,p代表总人群数
a_1990 <- Global_population_1990$percent
a_2019 <- Global_population_2019$percent
p_1990 <- Global_1990
p_2019 <- Global_2019
## 获取率的数据
case_1990 <- case %>% filter(year == 1990 &
sex == 'Male' &
location == 'Global' &
metric == 'Rate' &
measure == 'Incidence') %>%
arrange(age)
case_2019 <- case %>% filter(year == 2019 &
sex == 'Male' &
location == 'Global' &
metric == 'Rate' &
measure == 'Incidence') %>%
arrange(age)
r_1990 <- as.numeric(case_1990$val)/10^5 ## 单位转换
r_2019 <- as.numeric(case_2019$val)/10^5 ## 单位转换decomposition结果计算
计算原理与公式 疾病数的计算原理
分解分析的原理:
## 根据公式计算
a_effect <- round((sum(a_2019*p_1990*r_1990) + sum(a_2019*p_2019*r_2019))/3 +
(sum(a_2019*p_1990*r_2019) + sum(a_2019*p_2019*r_1990))/6 -
(sum(a_1990*p_1990*r_1990) + sum(a_1990*p_2019*r_2019))/3 -
(sum(a_1990*p_1990*r_2019) + sum(a_1990*p_2019*r_1990))/6,3)
p_effect <- round((sum(a_1990*p_2019*r_1990) + sum(a_2019*p_2019*r_2019))/3 +
(sum(a_1990*p_2019*r_2019) + sum(a_2019*p_2019*r_1990))/6 -
(sum(a_1990*p_1990*r_1990) + sum(a_2019*p_1990*r_2019))/3 -
(sum(a_1990*p_1990*r_2019) + sum(a_2019*p_1990*r_1990))/6,3)
r_effect <- round((sum(a_1990*p_1990*r_2019) + sum(a_2019*p_2019*r_2019))/3 +
(sum(a_1990*p_2019*r_2019) + sum(a_2019*p_1990*r_2019))/6 -
(sum(a_1990*p_1990*r_1990) + sum(a_2019*p_2019*r_1990))/3 -
(sum(a_1990*p_2019*r_1990) + sum(a_2019*p_1990*r_1990))/6,3)
overll_differ <- round(a_effect + p_effect + r_effect,3)循环计算6个地区的结果
## decomposition calculation
decomposition_name <- c('location','overll_difference','a_effect','p_effect','r_effect','a_percent',
'p_percent','r_percent')
decomposition_data <- as.data.frame(matrix(nrow=0,ncol=length(decomposition_name)))
names(decomposition_data) <- decomposition_name
loc_name <- c("Global","High SDI","High-middle SDI","Middle SDI","Low-middle SDI","Low SDI")
for (a in loc_name) {
Global_population_1990 <- population %>%
filter(location_name == a &
year_id == 1990 &
sex_name == 'Male') %>%
arrange(age_group_name) ### 按年龄从小到大排序
Global_1990 <- sum(Global_population_1990$val) ## 计算1990人口总数
Global_population_1990$percent <- Global_population_1990$val/Global_1990 ### 获取1990年不同年龄段的占比
## 获取2019年不同年龄段的占比
Global_population_2019 <- population %>%
filter(location_name == a &
year_id == 2019 &
sex_name == 'Male') %>%
arrange(age_group_name) ### 按年龄从小到大排序
Global_2019 <- sum(Global_population_2019$val) ## 计算2019人口总数
Global_population_2019$percent <- Global_population_2019$val/Global_2019 ### 获取2019年不同年龄段的占比
### a代表年龄占比,p代表总人群数
a_1990 <- Global_population_1990$percent
a_2019 <- Global_population_2019$percent
p_1990 <- Global_1990
p_2019 <- Global_2019
## 获取率的数据
case_1990 <- case %>% filter(year == 1990 &
sex == 'Male' &
location == a &
metric == 'Rate' &
measure == 'Incidence') %>%
arrange(age)
case_2019 <- case %>% filter(year == 2019 &
sex == 'Male' &
location == a &
metric == 'Rate' &
measure == 'Incidence') %>%
arrange(age)
r_1990 <- as.numeric(case_1990$val)/10^5 ## 单位转换
r_2019 <- as.numeric(case_2019$val)/10^5 ## 单位转换
##### 根据公式计算
a_effect <- round((sum(a_2019*p_1990*r_1990) + sum(a_2019*p_2019*r_2019))/3 +
(sum(a_2019*p_1990*r_2019) + sum(a_2019*p_2019*r_1990))/6 -
(sum(a_1990*p_1990*r_1990) + sum(a_1990*p_2019*r_2019))/3 -
(sum(a_1990*p_1990*r_2019) + sum(a_1990*p_2019*r_1990))/6,3)
p_effect <- round((sum(a_1990*p_2019*r_1990) + sum(a_2019*p_2019*r_2019))/3 +
(sum(a_1990*p_2019*r_2019) + sum(a_2019*p_2019*r_1990))/6 -
(sum(a_1990*p_1990*r_1990) + sum(a_2019*p_1990*r_2019))/3 -
(sum(a_1990*p_1990*r_2019) + sum(a_2019*p_1990*r_1990))/6,3)
r_effect <- round((sum(a_1990*p_1990*r_2019) + sum(a_2019*p_2019*r_2019))/3 +
(sum(a_1990*p_2019*r_2019) + sum(a_2019*p_1990*r_2019))/6 -
(sum(a_1990*p_1990*r_1990) + sum(a_2019*p_2019*r_1990))/3 -
(sum(a_1990*p_2019*r_1990) + sum(a_2019*p_1990*r_1990))/6,3)
overll_differ <- round(a_effect + p_effect + r_effect,2)
a_percent <- round(a_effect/overll_differ*100,2)
p_percent <- round(p_effect/overll_differ*100,2)
r_percent <- round(r_effect/overll_differ*100,2)
temp <- c(a,overll_differ,a_effect,p_effect,r_effect,a_percent,
p_percent,r_percent) %>% t() %>% as.data.frame()
names(temp) <- decomposition_name
decomposition_data <- rbind(decomposition_data,temp)
}判断结果计算是否正确
data <- read.csv("EC_predict.csv")
num_1990 <- data %>% filter(age == 'All ages' &
sex == 'Male' &
metric == 'Number' &
year == 1990 &
measure == 'Incidence') %>%
select(location, val) %>%
rename(val_1990 = val)
num_2019 <- data %>% filter(age == 'All ages' &
sex == 'Male' &
metric == 'Number' &
year == 2019 &
measure == 'Incidence') %>%
select(location, val) %>%
rename(val_2019 = val)
decomposition_data <- left_join(decomposition_data,num_1990, by = 'location') %>%
left_join(num_2019, by = 'location')
decomposition_data$diff <- decomposition_data$val_2019 - decomposition_data$val_1990
decomposition_data[,2:11] <- decomposition_data[,2:11] %>% apply(c(1,2),as.numeric)
round(decomposition_data$diff) == round(decomposition_data$overll_difference)课程示例来源 [1] TRUE TRUE TRUE TRUE TRUE TRUE
decomposition作图
names(decomposition_data)[2:5] <- c('Overll difference','Aging','Population','Epidemiological change')
decomposition_plot <- decomposition_data[c(1:5)] %>%
pivot_longer(Aging:`Epidemiological change`,
names_to = "varname",
values_to = "value") %>%
mutate(value=as.numeric(value)) %>%
mutate(location=factor(location,levels = loc_name,ordered = T))
p <- ggplot(decomposition_plot, aes(x= location,y=value, fill= varname)) +
geom_bar(stat="identity",position = "stack") +
coord_flip() + scale_fill_nejm() +
theme_bw()
decomposition_data$`Overll difference` <- as.numeric(decomposition_data$`Overll difference`)
plot <- p+ geom_point(data=decomposition_data, mapping=aes(x=location,y= `Overll difference`),fill='black',color='black',size=3)Frontier analysis
载入R包
library(dplyr)
library(data.table)
library(purrr)
library(tidyr)
library(ggplot2)
library(ggrepel)数据准备
case <- read.csv('frontier.csv',header = T)
SDI <- read.csv('SDI1990-2019modified.csv',header = T)## 合并SDI值
frontier_data <- case %>%
filter(age=='Age-standardized' &
metric== 'Rate' &
sex=='Both' &
measure=='DALYs (Disability-Adjusted Life Years)') %>%
select(2,7,8) %>% rename(ASR = val)
frontier_SDI <- left_join(frontier_data,SDI,by=c('location','year'))frontier思路
FDH方法是指按SDI、ASR后取每个位置的最小值,然后选取其中的点用水平线和垂直线连接,让所有的点都在这条线内部,但这个方法算出来的点是没有误差的,所以我们需要引入误差,采用重抽样方法来产生误差,同时我们需要找出每个点是不是super-efficient点,并进行排除。
具体思路是每个循环内:
1.有放回抽样形成一个新的数据
2.新数据内分别删除一个点画frontier线,按这个frontier线判断这个点是否落在这个线以内,如果是,不需要排除,如果判断这个点的值比frontier值低,认定是super-efficient点,需要进行排除;依次循环确认每个点是否是super-efficient点。
3.在新的数据里排除super-efficient点后,我们再对剩下的点计算每个SDI值的frontier。
以上方法为一个boostrap,1000次就是重复上述操作1000次
这里开始我们的假设:随着SDI的增大,疾病的ASR应该是逐渐降低的,因为SDI越高,国家的医疗水平更高,DALY率也越低
boostrap_DEA <- as.data.frame(matrix(nrow=0,ncol=6))
names(boostrap_DEA) <- c(names(frontier_SDI),'frontier','super')
boostrap_num <- 1 ### boostrap次数
for(interation_number in 1:boostrap_num){
boot_sample <- frontier_SDI[sample(1:nrow(frontier_SDI),nrow(frontier_SDI),replace = TRUE),] ## 有放回抽样,抽样nrow(frontier_SDI)次
boot_sample <- boot_sample %>% arrange(SDI,desc(ASR)) ## 我们对SDI从小到大排列、ASR从大到小排列进行排列
boot_sample$super <- NA
boot_sample$super[1] <- 0 ## super变量是用来判断是否是super-efficient
for (i in 2:nrow(frontier_SDI)) {
data <- boot_sample[-i,] ## 对每个点进行排除,判断是否是super-efficient
data$frontier <- NA ## 产生frontier变量
min <- data$ASR[1] ## ASR已从大到小排列,因此第一点是默认的frontier点
for (j in 1:(i-1)) { ## 为了节约计算空间,我们只需要对剔除点以前的数据生成frontier值来判断这个点是否是super-efficient
min <- ifelse(data$ASR[j]<min,data$ASR[j],min)
data$frontier[j] <- min} ## 判断该点是否比frontier值小,如果是,成为新的frontier
boot_sample$super[i] <- ifelse(boot_sample[i,1]==boot_sample[i+1,1] & boot_sample[i,2]==boot_sample[i+1,2],0, ifelse(boot_sample$ASR[i]<data$frontier[i-1],0,1)) ## 判断排除的点是否是super-efficient,两种情况,一种是排除的点和后面的点是同一个值,那就默认不是super-efficient;如果不和后一个点一样,就判断ASR和前一个点的frontier的大小,如果小于frontier,就认为是super-efficient
}
## 排除super-efficient后计算每个点的frontier值
boot_sample_exclude <- boot_sample[boot_sample$super==0,]
min <- boot_sample_exclude$ASR[1]
for (z in 1:nrow(boot_sample_exclude)){
min <- ifelse(boot_sample_exclude$ASR[z]<min,boot_sample_exclude$ASR[z],min)
boot_sample_exclude$frontier[z] <- min
}
boostrap_DEA <- rbind(boostrap_DEA,boot_sample_exclude)
boostrap_DEA <-boostrap_DEA %>% group_by(location,year,ASR,SDI)%>%summarize(frontier=mean(frontier))
print(interation_number)
}课程示例来源 [1] 1
#save(boostrap_DEA,file = 'boostrap_DEA.Rdata')load(file='boostrap_DEA.Rdata')
boostrap_DEA <- boostrap_DEA %>%
mutate(eff_diff = ASR - frontier)
boostrap_DEA_1990 <- boostrap_DEA %>%
filter(year == 1990)
boostrap_DEA_2019 <- boostrap_DEA %>%
filter(year == 2019)
boostrap_DEA_2019$trend <- ifelse(boostrap_DEA_2019$ASR > boostrap_DEA_1990$ASR, "Increase",
"Decrease")绘制示例论文中的图片
制作图A
plotA <- ggplot(boostrap_DEA, aes(SDI,ASR)) + geom_point(aes(color = year),size=1.8)+
scale_x_continuous(breaks = c(0,0.2,0.4,0.6,0.8,1.0),limits = c(0,1)) +
scale_y_reverse() + ## y轴旋转
scale_color_gradient(low='orange',high='darkgreen') + ## 连续性变量配色
stat_smooth(data=boostrap_DEA, aes(SDI,frontier),colour='black',formula=y ~ poly(x, 1),
stat = "smooth",method='loess',se=F,span=0.2,fullrange=T) + ## 绘制frontier线
theme_bw()制作图B
black <- boostrap_DEA_2019[order(boostrap_DEA_2019$eff_diff,decreasing = T),][1:15,] ## 获取距离差最大的15个点
blue <- subset(boostrap_DEA_2019,SDI<0.5)[order(subset(boostrap_DEA_2019,SDI<0.5)$eff_diff),][1:5,] ## 低SDI国家中距离差最小的5个国家
red <- subset(boostrap_DEA_2019,SDI>0.85)[order(subset(boostrap_DEA_2019,SDI>0.85)$eff_diff,decreasing = T),][1:5,] ## 高SDI国家中距离差最大的5个国家
plotB <- ggplot(boostrap_DEA_2019, aes(SDI,ASR)) + geom_point(aes(color = trend),size=2.5)+
scale_x_continuous(breaks = c(0,0.2,0.4,0.6,0.8,1.0),limits = c(0,1)) + ## x轴标度设定
scale_y_reverse() + ## y轴调转
stat_smooth(data=boostrap_DEA, aes(SDI,frontier),colour='black',formula=y ~ poly(x, 1),
stat = "smooth",method='loess',se=F,span=0.2,fullrange=T) + ## 画frontier拟合线
geom_text_repel(data=black,colour='black',aes(SDI,ASR, label = location),size=2.5,fontface= 'bold',max.overlaps = 160) + ##添加距离差最大的15个地区名称,并标记在对应的点上
geom_text_repel(data=red,colour='darkred',aes(SDI,ASR, label = location),size=2.5,fontface= 'bold',max.overlaps = 160) + ##添加高SDI国家中距离差最大的5个国家,并标记在对应的点上,用红色字体现实
geom_text_repel(data=blue,colour='darkblue',aes(SDI,ASR, label = location),size=2.5,fontface= 'bold',max.overlaps = 160) + ##添加低SDI国家中距离差最小的5个国家,并标记在对应的点上,用蓝色字体现实
theme_bw()参考文献
- A Web Tool for Age-Period-Cohort Analysis of Cancer Incidence and Mortality Rates. Cancer Epidemiology
- Analysis of the Global Burden of Disease study highlights the global, regional, and national trends of chronic kidney disease epidemiology from 1990 to 2016
- Healthcare Access and Quality Index based on mortality from causes amenable to personal health care in 195 countries and territories, 1990-2015: a novel analysis from the Global Burden of Disease Study 2015