library(magrittr)
library(dplyr)
library(tidyverse)
library(data.table)
#install.packages('')


dir <- "path/to/GBD_Population"
files <- list.files(dir,
                    pattern = ".CSV",
                    full.names = T)
GBD_population <- map_dfr(files,fread) %>% as.data.frame()

unique(GBD_population$year_id)
unique(GBD_population$age_group_name)

names(GBD_population)
## 对人口学数据进行整理
## 取我们需要的列名
GBD_population <- GBD_population %>% 
  select(location_name,sex_name,age_group_name,year_id,metric_name,val) 
## 采用case_when函数 对sex的标签进行重新命名
GBD_population <- GBD_population %>% mutate(sex_name = case_when(
  sex_name == "both"  ~ "Both",
  sex_name == "male"  ~ "Male",
  sex_name == "female" ~ "Female"))

names(GBD_population) <- c("location","sex","age","year","metric","val")

###
## 对疾病数据进行整理
case <- read.csv('糖尿病.csv')
# 按照下面条件筛选数据
DM <- case %>%
  filter(measure=="Prevalence") %>% 
  filter(cause!="Diabetes mellitus") %>%
  filter(metric=="Number") %>% 
  filter(age!="All Ages")

## 合并疾病和人口学数据
df <- left_join(DM,GBD_population,by=c("location","sex","age","metric","year"))

colnames(df)[8] <- "prevalence"
colnames(df)[11] <- "population"


df <- df %>% 
  mutate(population=round(population,digits = 0)) %>% 
  mutate(prevalence=round(prevalence,digits = 0)) %>% 
  select(location,sex,age,cause,year,prevalence,population) %>% 
  arrange(location,sex,age,cause,year) 


# 筛选不同年龄组的数据
ages <- c("15 to 19","20 to 24","25 to 29","30 to 34", "35 to 39", "40 to 44", "45 to 49", "50 to 54", "55 to 59",        
          "60 to 64", "65 to 69", "70 to 74", "75 to 79", "80 to 84", "85 to 89", "90 to 94",'95 to 99')  

df <- df %>% filter(prevalence > 0) %>% #删除缺失值
  mutate(age=sub('95 plus',replacement = '95 to 99', age)) %>% 
  mutate(age=factor(age,levels = ages,ordered = T)) 

## 对year重新命名
df <- df %>% mutate(year=case_when(
  year<1995 ~ "1990~1994",
  year<2000 & year>=1995 ~ "1995~1999",
  year<2005 & year>=2000 ~ "2000~2004",
  year<2010 & year>=1995 ~ "2005~2009",
  year<2015 & year>=2000 ~ "2010~2014",
  year<2020 & year>=2015 ~ "2015~2019"))

df2 <- df %>% 
  group_by(location,sex,cause,age,year) %>% 
  mutate(case=round(mean(prevalence),digits = 0)) %>% 
  mutate(population2=round(mean(population),digits = 0)) %>% 
  select(location,sex,age,cause,year,case,population2) %>% 
  unique()


## 挑选除全球、男性、不同年龄段的、不同年份的数据
## 将数据转换成行为年龄，列为年的数据
apc_case <- df2 %>% filter(location=="Global" & sex== "Male")
apc_case_preval <- reshape2::dcast(apc_case,age~year,value.var = 'case') %>% 
  arrange(age) %>% 
  select(-1)
## 将数据转换成行为年龄，列为年的数据
apc_case_pop <- reshape2::dcast(apc_case,age~year,value.var = 'population2') %>% 
  arrange(age) %>% 
  select(-1)

## 将数据整理成 APC网页版需要的数据格式:疾病和人口数据交替排列
for (i in 1:(ncol(apc_case_preval)+ncol(apc_case_pop))){
  if(i == 1){
    result <-apc_case_preval[,i] %>% as.data.frame()}
  else{
    if(i%%2==0){
      result <- cbind(result,apc_case_pop[,ceiling(i/2)])}
    else{
      result <- cbind(result,apc_case_preval[,ceiling(i/2)])}
  }
}
write.csv(result,'apc_web_data.csv',row.names = F,col.names = F)



## long age curve
long_age <- APC_output[["LongAge"]] %>% as.data.frame()
ggplot(data=long_age,aes(x=Age,y=Rate)) +
  geom_line(color='Red') +
  geom_ribbon(aes(Age,ymin=CILo,ymax=CIHi,fill='Red'),alpha=0.1) +
  theme_bw()


## CohortRR
CohortRR <- APC_output[["CohortRR"]] %>% as.data.frame()
ggplot(data=CohortRR,aes(x=Cohort,y=`Rate Ratio`)) +
  geom_line(color='Red') +
  geom_ribbon(aes(Cohort,ymin=CILo,ymax=CIHi,fill='Red'),alpha=0.1) +
  theme_bw() +
  geom_hline(yintercept = 1,linetype = 2, color= 'black') +
  geom_vline(xintercept = CohortRR[which(CohortRR$`Rate Ratio`==1),1],
             linetype = 2, color= 'black')

## periodRR
PeriodRR <- APC_output[["PeriodRR"]] %>% as.data.frame()
ggplot(data=PeriodRR,aes(x=Period,y=`Rate Ratio`)) +
  geom_line(color='Red') +
  geom_ribbon(aes(Period,ymin=CILo,ymax=CIHi,fill='Red'),alpha=0.1) +
  theme_bw() +
  geom_hline(yintercept = 1,linetype = 2, color= 'black') +
  geom_vline(xintercept = PeriodRR[which(PeriodRR$`Rate Ratio`==1),1],
             linetype = 2, color= 'black')


### Local drift and net drift
Local <- APC_output[["LocalDrifts"]] %>% as.data.frame()
Net <- APC_output[["NetDrift"]] %>% as.data.frame()
ggplot(data=Local,aes(x=Age,y=`Percent per Year`)) +
  geom_line(color='Red') + geom_point(color='Red') +
  geom_ribbon(aes(Age,ymin=CILo,ymax=CIHi,fill='Red'),alpha=0.1) +
  theme_bw() +
  geom_hline(yintercept = 0,linetype = 2, color= 'black') +
  geom_hline(data= Net,aes(yintercept = `Net Drift (%/year)`),color='Blue',
             linetype = 1) +
  geom_hline(data= Net,aes(yintercept = `CI Lo`),color='Blue',
             linetype = 2) +
  geom_hline(data= Net,aes(yintercept = `CI Hi`),color='Blue',
             linetype = 2)
