# 数据筛选脚本, for joinpoint
# 2022-11-19

library(tidyverse)

# 设定路径
# setwd("D:\\Users\\35111\\Desktop\\课程安排与课件")  # 已注释：请按本地环境设置工作目录

# Read the raw export and list the column names plus the distinct values of
# each categorical column, so the filter values used further down can be
# checked against what the file actually contains.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved constant.
df <- read.csv("Global_HIV.csv", header = TRUE)
colnames(df)
unique(df$measure)
unique(df$location)
unique(df$sex)
unique(df$age)
unique(df$cause)
unique(df$metric)
unique(df$year)

# Case-count table: keep incidence rows reported as "Number" for "All ages",
# restricted to HIV/AIDS and the listed sexually transmitted infections.
number <- df |>
  filter(
    cause %in% c(
      "HIV/AIDS", "Syphilis", "Chlamydial infection",
      "Gonococcal infection", "Trichomoniasis", "Genital herpes"
    ),
    age == "All ages",
    metric == "Number",
    measure == "Incidence"
  ) |>
  # Round the point estimate and both bounds to whole numbers.
  mutate(across(c(val, upper, lower), \(x) round(x, 0))) |>
  # Re-sort by cause, sex, year. The original author marks this ordering as
  # very important (presumably required by the Joinpoint input -- confirm).
  arrange(cause, sex, year)


# Age-standardized incidence rate table for the same set of causes, with a
# standard-error column added.
ASR <- df |>
  filter(
    cause %in% c(
      "HIV/AIDS", "Syphilis", "Chlamydial infection",
      "Gonococcal infection", "Trichomoniasis", "Genital herpes"
    ),
    age == "Age-standardized",
    measure == "Incidence",
    metric == "Rate"
  ) |>
  # se = (upper - lower) / (2 * 1.96).
  # NOTE(review): this assumes lower/upper are 95% interval bounds under a
  # normal approximation -- confirm against the data source.
  mutate(se = (upper - lower) / (2 * 1.96)) |>
  # Re-sort by cause, sex, year. The original author marks this ordering as
  # very important (presumably required by the Joinpoint input -- confirm).
  arrange(cause, sex, year)

# Write both tables to CSV in the working directory, without a row-name
# column. `FALSE` is spelled out because `F` is an ordinary variable that can
# be reassigned, whereas `FALSE` is a reserved constant.
write.csv(number, "number.csv", row.names = FALSE)
write.csv(ASR, "ASR.csv", row.names = FALSE)