####################################################################################################
## ACS Data Collection
####################################################################################################

####################################################################################################################################################
# PURPOSE: Get ACS Population Estimates
# By: Mike McQueen; adapted by Alyssa Le
# Date: 09/13/21
####################################################################################################################################################

#Load packages
library(dplyr)
library(tidycensus)
library(tidyr)
library(purrr)

# Point the session at the project folder; the relative "Inputs/..." and
# "Outputs/..." paths used further down are resolved against this directory.
# NOTE(review): this is an absolute, user-specific path, so setwd() will fail
# on any machine where the folder does not exist - update it before running.
setwd("C:/Users/50526/ICF/WOTUS Reconsideration - GIS - GIS/Environmental Justice/R_programs/")

##############################################################################################
# PART 1: Determine variables to download
##############################################################################################

# get API key info (you'll need your own key, it's free: https://api.census.gov/data/key_signup.html)
# can reference your API key using this command: Sys.getenv("CENSUS_API_KEY")
# AL: (09/13/21) can comment out this code since the API key has already been installed
# key <- readline(prompt="Enter Census API Key: ")
# census_api_key(key, install=TRUE)

# ACS vintage(s) to request; each entry is handed to get_acs() as `year`
years <- 2019

# #get list of all variables available in acs for the year of interest
# for (i in 1:length(years)) {
#   ACS_var <- load_variables(years[i], "acs5", cache = T)
#   write.csv(ACS_var, paste0("Outputs/ACS_Var/v",years[i],"_ACS_var.csv"))
# }

# Review the downloaded variable list(s) and decide which variables to pull.
# The chosen variables are read from the CSVs below; the `name` column of each
# file supplies the variable codes that are requested from the API.
# Census block groups
BG_vars <- read.csv(file = "Inputs/BG_vars_v19.csv", fileEncoding = "UTF-8-BOM")
BG_vars_codes <- pull(BG_vars, name)
# Census tracts
Tracts_vars <- read.csv(file = "Inputs/Tracts_vars_v19.csv", fileEncoding = "UTF-8-BOM")
Tracts_vars_codes <- pull(Tracts_vars, name)

# First 51 distinct state abbreviations in tidycensus' `fips_codes` table.
# NOTE(review): this is positional - presumably the 50 states plus DC, with the
# territories listed afterwards; confirm against the installed `fips_codes`.
us <- unique(fips_codes$state)[seq_len(51)]

##############################################################################################
# PART 2: Download and clean census ACS data at the block group and tract level
##############################################################################################

### Block group level

# Get block-group data from ACS at the national level.
# The request is issued one state at a time (every entry of `us`), once per
# entry in `years`, and each returned row is tagged with the year requested.
# Iterating over `years` directly avoids the 1:length() pitfall on an empty
# vector, and binding the per-year tables once at the end avoids re-copying
# the accumulated table on every pass.
# NOTE(review): `overwrite` is not a named get_acs() argument; it is kept as in
# the original call - confirm whether it is still needed.
BG_data <- bind_rows(lapply(years, function(yr) {
  map_df(us, function(st) {
    get_acs(geography = "block group",
            year = yr,
            survey = "acs5",
            state = st,
            variables = BG_vars_codes,
            overwrite = TRUE)
  }) %>% mutate(year = yr)
}))

# Reshape the long download to wide form: one column per ACS variable holding
# its estimate. NAME, moe and year are dropped first, leaving GEOID as the only
# identifier column.
# NOTE(review): because `year` is dropped, this step assumes `years` holds a
# single vintage; with several years the same GEOID/variable pair would repeat.
BG_data_estimate <- BG_data %>%
  select(-NAME, -moe, -year) %>%
  spread(key = "variable", value = "estimate")

# Collapse each run of component columns into a single row-wise total.
# The a:b ranges are positional, so every column sitting between the two named
# endpoints is included; which columns those are depends on the variables
# listed in the block-group input CSV.
# na.rm = TRUE means a missing estimate contributes nothing to the total.
BG_data_estimate$estimate_edu <- rowSums(
  select(BG_data_estimate, B15002_003:B15002_027),
  na.rm = TRUE
)

BG_data_estimate$estimate_lingiso <- rowSums(
  select(BG_data_estimate, C16002_004:C16002_013),
  na.rm = TRUE
)

# Drop the component columns now that their totals have been stored
BG_data_final <- select(
  BG_data_estimate,
  -(B15002_003:B15002_027),
  -(C16002_004:C16002_013)
)

# Write this data out for use in other scripts
write.csv(BG_data_final, "Outputs/BG_ACS_WOTUS_2019.csv", row.names = FALSE)

# #Join the descriptions of the variables, splitting out the label info
# #Make adjustments based on your variables of interest
#
#   ### Age
#   BG_age_varnames <- BG_data %>%
#     left_join(BG_vars, by = c("variable" = "name")) %>%
#     filter(concept == "SEX BY AGE") %>%
#     separate(col = label,
#              sep = "!!",
#              into = c("type", "agg", "sex", "age")) %>%
#     select(-type, -agg)
#
#   BG_age_varnames$total <- with(BG_age_varnames,ifelse(is.na(sex)&is.na(age),1,0))
#   BG_age_varnames$minors <- with(BG_age_varnames,ifelse(age=="Under 5 years"|
#                                                             age=="5 to 9 years"|
#                                                             age=="10 to 14 years"|
#                                                             age=="15 to 17 years",1,0))
#   BG_age_varnames$children <- with(BG_age_varnames,ifelse(age=="Under 5 years"|
#                                                             age=="5 to 9 years",1,0))
#   BG_age_varnames$elderly <- with(BG_age_varnames,ifelse(age=="65 and 66 years"|
#                                                            age=="67 to 69 years"|
#                                                            age=="70 to 74 years"|
#                                                            age=="75 to 79 years"|
#                                                            age=="80 to 84 years"|
#                                                            age=="85 years and over",1,0))
#   BG_age_varnames$retiree <- with(BG_age_varnames,ifelse(age=="65 and 66 years"|
#                                                             age=="67 to 69 years"|
#                                                             age=="70 to 74 years",1,0))
#
#   # #Combine populations across sexes and ages
#   # BG_age_agg <- BG_age_varnames %>%
#   #   group_by(GEOID) %>%
#   #   summarize(NAME = first(NAME),
#   #             estimate_total = sum(estimate[total==1]),
#   #             estimate_minors = sum(estimate[minors==1]),
#   #             estimate_children = sum(estimate[children==1]),
#   #             estimate_elderly = sum(estimate[elderly==1]),
#   #             estimate_retiree = sum(estimate[retiree==1]),
#   #             .groups = "drop")
#
#   #Write this data out for use in other scripts
#   write.csv(BG_age_varnames, "Outputs/BG_ACS_Age_2019.csv", row.names = F)
#
#   ### Race
#   BG_race_varnames <- BG_data %>%
#     left_join(BG_vars, by = c("variable" = "name")) %>%
#     filter(concept == "HISPANIC OR LATINO ORIGIN BY RACE") %>%
#     separate(col = label,
#              sep = "!!",
#              into = c("type", "agg", "origin", "race")) %>%
#     select(-type, -agg)
#
#   #Write this data out for use in other scripts
#   write.csv(BG_race_varnames, "Outputs/BG_ACS_Race_2019.csv", row.names = F)

### Tract level

# Get tract data from ACS at the national level.
# The request is issued one state at a time (every entry of `us`), once per
# entry in `years`, and each returned row is tagged with the year requested.
# Iterating over `years` directly avoids the 1:length() pitfall on an empty
# vector, and binding the per-year tables once at the end avoids re-copying
# the accumulated table on every pass.
# NOTE(review): `overwrite` is not a named get_acs() argument; it is kept as in
# the original call - confirm whether it is still needed.
tract_data <- bind_rows(lapply(years, function(yr) {
  map_df(us, function(st) {
    get_acs(geography = "tract",
            year = yr,
            survey = "acs5",
            state = st,
            variables = Tracts_vars_codes,
            overwrite = TRUE)
  }) %>% mutate(year = yr)
}))

# Reshape the long tract download to wide form: one column per ACS variable
# holding its estimate. NAME, moe and year are dropped first, leaving GEOID as
# the only identifier column.
# NOTE(review): because `year` is dropped, this step assumes `years` holds a
# single vintage; with several years the same GEOID/variable pair would repeat.
tract_data_estimate <- spread(
  select(tract_data, -NAME, -moe, -year),
  key = "variable",
  value = "estimate"
)

# Write this data out for use in other scripts
write.csv(tract_data_estimate, "Outputs/Tract_ACS_WOTUS_2019.csv", row.names = FALSE)

# #Join the descriptions of the variables, splitting out the label info
# #Make adjustments based on your variables of interest
# tract_data_varnames <- tract_data %>%
#   left_join(Tracts_vars, by = c("variable" = "name")) %>%
#   separate(col = label,
#            sep = "!!",
#            into = c("type", "agg", "sex", "age", "disability")) %>%
#   select(-type, -agg)
#
# tract_data_varnames$total <- with(tract_data_varnames,ifelse(is.na(sex)&is.na(age),1,0))
#
# #Combine populations across sexes and ages
# tract_data_agg <- tract_data_varnames %>%
#   group_by(GEOID) %>%
#   summarize(NAME = first(NAME),
#             estimate_total = sum(estimate[total==1]),
#             estimate_disability = sum(estimate[total==0]),
#             .groups = "drop")

# ##############################################################################################
# # PART 3: Download and clean state ACS data
# ##############################################################################################
#
# #Get this data for each year from ACS
# state_data <- data.frame()
# for (i in 1:length(years)) {
#   state_temp_data <- map_df(us, function(x) {
#     get_acs(geography = "state",
#             year = years[i],
#             survey = "acs5",
#             state = x,
#             variables = relevant_vars_codes,
#             overwrite = T)
#   }) %>% mutate(year = years[i])
#   state_data <- rbind(state_data, state_temp_data)
# }
#
# #Join the descriptions of the variables, splitting out the label info
# state_data_varnames <- state_data %>%
#   left_join(relevant_vars, by = c("variable" = "name")) %>%
#   separate(col = label,
#            sep = "!!",
#            into = c("type", "agg", "sex", "age", "disability")) %>%
#   select(-type, -agg)
#
# state_data_varnames$total <- with(state_data_varnames,ifelse(is.na(sex)&is.na(age),1,0))
#
# #Combine populations across sexes and ages
# state_data_agg <- state_data_varnames %>%
#   group_by(GEOID) %>%
#   summarize(NAME = first(NAME),
#             estimate_total = sum(estimate[total==1]),
#             estimate_disability = sum(estimate[total==0]),
#             .groups = "drop")
#
# #Write this data out for use in other scripts
# write.csv(state_data_agg, "Outputs/State_ACS_Disability_2019.csv", row.names = F)
