########################################################################################
## Created by: George Gardner, ICF 
## Date Created: 6/17/22
## Get HH projections from SEDAC at the state level
########################################################################################
rm(list = ls(all = TRUE)) #first, clear R's workspace

library(readxl)
library(openxlsx)
library(dplyr)

setwd('C:/Users/55338/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/Wetland MRM - R Code/')

# SEDAC path
SEDAC_path <- "C:/Users/55338/ICF/WOTUS Reconsideration (ICF Internal Site) - General/National Analysis/Population Projections/SEDAC/SEDAC_georeferenced_county_population_proj_excel files"

options(stringsAsFactors=FALSE)
pacman::p_load(dplyr, tidycensus, readxl, tidyverse, labelled, units)

## STEP 1: read in data.

# read in SEDAC data
SEDAC_pop <- read_xlsx(paste0(SEDAC_path, "/hauer_county_totpop_SSPs.xlsx"))
names(SEDAC_pop)[names(SEDAC_pop)=="GEOID10"] <- "GEOID"

SEDAC_pop <- SEDAC_pop[, c(1:6, 20, 25, 30, 35, 40, 45)]
SEDAC_pop <- SEDAC_pop %>%
  group_by(STATEFP10) %>%
  summarise(ssp22020=sum(ssp22020),
            ssp22025=sum(ssp22025),
            ssp22030=sum(ssp22030),
            ssp22035=sum(ssp22035),
            ssp22040=sum(ssp22040),
            ssp22045=sum(ssp22045))

names(SEDAC_pop)[names(SEDAC_pop)=="STATEFP10"] <- "GEOID"

# STEP 2: Get persons per household data ("DP02_0016E") using 2019 ACS

# ACS Call Variables
years <- c(2019)
geography <- "state"

# Activate census API key
census_api_key("a37705549debf5d0b8e43a4fa62c7eb552f928bc")

pph <- get_acs(geography = "state", 
               variables = "DP02_0016E", 
               year = 2019)

# merge data and remove Hawaii, Puerto Rico, DC.
Data <- left_join(pph, SEDAC_pop)
Data <- subset(Data, NAME != "Hawaii" & NAME != "Puerto Rico" & NAME != "District of Columbia")

i=1
while (i<=nrow(Data)) {
  Data$NAME[i] <- state.abb[grep(Data$NAME[i], state.name)]    # Get state abbreviation for crosswalk
  i <- i+1
  }

pop_matrix <- as.data.frame(pop_matrix)
pop_matrix$state <- state_pop$state_abb

# STEP 3: extract SEDAC projections and fill in missing years using linear extrapolation.
# Divide population projections by persons per household from ACS 2019 to get HHs.

# output SEDAC projections
# Create the matrix for HH data projections using SEDAC SSP2.

HHpop <- matrix(0,nrow(Data),ncol=26) # later remove first and last 3 columns. only added for extrap.
colnames(HHpop) <- c("2020", "2021","2022","2023", "2024","2025","2026","2027","2028", "2029","2030","2031","2032","2033","2034","2035","2036",
                     "2037", "2038","2039","2040","2041","2042","2043","2044","2045")

HHpop[,1] <- Data$ssp22020 # extract SSP2 in year 2020
HHpop[,6] <- Data$ssp22025 # extract SSP2 in year 2025
HHpop[,11] <- Data$ssp22030 # extract SSP2 in year 2030
HHpop[,16] <- Data$ssp22035 # extract SSP2 in year 2035
HHpop[,21] <- Data$ssp22040 # extract SSP2 in year 2040
HHpop[,26] <- Data$ssp22045 # extract SSP2 in year 2045

# need to fill in blanks using linear extrapolation. Take the difference between the
# years and divide by 5 (since estimates are every 5 years). Then add this for each year.

diff_20_25 <- (Data$ssp22025 - Data$ssp22020)/5
diff_25_30 <- (Data$ssp22030 - Data$ssp22025)/5
diff_30_35 <- (Data$ssp22035 - Data$ssp22030)/5
diff_35_40 <- (Data$ssp22040 - Data$ssp22035)/5
diff_40_45 <- (Data$ssp22045 - Data$ssp22040)/5

# start to fill in matrix.
for (i in 1:4) {
  HHpop[,i+1] <- Data$ssp22020 + diff_20_25*i
}

for (i in 1:4) {
  HHpop[,i+6] <- Data$ssp22025 + diff_25_30*i
}

for (i in 1:4) {
  HHpop[,i+11] <- Data$ssp22030 + diff_30_35*i
}

for (i in 1:4) {
  HHpop[,i+16] <- Data$ssp22035 + diff_35_40*i
}

for (i in 1:4) {
  HHpop[,i+21] <- Data$ssp22040 + diff_40_45*i
}

HHpop <- HHpop[, c(4:23)]

# now divide by the number of persons per household to get HH projections
HHpop <- HHpop/Data$estimate

HHpop <- as.data.frame(HHpop) # must convert to dataframe to add character type variable
HHpop$state <- Data$NAME # add state abbreviation to merge with crosswalk and get correct numbering/order.

# STEP 4: Merge with the state numbering crosswalk to get the data in the correct order, then write to csv and rda.
crosswalk <- read_xlsx("./data/State_Numbering.xlsx")

HHpop <- left_join(crosswalk, HHpop)

# convert back to matrix
HHpop <- as.matrix(HHpop[, c(3:22)])

# STEP 5: write to file.

write.csv(HHpop,"./data/HHdataFull_SEDAC.csv")
save(HHpop,file = "./data/HHdataFull_SEDAC.rda")
