########################################################################################
## Created by: George Gardner, ICF 
## Date Created: 7/29/22
## Modifies state input data from USdata_R7 to include temporary and permanent wetland
## impacts separated by NWPR and Rapanos. Uses proportion of baseline wetlands local
## and proportion of impacts local (within 30-, or 50-miles out of the 100-mile boundary).
########################################################################################

rm(list = ls(all = TRUE)) #first, clear R's workspace

library(readxl)
library(openxlsx)
library(dplyr)

# set working directory to keep file paths short
setwd('C:/Users/55338/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/Wetland MRM - R Code/')

# STEP 1: Read in state input data, state abbreviation and numbering crosswalk, and impact spreadsheet.

load("./data/USdata_R7.rda")

USdata <- USdata[, -c(9:10)]
colnames(USdata) <- c("statenum", "lninc", "sagulf", "nema", "nmw", "propfor", "proploc", "q0")

crosswalk <- read.xlsx(xlsxFile = "./data/State_Numbering.xlsx", colNames = TRUE)

ORM_path <- "C:/Users/55338/ICF/WOTUS Reconsideration (ICF Internal Site) - General/ORM Data Processing/Outputs/"
orm <- read_excel(path = paste0(ORM_path, "FINAL_404_ImpactChanges_2021.09.23.v2.xlsx"), sheet = "Impacts_By_STATE")
orm <- orm[, c(1, 4, 7, 10, 13)]
orm <- rename(orm, state = STATE_FINAL)

# STEP 2: convert HH income from 2020$ to 2021$ using BEA GDP deflator.
GDP <- read.csv("C:/Users/55338/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/Meta-data/Metadata Data Extraction - March 2022/BEA_GDP_Deflator_Feb2022.csv")
conv <- GDP$Gross.domestic.product[32]/GDP$Gross.domestic.product[31]

USdata$lninc <- log(exp(USdata$lninc) * conv)

# STEP 3: Combine data.

USdata <- left_join(USdata, crosswalk)
USdata <- left_join(USdata, orm)
USdata <- USdata[, c(1:8, 12, 13, 10, 11, 9)]

# STEP 4: Make a new version of the data with state-specific settings for
# the proportion of local wetlands using 50-mile radius assumption.
prop_loc <- read.xlsx("./data/StateAverages_PropLocal_PropLocalImpacts_11-01-22.xlsx")
colnames(prop_loc)[colnames(prop_loc) == 'stusps'] <- 'state'

USdata <- left_join(USdata, prop_loc)

# since we don't have estimate for Alaska, we assume proportions local = national avgs
USdata[1, 14] <- mean(prop_loc$avg.prop.local_100_30)
USdata[1, 15] <- mean(prop_loc$avg.prop.local_100_50)
USdata[1, 16] <- mean(prop_loc$avg.prop.local.impacts.NWPR.perm_100_30)
USdata[1, 17] <- mean(prop_loc$avg.prop.local.impacts.Rapanos.perm_100_30)
USdata[1, 18] <- mean(prop_loc$avg.prop.local.impacts.NWPR.temp_100_30)
USdata[1, 19] <- mean(prop_loc$avg.prop.local.impacts.Rapanos.temp_100_30)
USdata[1, 20] <- mean(prop_loc$avg.prop.local.impacts.NWPR.perm_100_50)
USdata[1, 21] <- mean(prop_loc$avg.prop.local.impacts.Rapanos.perm_100_50)
USdata[1, 22] <- mean(prop_loc$avg.prop.local.impacts.NWPR.temp_100_50)
USdata[1, 23] <- mean(prop_loc$avg.prop.local.impacts.Rapanos.temp_100_50)

# STEP 5: Use the updated wetland impacts
orm_update <- read.csv(paste0(ORM_path, "Impacts_By_STATE_2022.07.28.csv"))
USdata <- left_join(USdata, orm_update, by=c("state" = "STATE_FINAL"))
USdata$PERM_TOTAL_NWPR.x <- USdata$PERM_TOTAL_NWPR.y
USdata$PERM_TOTAL_Rapanos.x <- USdata$PERM_TOTAL_Rapanos.y
USdata$TEMP_TOTAL_NWPR.x <- USdata$TEMP_TOTAL_NWPR.y
USdata$TEMP_TOTAL_Rapanos.x <- USdata$TEMP_TOTAL_Rapanos.y
USdata <- USdata[,-c(24:41)]

names(USdata)[names(USdata)=="PERM_TOTAL_NWPR.x"] <- "PERM_TOTAL_NWPR"
names(USdata)[names(USdata)=="PERM_TOTAL_Rapanos.x"] <- "PERM_TOTAL_Rapanos"
names(USdata)[names(USdata)=="TEMP_TOTAL_NWPR.x"] <- "TEMP_TOTAL_NWPR"
names(USdata)[names(USdata)=="TEMP_TOTAL_Rapanos.x"] <- "TEMP_TOTAL_Rapanos"

save(USdata, file = "./data/USdata_Rev5.rda")
openxlsx::write.xlsx(USdata, file = './data/USdata_Rev5.xlsx')
