############### WOTUS Wetland Multi-Buffer Analysis  #######################

# Author: Sam Ennett
# Date Created: 05/17/22
# Date Updated: 10/10/22

# Purpose: Combines the various buffers created during the WOTUS project into a single dataset for output 

options(stringsAsFactors=FALSE)

pacman::p_load(dplyr, sf, tidycensus, readxl, openxlsx, tidyverse, parallel, foreach, doParallel)

rm(list=ls())
gc()
memory.limit(size=100000)

# Variables to change before running the code ---------------------------------
Input_path <- "C:/Users/56407/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/100 Mile Buffer/Inputs/"
Output_path <- "C:/Users/56407/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/100 Mile Buffer/Outputs/"
TAMU_path <- "C:/Users/56407/ICF/EPA-Economic and Water Quality Modeling and Assessment Support - From_TAMU/"
GIS_path <- "C:/Users/56407/ICF/Le, Alyssa - GIS_Data/TIGER/"
ORM_path <- "C:/Users/56407/ICF/WOTUS Reconsideration (ICF Internal Site) - General/ORM Data Processing/Outputs/"
wood_poole_path <- "C:/Users/56407/ICF/WOTUS Reconsideration (ICF Internal Site) - General/National Analysis/Population Projections/Outputs/"

# Load data from 50-mile anlaysis first
load(paste0(Output_path, "50mileWetlandAnalysis_10-5-22.RData"))

# Load data from 200-mile analysis next
load(file = paste0(Output_path, "200mileWetlandAnalysis_All_7-14-22.RData"))

## Summarize HAWQS Wetland Areas by HUC12 -------------------------------------
hru_wetland <- hawqs_hru %>%
  rename(HUC12 = Subbasin) %>%
  group_by(HUC12) %>%
  summarise(TotalWetlandArea = sum(HruArea[Land_Use=="AGWT" | Land_Use=="AGWF" | Land_Use=="RIWF" | Land_Use=="RIWN" | Land_Use=="UPWF" | Land_Use=="UPWN"]),
            ForestWetlandArea = sum(HruArea[Land_Use=="AGWF" | Land_Use=="RIWF" | Land_Use=="UPWF"]))

# Revise dataframe names fro 200 mile data
buff_200_bind <- buff_100_bind %>%
  rename(HUC_RATIO_200 = HUC_RATIO_100)
buff_30_bind_200 <- buff_30_bind

# Load data from 100-mile analysis
load(file = paste0(Output_path, "100mileWetlandAnalysis_All_6-7-22.RData"))

rm(buff_100, buff_int_BC, buff_int)

# Merge Buffers ---------------------------------------------------------------
buff_merge_200_100 <- merge(buff_200_bind %>% select(GEOID, Split_GEOID, HUC12, HUC_RATIO_200),
                            buff_100_bind %>% select(GEOID, Split_GEOID, HUC12, HUC_RATIO_100),
                            by = c("Split_GEOID", "HUC12"), all.x = T) %>%
  select(Split_GEOID, HUC12, HUC_RATIO_200, HUC_RATIO_100)

buff_merge_200_100_50 <- merge(buff_merge_200_100,
                            buff_50_bind %>% select(GEOID, Split_GEOID, HUC12, HUC_RATIO_50),
                            by = c("Split_GEOID", "HUC12"), all.x = T) %>%
  select(Split_GEOID, HUC12, HUC_RATIO_200, HUC_RATIO_100, HUC_RATIO_50)

buff_merge <- merge(buff_merge_200_100_50,
                               buff_30_bind %>% select(GEOID, Split_GEOID, HUC12, HUC_RATIO_30),
                               by = c("Split_GEOID", "HUC12"), all.x = T) %>%
  select(Split_GEOID, HUC12, HUC_RATIO_200, HUC_RATIO_100, HUC_RATIO_50, HUC_RATIO_30)

### Join summarized wetland areas to HUC12 boundaries by HUC12 ----------------
hawqs_wetland_changes_merge <- Reduce(function(x, y) merge(x=x, y=y, by="HUC12", all.x=T),
                                      list(buff_merge, hru_wetland, orm))

## Summarize by HUC12 -------------------------------------------------------------
hawqs_wetland_changes_summary <- hawqs_wetland_changes_merge %>%
  group_by(Split_GEOID) %>%
  units::drop_units() %>%
  summarise(Baseline.Wetlands_200 = sum(TotalWetlandArea*HUC_RATIO_200, na.rm = T),
            Baseline.Wetlands_100 = sum(TotalWetlandArea*HUC_RATIO_100, na.rm = T),
            Baseline.Wetlands_50 = sum(TotalWetlandArea*HUC_RATIO_50, na.rm = T),
            Baseline.Wetlands_30 = sum(TotalWetlandArea*HUC_RATIO_30, na.rm = T),
            
            # Proprtion of impacted forested wetlands
            Prop.Forested_200 = (sum((ForestWetlandArea*HUC_RATIO_200))/sum((TotalWetlandArea*HUC_RATIO_200))),
            Prop.Forested_100 = (sum((ForestWetlandArea*HUC_RATIO_100), na.rm = T)/sum((TotalWetlandArea*HUC_RATIO_100), na.rm = T)),
            Prop.Forested_50 = (sum((ForestWetlandArea*HUC_RATIO_50), na.rm = T)/sum((TotalWetlandArea*HUC_RATIO_50), na.rm = T)),
            Prop.Forested_30 = (sum((ForestWetlandArea*HUC_RATIO_30), na.rm = T)/sum((TotalWetlandArea*HUC_RATIO_30), na.rm = T)),
            
            # Create local proportions for wetlands
            Prop.local_200 = sum((TotalWetlandArea*HUC_RATIO_30), na.rm = T)/sum((TotalWetlandArea*HUC_RATIO_200), na.rm = T),
            Prop.local_100 = sum((TotalWetlandArea*HUC_RATIO_30), na.rm = T)/sum((TotalWetlandArea*HUC_RATIO_100), na.rm = T),
            Prop.local_50 = sum((TotalWetlandArea*HUC_RATIO_30), na.rm = T)/sum((TotalWetlandArea*HUC_RATIO_50), na.rm = T),
            Prop.local_30 = sum((TotalWetlandArea*HUC_RATIO_30), na.rm = T)/sum((TotalWetlandArea*HUC_RATIO_30), na.rm = T),
            
            # Create a local proportions for impacts
            NWPR.perm.Prop.local_200 = ifelse(sum((PERM_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_NWPR*HUC_RATIO_200), na.rm = T)=="NaN", 0, sum((PERM_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_NWPR*HUC_RATIO_200), na.rm = T)),
            NWPR.perm.Prop.local_100 = ifelse(sum((PERM_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_NWPR*HUC_RATIO_100), na.rm = T)=="NaN", 0, sum((PERM_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_NWPR*HUC_RATIO_100), na.rm = T)),
            NWPR.perm.Prop.local_50 = ifelse(sum((PERM_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_NWPR*HUC_RATIO_50), na.rm = T)=="NaN", 0, sum((PERM_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_NWPR*HUC_RATIO_50), na.rm = T)),
            NWPR.perm.Prop.local_30 = ifelse(sum(PERM_TOTAL_NWPR*HUC_RATIO_30, na.rm = T)/sum(PERM_TOTAL_NWPR*HUC_RATIO_30, na.rm = T)=="NaN", 0, sum(PERM_TOTAL_NWPR*HUC_RATIO_30, na.rm = T)/sum(PERM_TOTAL_NWPR*HUC_RATIO_30, na.rm = T)),
            NWPR.temp.Prop.local_200 = ifelse(sum((TEMP_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_NWPR*HUC_RATIO_200), na.rm = T)=="NaN", 0, sum((TEMP_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_NWPR*HUC_RATIO_200), na.rm = T)),
            NWPR.temp.Prop.local_100 = ifelse(sum((TEMP_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_NWPR*HUC_RATIO_100), na.rm = T)=="NaN", 0, sum((TEMP_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_NWPR*HUC_RATIO_100), na.rm = T)),
            NWPR.temp.Prop.local_50 = ifelse(sum((TEMP_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_NWPR*HUC_RATIO_50), na.rm = T)=="NaN", 0, sum((TEMP_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_NWPR*HUC_RATIO_50), na.rm = T)),
            NWPR.temp.Prop.local_30 = ifelse(sum((TEMP_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)=="NaN", 0, sum((TEMP_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_NWPR*HUC_RATIO_30), na.rm = T)),
            Rapanos.perm.Prop.local_200 = ifelse(sum((PERM_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_Rapanos*HUC_RATIO_200), na.rm = T)=="NaN", 0, sum((PERM_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_Rapanos*HUC_RATIO_200), na.rm = T)),
            Rapanos.perm.Prop.local_100 = ifelse(sum((PERM_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_Rapanos*HUC_RATIO_100), na.rm = T)=="NaN", 0, sum((PERM_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_Rapanos*HUC_RATIO_100), na.rm = T)),
            Rapanos.perm.Prop.local_50 = ifelse(sum((PERM_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_Rapanos*HUC_RATIO_50), na.rm = T)=="NaN", 0, sum((PERM_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_Rapanos*HUC_RATIO_50), na.rm = T)),
            Rapanos.perm.Prop.local_30 = ifelse(sum((PERM_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)=="NaN", 0, sum((PERM_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((PERM_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)),
            Rapanos.temp.Prop.local_200 = ifelse(sum((TEMP_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_Rapanos*HUC_RATIO_200), na.rm = T)=="NaN", 0, sum((TEMP_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_Rapanos*HUC_RATIO_200), na.rm = T)),
            Rapanos.temp.Prop.local_100 = ifelse(sum((TEMP_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_Rapanos*HUC_RATIO_100), na.rm = T)=="NaN", 0, sum((TEMP_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_Rapanos*HUC_RATIO_100), na.rm = T)),
            Rapanos.temp.Prop.local_50 = ifelse(sum((TEMP_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_Rapanos*HUC_RATIO_50), na.rm = T)=="NaN", 0, sum((TEMP_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_Rapanos*HUC_RATIO_50), na.rm = T)),
            Rapanos.temp.Prop.local_30 = ifelse(sum((TEMP_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)=="NaN", 0, sum((TEMP_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)/sum((TEMP_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)),
            
            # Summarize impacts for NWPR and Rapanos
            impacts.NWPR.perm_200 = sum((PERM_TOTAL_NWPR*HUC_RATIO_200), na.rm = T),
            impacts.Rapanos.perm_200 = sum((PERM_TOTAL_Rapanos*HUC_RATIO_200), na.rm = T),
            impacts.NWPR.temp_200 = sum((TEMP_TOTAL_NWPR*HUC_RATIO_200), na.rm = T),
            impacts.Rapanos.temp_200 = sum((TEMP_TOTAL_Rapanos*HUC_RATIO_200), na.rm = T),
            impacts.NWPR.perm_100 = sum((PERM_TOTAL_NWPR*HUC_RATIO_100), na.rm = T),
            impacts.Rapanos.perm_100 = sum((PERM_TOTAL_Rapanos*HUC_RATIO_100), na.rm = T),
            impacts.NWPR.temp_100 = sum((TEMP_TOTAL_NWPR*HUC_RATIO_100), na.rm = T),
            impacts.Rapanos.temp_100 = sum((TEMP_TOTAL_Rapanos*HUC_RATIO_100), na.rm = T),
            impacts.NWPR.perm_50 = sum((PERM_TOTAL_NWPR*HUC_RATIO_50), na.rm = T),
            impacts.Rapanos.perm_50 = sum((PERM_TOTAL_Rapanos*HUC_RATIO_50), na.rm = T),
            impacts.NWPR.temp_50 = sum((TEMP_TOTAL_NWPR*HUC_RATIO_50), na.rm = T),
            impacts.Rapanos.temp_50 = sum((TEMP_TOTAL_Rapanos*HUC_RATIO_50), na.rm = T),
            impacts.NWPR.perm_30 = sum((PERM_TOTAL_NWPR*HUC_RATIO_30), na.rm = T),
            impacts.Rapanos.perm_30 = sum((PERM_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T),
            impacts.NWPR.temp_30 = sum((TEMP_TOTAL_NWPR*HUC_RATIO_30), na.rm = T),
            impacts.Rapanos.temp_30 = sum((TEMP_TOTAL_Rapanos*HUC_RATIO_30), na.rm = T)) %>%
  ungroup() %>%
  mutate(GEOID = substr(Split_GEOID, 1, 5)) 

# Check for NaN in prop.locals
missing <- hawqs_wetland_changes_summary %>%
  filter(NWPR.temp.Prop.local_30=="NaN")

# QA hawqs wetland changes summary --------------------------------------------
hawqs_wetland_changes_summary_QA <- hawqs_wetland_changes_summary %>%
  mutate(buff200_localacres = Baseline.Wetlands_200*Prop.local_200,
         buff100_localacres = Baseline.Wetlands_100*Prop.local_100,
         buff50_localacres = Baseline.Wetlands_50*Prop.local_50, 
         buff30_localacres = Baseline.Wetlands_30*Prop.local_30,
         local.NWPR.perm_200 = impacts.NWPR.perm_200*NWPR.perm.Prop.local_200,
         local.NWPR.perm_100 = impacts.NWPR.perm_100*NWPR.perm.Prop.local_100,
         local.NWPR.perm_50 = impacts.NWPR.perm_50*NWPR.perm.Prop.local_50,
         local.NWPR.perm_30 = impacts.NWPR.perm_30*NWPR.perm.Prop.local_30) %>% 
  select(Split_GEOID, buff200_localacres, buff100_localacres, buff50_localacres, buff30_localacres, 
        local.NWPR.perm_200, local.NWPR.perm_100, local.NWPR.perm_50, local.NWPR.perm_30) %>%
  mutate(IsSame_baseline = ifelse(buff200_localacres==buff100_localacres & buff100_localacres==buff50_localacres & buff50_localacres==buff30_localacres, 1, 0),
       difference_baseline = ifelse(IsSame_baseline==0, buff200_localacres-buff100_localacres, "Same"),
       IsSame_NWPR.perm = ifelse(local.NWPR.perm_200==local.NWPR.perm_100 & local.NWPR.perm_100==local.NWPR.perm_50 & local.NWPR.perm_50==local.NWPR.perm_30, 1, 0),
       difference_NWPR.perm = ifelse(IsSame_NWPR.perm==0, local.NWPR.perm_200-local.NWPR.perm_100, "Same"))


### Join boundary polygons with census data by GEOID --------------------------
census_wetland_merge <- merge(hawqs_wetland_changes_summary, cen_data_wide, by = "GEOID") %>%
  mutate(STATEFP = substr(GEOID,1,2))

## Read in Woods and Poole Data -----------------------------------------------
# This section is used to remove the VA GEOIDs that represent cities
wp_data_raw <- read.csv(paste0(wood_poole_path, "Woods_and_Poole_2021_CountyData_v2.csv"))

wp_data <- wp_data_raw %>%
  mutate(GEOID = trimws(str_sub(CountyName, (nchar(CountyName)-6), (nchar(CountyName)-1))),
         State = trimws(str_sub(CountyName, (nchar(CountyName)-19), (nchar(CountyName)-17))))

we <- census_wetland_region$GEOID
wp <- wp_data$GEOID
ans <- setdiff(we, wp) # 51 missing FIPS codes from VA

### Join Region Crosswalk and Format Final Output -----------------------------
census_wetland_region <- merge(census_wetland_merge, State_xwalk, by.x = "STATEFP", by.y = "st", all.x = T) %>%
  mutate(`ln(Median Income)` = log(B19013_001)) %>%
  select(GEOID, Split_GEOID, stusps, `ln(Median Income)`, sagulf, nema, nmw, 
         Prop.Forested_200, Prop.Forested_100, Prop.Forested_50, Prop.Forested_30,
         Prop.local_200, Prop.local_100, Prop.local_50,
         NWPR.perm.Prop.local_200, NWPR.perm.Prop.local_100, NWPR.perm.Prop.local_50,
         NWPR.temp.Prop.local_200, NWPR.temp.Prop.local_100, NWPR.temp.Prop.local_50,
         Rapanos.perm.Prop.local_200, Rapanos.perm.Prop.local_100, Rapanos.perm.Prop.local_50,
         Rapanos.temp.Prop.local_200, Rapanos.temp.Prop.local_100, Rapanos.temp.Prop.local_50,
         Baseline.Wetlands_200, Baseline.Wetlands_100,Baseline.Wetlands_50, Baseline.Wetlands_30,
         impacts.NWPR.perm_200, impacts.NWPR.perm_100, impacts.NWPR.perm_50, impacts.NWPR.perm_30,
         impacts.Rapanos.perm_200, impacts.Rapanos.perm_100, impacts.Rapanos.perm_50, impacts.Rapanos.perm_30,
         impacts.NWPR.temp_200, impacts.NWPR.temp_100, impacts.NWPR.temp_50, impacts.NWPR.temp_30,
         impacts.Rapanos.temp_200, impacts.Rapanos.temp_100, impacts.Rapanos.temp_50, impacts.Rapanos.temp_30) %>%
  filter(!GEOID %in% ans | stusps == "HI") 

# Merge to get county names
output_named <- merge(census_wetland_region,
                      st_drop_geometry(TIGER_boundary) %>% select(GEOID, NAME),
                      by = "GEOID",
                      all.x = T) %>%
  merge(., st_drop_geometry(big_count) %>% select(GEOID, NAME),
        by = "GEOID",
        all.x = T) %>%
  distinct(Split_GEOID, .keep_all = T) %>%
  units::drop_units() %>%
  mutate(NAME = ifelse(is.na(NAME.x), NAME.y, NAME.x)) %>%
  select(GEOID, Split_GEOID, stusps, NAME, `ln(Median Income)`, sagulf, nema, nmw, 
         Prop.Forested_200, Prop.Forested_100, Prop.Forested_50, Prop.Forested_30,
         Prop.local_200, Prop.local_100, Prop.local_50,
         NWPR.perm.Prop.local_200, NWPR.perm.Prop.local_100, NWPR.perm.Prop.local_50,
         NWPR.temp.Prop.local_200, NWPR.temp.Prop.local_100, NWPR.temp.Prop.local_50,
         Rapanos.perm.Prop.local_200, Rapanos.perm.Prop.local_100, Rapanos.perm.Prop.local_50,
         Rapanos.temp.Prop.local_200, Rapanos.temp.Prop.local_100, Rapanos.temp.Prop.local_50,
         Baseline.Wetlands_200, Baseline.Wetlands_100,Baseline.Wetlands_50, Baseline.Wetlands_30,
         impacts.NWPR.perm_200, impacts.NWPR.perm_100, impacts.NWPR.perm_50, impacts.NWPR.perm_30,
         impacts.Rapanos.perm_200, impacts.Rapanos.perm_100, impacts.Rapanos.perm_50, impacts.Rapanos.perm_30,
         impacts.NWPR.temp_200, impacts.NWPR.temp_100, impacts.NWPR.temp_50, impacts.NWPR.temp_30,
         impacts.Rapanos.temp_200, impacts.Rapanos.temp_100, impacts.Rapanos.temp_50, impacts.Rapanos.temp_30)

# State Averages --------------------------------------------------------------
prop.local.avg <- census_wetland_region %>%
  group_by(stusps) %>%
  summarise(avg.prop.local_200 = mean(Prop.local_200),
            avg.prop.local_100 = mean(Prop.local_100),
            avg.prop.local_50 = mean(Prop.local_50),
            avg.impacts.NWPR.perm_30 = mean(impacts.NWPR.perm_30),
            avg.impacts.Rapanos.perm_30 = mean(impacts.Rapanos.perm_30),
            avg.impacts.NWPR.temp_30 = mean(impacts.NWPR.temp_30),
            avg.impacts.Rapanos.temp_30 = mean(impacts.Rapanos.temp_30))

# Write output files ----------------------------------------------------------
write.xlsx(output_named, paste0(Output_path, "WOTUS_Buffer_WetlandAnalysis_10-12-22.xlsx"))
write.xlsx(prop.local.avg, paste0(Output_path, "StateAvergaes_PropLocal_LocalImpacts_10-26-22.xlsx"))
