############### WOTUS Wetland 100-Mile Buffer Analysis  #######################

# Author: Sam Ennett
# Date Created: 05/17/22

# Keep strings as character on read (default since R 4.0; retained here for
# compatibility with older R sessions).
options(stringsAsFactors=FALSE)

# pacman::p_load installs (if missing) and attaches each package.
# NOTE(review): dplyr is redundant here because tidyverse attaches it --
# harmless, but one of the two could be dropped.
pacman::p_load(dplyr, sf, tidycensus, readxl, tidyverse, labelled, units, openxlsx)

# Clear the workspace before running.
# NOTE(review): rm(list=ls()) in a script is discouraged -- it hides
# dependencies on session state and does not detach packages; prefer
# starting a fresh R session.
rm(list=ls())

# Variables to change before running the code ---------------------------------
# User-specific OneDrive/SharePoint paths -- edit these for your machine.
Input_path <- "C:/Users/56407/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/100 Mile Buffer/Inputs/"
Output_path <- "C:/Users/56407/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/100 Mile Buffer/Outputs/"
TAMU_path <- "C:/Users/56407/ICF/EPA-Economic and Water Quality Modeling and Assessment Support - From_TAMU/"
GIS_path <- "C:/Users/56407/ICF/Le, Alyssa - GIS_Data/TIGER/"
ORM_path <- "C:/Users/56407/ICF/WOTUS Reconsideration (ICF Internal Site) - General/ORM Data Processing/Outputs/"
wood_poole_path <- "C:/Users/56407/ICF/WOTUS Reconsideration (ICF Internal Site) - General/National Analysis/Population Projections/Outputs/"

# Load Global Environment, can skip to line 147
load(file = paste0(Output_path, "100mileWetlandAnalysis_All_6-7-22.RData"))

# Conversion factor: meters -> miles
dist_conv_metertomiles <- 0.000621371

# Buffer distances (miles)
Buffer_Dist <- c(100, 30)

# ACS Call Variables
years <- c(2019)
geography <- "county"

# Activate census API key.
# SECURITY: the key used to be hard-coded here. Prefer setting the
# CENSUS_API_KEY environment variable; the `unset` fallback preserves the
# original behavior for now -- rotate the exposed key and delete the fallback
# once the environment variable is in place.
census_api_key(Sys.getenv("CENSUS_API_KEY",
                          unset = "a37705549debf5d0b8e43a4fa62c7eb552f928bc"))

# ACS variables to request.
# NOTE(review): naming is inconsistent -- two codes carry the "E" (estimate)
# suffix and one does not; tidycensus accepts both forms, but the returned
# `variable` column drops the trailing "E". Confirm downstream filters match.
Census_var <- c("S2501_C01_001E", # occupied housing units
                "B19013_001",     # median household income
                "DP02_0016E")     # persons per household

# Read in data ----------------------------------------------------------------
## Read in state crosswalk for FIPS code, regions -----------------------------
# Drop Alaska (handled separately at the bottom of the script) and DC.
State_xwalk <- read.csv(paste0(Input_path, "us-state-ansi-fips-region.csv"), header = T) %>%
  filter(!stusps %in% c("AK", "DC"))

# Zero-pad state FIPS codes to two digits ("1" -> "01") so they join cleanly.
State_xwalk$st <- str_pad(State_xwalk$st, width = 2, side = "left", pad = "0")

## TIGER Boundary Dataset ------------------------------------------------------
### Import TIGER boundaries
TIGER_boundary_raw <- st_read(paste0(GIS_path, "2019/County/tl_2019_us_county.shp"))

## Import shapefile with predetermined oversize counties
big_count <- st_read(paste0(Input_path, "tl_2020_us_county_CEC_oversized_sub.shp"))

## Identifiers for the oversized counties (analyzed separately below)
big_geoid <- unique(big_count$GEOID)
big_st <- unique(big_count$STATEFP)

# Regular counties = every TIGER county that is not flagged as oversized.
TIGER_boundary <- TIGER_boundary_raw %>%
  filter(!GEOID %in% big_geoid)

## HAWQS HUC Boundaries --------------------------------------------------------
# Each HAWQS region directory holds one shapefile of HUC12 watershed
# boundaries; read them all and stack into a single sf object with a
# `region` index column (1-based position in hawqs_dirs).
hawqs_dirs <- list.dirs(path = paste0(TAMU_path,"HAWQS_HUC_Dataset_2022-01"), recursive = FALSE)

# Read one region's shapefile; the region numeral is the last two characters
# of the directory name (e.g. ".../Region01" -> "01").
read_hawqs_region <- function(dir, region_idx) {
  numeral <- substr(dir, nchar(dir) - 1, nchar(dir))
  st_read(paste0(dir, "/SWAT/Fields_CDL/HUC12/Merged", numeral,
                 "/Watershed/Shapes/demwshed.shp")) %>%
    mutate(region = region_idx)
}

# Read every region once and row-bind. Replaces the original
# exists()/rbind-in-a-loop pattern, which grew the object quadratically and
# silently appended duplicates if the script was re-run in the same session.
hawqs_huc12 <- do.call(
  rbind,
  lapply(seq_along(hawqs_dirs), function(i) read_hawqs_region(hawqs_dirs[[i]], i))
)

unique(hawqs_huc12$region)
# st_write(hawqs_huc12, paste0(Output_path, "hawqs_huc12.shp"))

## HAWQS wetlands -------------------------------------------------------------
### Region numerals: last two characters of each region directory name.
# BUG FIX: the original computed substr(hawqs_dirs, nchar(hawqs_dirs[[i]])-1, ...)
# using the stale loop variable `i` left over from the previous loop, which is
# only correct when every directory path happens to have the same length.
# Use the vectorized nchar() instead.
numerals <- substr(hawqs_dirs, nchar(hawqs_dirs) - 1, nchar(hawqs_dirs))

### Read in HAWQS SWAT Inputs
# hawqs_hru <- read.csv(paste0(Input_path, "hawqs_hru.csv"))

# The below code takes ~10 minutes to run.
# Read each region's HRU table once and row-bind (replaces the original
# exists()/rbind-growing loop, which also duplicated rows on re-run).
hawqs_hru <- do.call(rbind, lapply(numerals, function(num) {
  read.csv(paste0(TAMU_path, "SWAT_Input_Files_2022-01/hawqs-v2-", num, "-huc12-hru.csv"))
}))

# Zero-pad the HUC12 codes to 12 digits once, after binding. (The original
# re-padded the entire accumulated frame on every loop iteration --
# idempotent, so the result is identical, but wasteful.)
hawqs_hru$Subbasin <- str_pad(hawqs_hru$Subbasin, width = 12, side = "left", pad = "0")

### HAWQS HRU QA ---------------------------------------------------------------
# Pull HRUs in HUC8s 0804 and 0808 for spot-checking (these are flagged below
# as the main source of missing wetland data).
hawqs_hru_QA <- hawqs_hru %>%
  mutate(huc8 = substr(Subbasin, 1, 4)) %>%
  filter(huc8 %in% c("0808", "0804"))

## 404 Modeled Impacts to Wetlands -------------------------------------
# Section 404 permit impact changes by HUC12, from the ORM processing step.
orm <- read_excel(path = paste0(ORM_path, "FINAL_404_ImpactChanges_2021.09.23.v2.xlsx"),
                  sheet = "Impacts_By_HUC12")

## Import Census Data ---------------------------------------------------------
# use S2501_C01_001E "occupied housing units"
# figure out how to make it flexible for CBGs or Counties

# Census Occupied Housing Data Call -------------------------------------------
states <- State_xwalk$stusps

## Census API Call ------------------------------------------------------------
# One get_acs() call per state per year; map_df stacks the results into a
# single long data frame with a `year` column. (Replaces the original
# rbind-in-a-loop accumulation and the unused cen_temp_data scaffolding.)
cen_data <- map_df(years, function(yr) {
  map_df(states, function(st) {
    # NOTE(review): `overwrite` is not a documented get_acs() argument --
    # confirm it is needed; kept here to preserve the original call.
    get_acs(geography = geography,
            year = yr,
            survey = "acs5",
            state = st,
            variables = Census_var,
            overwrite = TRUE)
  }) %>% mutate(year = yr)
})

## Pivot Census Data Wider: one row per GEOID, one column per ACS variable.
cen_data_wide <- cen_data %>%
  select(GEOID, variable, estimate) %>%
  pivot_wider(names_from = "variable", values_from = "estimate")

## Save Global Environment ----------------------------------------------------
# save.image(file = paste0(Output_path, "100mileWetlandAnalysis_All_6-7-22.RData"))

# GIS Analysis ----------------------------------------------------------------
## Regular Counties -----------------------------------------------------------

### Convert polygons to North American Albers projection ----------------------
# Equal-area conic projection in meters, so buffer distances and polygon
# areas are computed in metric units rather than degrees.
albers_crs <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs "

TIGER_boundary_albers <- st_transform(TIGER_boundary, crs = albers_crs)

### Create centroids for boundary polygons ------------------------------------
TIGER_boundary_points <- st_centroid(TIGER_boundary_albers)

## Reproject HUC 12 Boundaries ------------------------------------------------
hawqs_huc12 <- st_transform(hawqs_huc12, crs = albers_crs)

# Full HUC12 polygon areas (m^2, as a units vector) -- the denominator for
# the buffer-overlap ratios computed later.
hawqs_huc12$Area_m2 <- st_area(hawqs_huc12)

## Buffer and Intersect Loop ---------------------------------------------------
# For every state: buffer each county centroid at 100 and 30 miles, intersect
# the buffers with the HUC12 boundaries, and record the intersection areas.
# Results are collected per state in buff_100 / buff_30 and combined below.
ptm <- proc.time()
fac_buffer <- list()
buff_int <- list()
buff_100 <- list()
buff_30 <- list()
state_fips <- unique(TIGER_boundary$STATEFP)

# takes 2.63 hours to run the loop with all states
for (j in seq_along(state_fips)) {

  boundary_points_filt <- TIGER_boundary_points %>%
    filter(STATEFP == state_fips[j])

  for (i in seq_along(Buffer_Dist)) {
    # Draw specified buffers around county centroids (miles -> meters)
    fac_buffer[[i]] <-
      st_buffer(boundary_points_filt, dist = Buffer_Dist[i] / dist_conv_metertomiles)

    # Intersect the buffers with the HUC12 boundaries
    buff_int[[i]] <- st_intersection(fac_buffer[[i]], hawqs_huc12)

    # Calculate area of intersection
    buff_int[[i]]$Int_Area_m2 <- st_area(buff_int[[i]])
  }

  # Buffer_Dist is c(100, 30), so index 1 = 100-mile and index 2 = 30-mile.
  buff_100[[j]] <- buff_int[[1]] %>%
    st_drop_geometry() %>%
    select(GEOID, HUC12, Area_m2, Int_Area_m2)

  buff_30[[j]] <- buff_int[[2]] %>%
    st_drop_geometry() %>%
    select(GEOID, HUC12, Area_m2, Int_Area_m2)
}
proc.time() - ptm

## Combine Buffer Datasets ----------------------------------------------------
# Row-bind the per-state results in one shot. do.call(rbind, ...) replaces
# the original exists()/rbind-in-a-loop pattern, which both grew the object
# quadratically and silently appended duplicate rows whenever the script was
# re-run in a session where buff_100_total already existed.
buff_100_total <- do.call(rbind, buff_100)
buff_30_total <- do.call(rbind, buff_30)

# Calculate the proportion of the HUC within each buffer
# (intersection area / full HUC12 area; a dimensionless units ratio).
buff_100_total$HUC_RATIO_100 <- with(buff_100_total, Int_Area_m2 / Area_m2)
buff_30_total$HUC_RATIO_30 <- with(buff_30_total, Int_Area_m2 / Area_m2)

## Oversized Counties ----------------------------------------------------------
big_count_albers <- st_transform(big_count, 
                                 crs="+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ")

big_count_points <- st_centroid(big_count_albers)

# Oversized counties were pre-split into sub-polygons, so a GEOID can appear
# more than once; suffix "_1"/"_2" keeps the pieces distinct downstream.
# NOTE(review): this only disambiguates the first duplicate -- a county split
# into three or more pieces would get "_2" repeatedly; confirm no county is
# split more than twice.
big_count_points$Split_GEOID <- ifelse(!duplicated(big_count_points$GEOID),
                                       paste0(big_count_points$GEOID, "_1"),
                                       paste0(big_count_points$GEOID, "_2"))

### Buffer and Intersect Loop ---------------------------------------------------
# Same buffer/intersect pass as for the regular counties, but run on the
# centroids of the pre-split oversized-county polygons; Split_GEOID is
# carried through so the pieces stay distinct.
ptm <- proc.time()
fac_buffer_BC <- list()
buff_int_BC <- list()
buff_100_BC <- list()
buff_30_BC <- list()

for (j in seq_along(big_st)) {

  boundary_points_filt_BC <- big_count_points %>%
    filter(STATEFP == big_st[j])

  for (i in seq_along(Buffer_Dist)) {
    # Draw specified buffers around the county-piece centroids (miles -> meters)
    fac_buffer_BC[[i]] <-
      st_buffer(boundary_points_filt_BC, dist = Buffer_Dist[i] / dist_conv_metertomiles)

    # Intersect the buffers with the HUC12 boundaries
    buff_int_BC[[i]] <- st_intersection(fac_buffer_BC[[i]], hawqs_huc12)

    # Calculate area of intersection
    buff_int_BC[[i]]$Int_Area_m2 <- st_area(buff_int_BC[[i]])
  }

  # Buffer_Dist is c(100, 30), so index 1 = 100-mile and index 2 = 30-mile.
  buff_100_BC[[j]] <- buff_int_BC[[1]] %>%
    st_drop_geometry() %>%
    select(GEOID, Split_GEOID, HUC12, Area_m2, Int_Area_m2)

  buff_30_BC[[j]] <- buff_int_BC[[2]] %>%
    st_drop_geometry() %>%
    select(GEOID, Split_GEOID, HUC12, Area_m2, Int_Area_m2)
}
proc.time() - ptm
### Combine Buffer Datasets ----------------------------------------------------
# Row-bind the per-state oversized-county results in one shot. Replaces the
# exists()/rbind-in-a-loop pattern, which grew the object quadratically and
# duplicated rows if the script was re-run in the same session.
buff_100_total_BC <- do.call(rbind, buff_100_BC)
buff_30_total_BC <- do.call(rbind, buff_30_BC)

# Calculate the proportion of the HUC within each buffer
buff_100_total_BC$HUC_RATIO_100 <- with(buff_100_total_BC, Int_Area_m2 / Area_m2)
buff_30_total_BC$HUC_RATIO_30 <- with(buff_30_total_BC, Int_Area_m2 / Area_m2)

# Bind Regular and Oversize Areas together ------------------------------------
# Regular counties were never split, so their Split_GEOID is just the GEOID.
buff_100_total$Split_GEOID <- buff_100_total$GEOID
buff_30_total$Split_GEOID <- buff_30_total$GEOID

# rbind matches columns by name, so the differing column order between the
# regular and oversized frames is handled correctly.
buff_100_bind <- rbind(buff_100_total, buff_100_total_BC)
buff_30_bind <- rbind(buff_30_total, buff_30_total_BC)

# Format Data for Analysis ----------------------------------------------------
## Summarize HAWQS Wetland Areas by HUC12 -------------------------------------
# Total wetland area per HUC12 across six land-use codes, plus the forested
# subset (the *WF codes).
# NOTE(review): code meanings inferred from the forested grouping -- confirm
# against the HAWQS land-use dictionary.
# %in% replaces the original chain of == tests joined with |; unlike the ==
# chain it treats an NA Land_Use as "not a wetland" rather than propagating
# NA into the sums.
hru_wetland <- hawqs_hru %>%
  rename(HUC12 = Subbasin) %>%
  group_by(HUC12) %>%
  summarise(
    TotalWetlandArea = sum(HruArea[Land_Use %in% c("AGWT", "AGWF", "RIWF", "RIWN", "UPWF", "UPWN")]),
    ForestWetlandArea = sum(HruArea[Land_Use %in% c("AGWF", "RIWF", "UPWF")])
  )

# Merge buffer data frames: left join on (Split_GEOID, HUC12); pairs with no
# 30-mile overlap get NA in the 30-mile columns.
buff_merge <- merge(buff_100_bind, buff_30_bind, by = c("Split_GEOID", "HUC12"), all.x = TRUE)

## Merge Datasets -------------------------------------------------------------
### Join summarized wetland areas to HUC12 boundaries by HUC12
# Left-join the HUC12 wetland summary and the ORM 404 impact data onto the
# buffer-intersection rows; HUC12s missing from either source produce NAs.
hawqs_wetland_changes_merge <- Reduce(function(x, y) merge(x=x, y=y, by="HUC12", all.x=T),
       list(buff_merge, hru_wetland, orm))

# Collapse to one row per (split) county: buffer-ratio-weighted wetland areas
# and permit impacts.
# NOTE(review): Prop.Forested sums WITHOUT na.rm = TRUE while every other
# aggregate uses na.rm = TRUE, so a single NA wetland area makes that
# county's proportion NA -- confirm whether the asymmetry is intentional.
# Baseline.Wetlands is an unweighted sum over intersected HUC12 rows (no
# HUC_RATIO weighting) -- presumably deliberate; verify.
hawqs_wetland_changes_summary <- hawqs_wetland_changes_merge %>%
  group_by(Split_GEOID) %>%
  summarise(Prop.Forested = (sum((ForestWetlandArea*HUC_RATIO_100))/sum((TotalWetlandArea*HUC_RATIO_100))),
            Baseline.Wetlands = sum(TotalWetlandArea),
            Prop.local = sum((TotalWetlandArea*HUC_RATIO_30), na.rm = T)/sum((TotalWetlandArea*HUC_RATIO_100), na.rm = T),
            impacts.NWPR.perm = sum((PERM_TOTAL_NWPR*HUC_RATIO_100), na.rm = T),
            impacts.Rapanos.perm = sum((PERM_TOTAL_Rapanos*HUC_RATIO_100), na.rm = T),
            impacts.NWPR.temp = sum((TEMP_TOTAL_NWPR*HUC_RATIO_100), na.rm = T),
            impacts.Rapanos.temp = sum((TEMP_TOTAL_Rapanos*HUC_RATIO_100), na.rm = T)) %>%
  mutate(GEOID = substr(Split_GEOID, 1, 5)) # county FIPS = first 5 chars of Split_GEOID

### QA - how many HUC12s have missing wetland change data? Over half.
length(unique(hawqs_wetland_changes_merge$HUC12[is.na(hawqs_wetland_changes_merge$PERM_ACRES_CHANGE)]))/
  length(unique(hawqs_wetland_changes_merge$HUC12))

### QA - how many HUC12s have missing wetland data? Less than 3% but mostly in region 08; 0804 and 0808
length(unique(hawqs_wetland_changes_merge$HUC12[is.na(hawqs_wetland_changes_merge$TotalWetlandArea)]))/
  length(unique(hawqs_wetland_changes_merge$HUC12))
#*# AL: Created a data frame so you could investigate further.
wetland_area_issues <- hawqs_wetland_changes_merge[which(is.na(hawqs_wetland_changes_merge$TotalWetlandArea)), ]

### Join boundary polygons with census data by GEOID (inner join: counties
### with no census row are dropped)
census_wetland_merge <- merge(hawqs_wetland_changes_summary, cen_data_wide, by = "GEOID") %>%
  mutate(STATEFP = substr(GEOID,1,2))

## Read in Woods and Poole Data -----------------------------------------------
wp_data_raw <- read.csv(paste0(wood_poole_path, "Woods_and_Poole_2021_CountyData_v2.csv"))

# Parse the county FIPS code and state abbreviation out of fixed positions at
# the end of the CountyName string.
# NOTE(review): position-based parsing -- verify it holds for every row of
# the Woods & Poole file.
wp_data <- wp_data_raw %>%
  mutate(GEOID = trimws(str_sub(CountyName, (nchar(CountyName)-6), (nchar(CountyName)-1))),
         State = trimws(str_sub(CountyName, (nchar(CountyName)-19), (nchar(CountyName)-17))))

# BUG FIX: the original took `we` from census_wetland_region, which is not
# created until the next step (and that step itself depends on `ans`, making
# the reference circular on a clean run). Use census_wetland_merge, the
# pre-filter frame that carries the same set of GEOIDs.
we <- census_wetland_merge$GEOID
wp <- wp_data$GEOID
ans <- setdiff(we, wp) # 51 missing FIPS codes from VA

### Join Region Crosswalk and Format Final Output -----------------------------
# Attach state abbreviation and region flags, log-transform median income,
# and drop counties missing from Woods & Poole (`ans`) -- except Hawaii,
# which is kept regardless.
census_wetland_region <- merge(census_wetland_merge, State_xwalk, by.x = "STATEFP", by.y = "st", all.x = T) %>%
  mutate(`ln(Median Income)` = log(B19013_001)) %>%
  select(GEOID, Split_GEOID, stusps, `ln(Median Income)`, sagulf, nema, nmw, Prop.Forested, Prop.local, Baseline.Wetlands, impacts.NWPR.perm,
         impacts.Rapanos.perm, impacts.NWPR.temp, impacts.Rapanos.temp) %>%
  filter(!GEOID %in% ans | stusps == "HI")

# Persons per Household -------------------------------------------------------
# DP02_0016E
# NOTE(review): tidycensus drops the trailing "E" from requested estimate
# variables, so "DP02_0016" should match rows requested as "DP02_0016E" --
# confirm. `cen_pphh` is not used again in this script.
cen_pphh <- cen_data %>%
  filter(variable == "DP02_0016")

# QA Number of Row between Census Wetland region and HH WTP
# NOTE(review): `hh_wtp` is never created in this file -- it presumably comes
# from the loaded .RData image or another script; verify it exists before
# running this section.
setdiff(census_wetland_region$GEOID, hh_wtp$GEOID)
setdiff(hh_wtp$GEOID, census_wetland_region$GEOID)

# Merge HH WTP and 100-mile buffer (inner join on county FIPS)
wetlands_hhwtp <- merge(census_wetland_region, hh_wtp, by = "GEOID")

# Write output files ----------------------------------------------------------
write.csv(census_wetland_region, paste0(Output_path, "WOTUS_100milebuffer_wetlandanalysis_6-15-22.csv"), row.names=FALSE)
write.csv(hh_wtp, paste0(Output_path, "WOTUS_100milebuffer_HouseholdWTP_6-15-22.csv"), row.names=FALSE)
write.xlsx(census_wetland_region, paste0(Output_path, "WOTUS_100milebuffer_wetlandanalysis_6-15-22.xlsx"))
write.xlsx(hh_wtp, paste0(Output_path, "WOTUS_100milebuffer_HouseholdWTP_6-15-22.xlsx"))

# Alaska ---------------------------------------------------------------------
# msa <- st_read(paste0(Input_path, "Alaska_MSA_sub.shp")) 
# 
# msa$Split_GEOID <- ifelse(duplicated(msa$GEO_ID)==TRUE, paste0(msa$GEO_ID, "_S"), paste0(msa$GEO_ID))
# 
# 
# msa_albers <- st_transform(msa, 
#                crs="+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ")
# 
# msa_points <- st_centroid(msa_albers)
# 
# ### Buffer and Intersect Loop ---------------------------------------------------
# ptm <- proc.time()
# dist_conv_metertomiles <- 0.000621371
# ak <- "AK"
# fac_buffer_AK <- list()
# buff_int_AK <- list()
# buff_100_AK <- list()
# buff_30_AK <- list()
# 
# boundary_points_filt_AK <- msa_points
#   
#   for (i in 1:length(Buffer_Dist)) {
#     # Draw specified buffers around facility locations
#     fac_buffer_AK[[i]] <-
#       st_buffer(boundary_points_filt_AK, dist = Buffer_Dist[i] / dist_conv_metertomiles)
#     
#     # Intersect the facility buffers with CBG boundaries
#     buff_int_AK[[i]] <- st_intersection(fac_buffer_AK[[i]], hawqs_huc12)
#     
#     # Calculate area of intersection
#     buff_int_AK[[i]]$Int_Area_m2 <- st_area(buff_int_AK[[i]])
#     
#   }
# buff_100_total_AK <- buff_int_AK[[1]] %>%
#     st_drop_geometry() %>%
#     select(GEO_ID, Split_GEOID, HUC12, Area_m2, Int_Area_m2)
#   
# buff_30_total_AK <- buff_int_AK[[2]] %>%
#     st_drop_geometry() %>%
#     select(GEO_ID, Split_GEOID, HUC12, Area_m2, Int_Area_m2)
# 
# 
# ### Combine Buffer Datasets ----------------------------------------------------
# # for (i in 1:length(buff_100_AK)) {
# #   if (exists("buff_100_total_AK")) {
# #     buff_100_temp <- buff_100_AK[[i]]
# #     
# #     buff_100_total_AK <- rbind(buff_100_total_AK, buff_100_temp)
# #   } else {
# #     buff_100_total_AK <- buff_100_AK[[i]]
# #   }
# # }
# # 
# # for (i in 1:length(buff_30_AK)) {
# #   if (exists("buff_30_total_AK")) {
# #     buff_30_temp <- buff_30_AK[[i]]
# #     
# #     buff_30_total_AK <- rbind(buff_30_total_AK, buff_30_temp)
# #   } else {
# #     buff_30_total_AK <- buff_30_AK[[i]]
# #   }
# # }
# 
# # Calculate the proportion of the HUC within each buffer
# buff_100_total_AK$HUC_RATIO_100 <- with(buff_100_total_AK, Int_Area_m2/(Area_m2))
# buff_30_total_AK$HUC_RATIO_30 <- with(buff_30_total_AK, Int_Area_m2/(Area_m2))
# 

# prop local -----------------------------------------------------------------
# Average share of each county's wetlands that are "local" (within 30 miles
# of the centroid), by state.
# NOTE(review): mean() has no na.rm = TRUE here -- any NA/NaN Prop.local
# (e.g. from a zero 100-mile wetland denominator) makes that state's average
# NA; confirm whether that is intended.
prop.local.avg <- census_wetland_region %>%
  group_by(stusps) %>%
  summarise(avg.prop.local = mean(Prop.local))

# write.csv(prop.local.avg, paste0(Output_path, "prop_local_avg.csv"), row.names = F)

# NOTE(review): this reassigns `states`, clobbering the state-abbreviation
# vector built earlier for the census API call -- rename one of the two if
# anything downstream still needs the original.
states <- data.frame(state.name, state.abb)
