############### WOTUS Wetland 50-Mile Buffer Analysis  #######################

# Author: Sam Ennett
# Date Created: 05/17/22
# Date updated: 10/05/22

options(stringsAsFactors = FALSE)

pacman::p_load(dplyr, sf, tidycensus, readxl, openxlsx, tidyverse)

# NOTE(review): clearing the workspace from inside a script is fragile (it
# wipes anything the caller had loaded). Kept because the script relies on a
# clean environment for its exists() checks; prefer running in a fresh session.
rm(list = ls())
gc()

# memory.limit() is Windows-only and defunct as of R >= 4.2 (calling it there
# is an error, not a warning), so only call it where it still works.
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
  memory.limit(size = 100000)
}

# Variables to change before running the code ---------------------------------
Input_path <- "C:/Users/56407/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/100 Mile Buffer/Inputs/"
Output_path <- "C:/Users/56407/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/100 Mile Buffer/Outputs/"
TAMU_path <- "C:/Users/56407/ICF/EPA-Economic and Water Quality Modeling and Assessment Support - From_TAMU/"
GIS_path <- "C:/Users/56407/ICF/Le, Alyssa - GIS_Data/TIGER/"
ORM_path <- "C:/Users/56407/ICF/WOTUS Reconsideration (ICF Internal Site) - General/ORM Data Processing/Outputs/"
wood_poole_path <- "C:/Users/56407/ICF/WOTUS Reconsideration (ICF Internal Site) - General/National Analysis/Population Projections/Outputs/"

# load(paste0(Output_path, "50mileWetlandAnalysis_10-5-22.RData"))

# Conversion factor: metres -> miles
dist_conv_metertomiles <- 0.000621371

# Buffer distances (miles); order matters downstream (slot 1 = 50, slot 2 = 30)
Buffer_Dist <- c(50, 30)

# ACS Call Variables
years <- c(2019)
geography <- "county"

# Activate census API key.
# SECURITY FIX: the key was previously hard-coded here and committed with the
# script. Read it from the environment instead (e.g. set CENSUS_API_KEY in
# ~/.Renviron) so the secret never lives in source control.
census_api_key(Sys.getenv("CENSUS_API_KEY"))

Census_var <- c("S2501_C01_001E", # occupied housing units
                "B19013_001",     # median household income
                "DP02_0016E")     # persons per household

# Read in data ----------------------------------------------------------------
## Read in state crosswalk for FIPS code, regions -----------------------------
State_xwalk <- read.csv(paste0(Input_path,"us-state-ansi-fips-region.csv"),
                        header = TRUE) %>%
  filter(stusps != "AK" & stusps != "DC") # remove Alaska to deal with separately

# Zero-pad the state FIPS to two digits so later joins against GEOID
# substrings (e.g. substr(GEOID, 1, 2)) match correctly
State_xwalk$st <- str_pad(State_xwalk$st, width = 2, side = "left", pad = "0")

## TIGER Boundary Dataset ------------------------------------------------------
### Import TIGER boundaries
TIGER_boundary_raw <- st_read(paste0(GIS_path, "2019/County/tl_2019_us_county.shp"))

## Import shapefile with predetermined oversize counties
big_count <- st_read(paste0(Input_path, "tl_2020_us_county_CEC_oversized_sub.shp"))

## Get unique GEOIDs for oversized counties
big_geoid <- unique(big_count$GEOID)
big_st <- unique(big_count$STATEFP)

# Oversized counties get their own buffer loop below, so drop them from the
# regular county set here to avoid double counting
TIGER_boundary <- subset(TIGER_boundary_raw, !(GEOID %in% big_geoid))

## HAWQS HUC Boundaries --------------------------------------------------------
# create list of directories (one per HAWQS region)
hawqs_dirs <- list.dirs(path = paste0(TAMU_path, "HAWQS_HUC_Dataset_2022-01"),
                        recursive = FALSE)

# Read one region's HUC12 watershed shapefile and tag it with its region index.
# Each region directory ends in a two-character numeral that is repeated in the
# "Merged<numeral>" folder name.
read_hawqs_region <- function(dir, region_id) {
  numeral <- substr(dir, nchar(dir) - 1, nchar(dir))
  st_read(paste0(dir, "/SWAT/Fields_CDL/HUC12/Merged", numeral,
                 "/Watershed/Shapes/demwshed.shp")) %>%
    mutate(region = region_id)
}

# Read all regions and bind once at the end, instead of the old
# exists()/rbind-in-loop pattern (which copies the accumulated sf object on
# every iteration and breaks on re-runs when hawqs_huc12 already exists).
hawqs_huc12 <- do.call(
  rbind,
  lapply(seq_along(hawqs_dirs),
         function(i) read_hawqs_region(hawqs_dirs[[i]], i))
)

unique(hawqs_huc12$region)
# st_write(hawqs_huc12, paste0(Output_path, "hawqs_huc12.shp"))

## Import Census Data ---------------------------------------------------------
# use S2501_C01_001E "occupied housing units"
# figure out how to make it flexible for CBGs or Counties

# Census Occupied Housing Data Call -------------------------------------------
states <- State_xwalk$stusps

## Census API Call ------------------------------------------------------------
# One ACS 5-year pull per (year, state); map_df row-binds everything, which
# replaces the old rbind-in-loop + exists()/rm() bookkeeping.
cen_data <- map_df(seq_along(years), function(i) {
  map_df(states, function(x) {
    get_acs(geography = geography,
            year = years[i],
            survey = "acs5",
            state = x,
            variables = Census_var,
            overwrite = TRUE)
  }) %>%
    mutate(year = years[i])
})

## Pivot Census Data Wider (one row per GEOID, one column per ACS variable)
cen_data_wide <- cen_data %>%
  select(GEOID, variable, estimate) %>%
  pivot_wider(names_from = "variable", values_from = "estimate")

## Save Global Environment ----------------------------------------------------
# NOTE(review): filename says "200mile" although this is the 50/30-mile run --
# presumably a stale name carried over; confirm before relying on it.
save.image(file = paste0(Output_path, "200mileWetlandAnalysis_All_7-14-22.RData"))

# GIS Analysis ----------------------------------------------------------------
## Regular Counties -----------------------------------------------------------

# North American Albers equal-area projection, defined once so the county and
# HUC12 layers are guaranteed to use the identical CRS string (it was
# previously duplicated inline at each st_transform call).
albers_crs <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs "

### Convert polygons to North American Albers projection ----------------------
TIGER_boundary_albers <- st_transform(TIGER_boundary, crs = albers_crs)

### Create centroids for boundary polygons ------------------------------------
TIGER_boundary_points <- st_centroid(TIGER_boundary_albers)

## Reproject HUC 12 Boundaries ------------------------------------------------
hawqs_huc12 <- st_transform(hawqs_huc12, crs = albers_crs)

# Total HUC12 area (m^2); used below as the denominator of the buffer ratios
hawqs_huc12$Area_m2 <- st_area(hawqs_huc12)

## Buffer and Intersect Loop ---------------------------------------------------
# For every state: buffer each county centroid at each distance, intersect the
# buffers with the HUC12 layer, and keep the intersection areas (no geometry).
ptm <- proc.time()
state_fips <- unique(TIGER_boundary$STATEFP)

# Preallocate one result slot per state (avoids growing lists in the loop)
buff_50 <- vector("list", length(state_fips))
buff_30 <- vector("list", length(state_fips))

for (j in seq_along(state_fips)) {

  fac_buffer <- list()
  buff_int <- list()
  buff_int_nogeom <- list()

  # County centroids for this state only
  boundary_points_filt <- TIGER_boundary_points %>%
    filter(STATEFP == state_fips[j])

  for (i in seq_along(Buffer_Dist)) {
    # Draw specified buffers around facility locations (miles -> metres)
    fac_buffer[[i]] <-
      st_buffer(boundary_points_filt, dist = Buffer_Dist[i] / dist_conv_metertomiles)

    # Intersect the facility buffers with the HUC12 boundaries
    buff_int[[i]] <- st_intersection(fac_buffer[[i]], hawqs_huc12)

    # Calculate area of intersection
    buff_int[[i]]$Int_Area_m2 <- st_area(buff_int[[i]])

    # drop geometry for space
    buff_int_nogeom[[i]] <- buff_int[[i]] %>%
      st_drop_geometry()
  }

  rm(buff_int, fac_buffer)
  gc()

  # Buffer_Dist is c(50, 30): slot 1 is the 50-mile run, slot 2 the 30-mile
  buff_50[[j]] <- buff_int_nogeom[[1]] %>%
    select(GEOID, HUC12, Area_m2, Int_Area_m2)

  buff_30[[j]] <- buff_int_nogeom[[2]] %>%
    select(GEOID, HUC12, Area_m2, Int_Area_m2)
}
proc.time() - ptm

## Combine Buffer Datasets ----------------------------------------------------
# Bind the per-state results in one shot rather than growing a data frame with
# rbind() inside a loop (quadratic copying; the old exists() guard also broke
# on re-runs, appending to a leftover total from a previous execution).
buff_50_total <- do.call(rbind, buff_50)
buff_30_total <- do.call(rbind, buff_30)

# Calculate the proportion of the HUC within each buffer
buff_50_total$HUC_RATIO_50 <- with(buff_50_total, Int_Area_m2 / Area_m2)
buff_30_total$HUC_RATIO_30 <- with(buff_30_total, Int_Area_m2 / Area_m2)

## Oversized Counties ----------------------------------------------------------
big_count_albers <- st_transform(big_count, 
                                 crs="+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ")

big_count_points <- st_centroid(big_count_albers)

# Each oversized county was pre-split; tag the first occurrence of a GEOID
# "_1" and any later occurrence "_2" to keep the halves distinct.
# NOTE(review): assumes at most two rows per GEOID -- a third row would also
# get "_2". Confirm against the oversized-county shapefile.
big_count_points$Split_GEOID <- ifelse(!duplicated(big_count_points$GEOID),
                                       paste0(big_count_points$GEOID, "_1"),
                                       paste0(big_count_points$GEOID, "_2"))

### Buffer and Intersect Loop ---------------------------------------------------
# Same buffer/intersect logic as the regular counties, run per oversized state.
# Preallocate one result slot per state.
buff_50_BC <- vector("list", length(big_st))
buff_30_BC <- vector("list", length(big_st))

for (j in seq_along(big_st)) {

  fac_buffer_BC <- list()
  buff_int_BC <- list()
  buff_int_nogeom_BC <- list()

  boundary_points_filt_BC <- big_count_points %>%
    filter(STATEFP == big_st[j])

  for (i in seq_along(Buffer_Dist)) {
    # Draw specified buffers around the split-county centroids (miles -> m)
    fac_buffer_BC[[i]] <-
      st_buffer(boundary_points_filt_BC, dist = Buffer_Dist[i] / dist_conv_metertomiles)

    # Intersect the buffers with the HUC12 boundaries
    buff_int_BC[[i]] <- st_intersection(fac_buffer_BC[[i]], hawqs_huc12)

    # Calculate area of intersection
    buff_int_BC[[i]]$Int_Area_m2 <- st_area(buff_int_BC[[i]])

    # drop geometry for space
    buff_int_nogeom_BC[[i]] <- buff_int_BC[[i]] %>%
      st_drop_geometry()
  }

  rm(buff_int_BC, fac_buffer_BC)
  gc()

  # Slot 1 = 50-mile buffer, slot 2 = 30-mile (order of Buffer_Dist)
  buff_50_BC[[j]] <- buff_int_nogeom_BC[[1]] %>%
    select(GEOID, Split_GEOID, HUC12, Area_m2, Int_Area_m2)

  buff_30_BC[[j]] <- buff_int_nogeom_BC[[2]] %>%
    select(GEOID, Split_GEOID, HUC12, Area_m2, Int_Area_m2)
}

### Combine Buffer Datasets ----------------------------------------------------
# Direct binds replace the old rbind-grow loops and the unguarded rm() calls
# (rm() on a nonexistent object warns on a fresh run).
buff_50_total_BC <- do.call(rbind, buff_50_BC)
buff_30_total_BC <- do.call(rbind, buff_30_BC)

# Calculate the proportion of the HUC within each buffer
buff_50_total_BC$HUC_RATIO_50 <- with(buff_50_total_BC, Int_Area_m2 / Area_m2)
buff_30_total_BC$HUC_RATIO_30 <- with(buff_30_total_BC, Int_Area_m2 / Area_m2)

# Bind Regular and Oversize Areas together ------------------------------------
# Regular counties were never split, so their Split_GEOID is just the GEOID;
# adding the column makes the two tables rbind-compatible.
buff_50_total$Split_GEOID <- buff_50_total$GEOID
buff_30_total$Split_GEOID <- buff_30_total$GEOID

buff_50_bind <- rbind(buff_50_total, buff_50_total_BC)
buff_30_bind <- rbind(buff_30_total, buff_30_total_BC)

# BUG FIX: sys.time() does not exist (R is case-sensitive) and would error;
# Sys.time() was intended.
end.time <- Sys.time()
# Format Data for Analysis ----------------------------------------------------
## HAWQS wetlands -------------------------------------------------------------
### Region numerals
# BUG FIX: the original used nchar(hawqs_dirs[[i]]) with a stale loop index,
# so every directory was trimmed using the LAST directory's length -- only
# correct by accident when all directory names have the same length. substr()
# is vectorized, so compute each directory's own trailing two characters.
numerals <- substr(hawqs_dirs, nchar(hawqs_dirs) - 1, nchar(hawqs_dirs))

### Read in HAWQS SWAT Inputs
# hawqs_hru <- read.csv(paste0(Input_path, "hawqs_hru.csv"))

# The below code takes ~10 minutes to run.
# Read the per-region HRU tables and bind them once, replacing the old
# exists()/rbind-in-loop pattern.
hawqs_hru <- do.call(
  rbind,
  lapply(numerals, function(num) {
    read.csv(paste0(TAMU_path, "SWAT_Input_Files_2022-01/hawqs-v2-", num,
                    "-huc12-hru.csv"))
  })
)

# Zero-pad the HUC12 codes once, after binding (the original re-padded the
# entire accumulated table on every loop iteration).
hawqs_hru$Subbasin <- str_pad(hawqs_hru$Subbasin, width = 12, side = "left", pad = "0")

### HAWQS HRU QA ---------------------------------------------------------------
# NOTE(review): substr(.., 1, 4) takes FOUR characters, so this is really a
# HUC4 code despite the huc8 name; the filter values are 4 characters long,
# consistent with that.
hawqs_hru_QA <- hawqs_hru %>%
  mutate(huc8 = substr(Subbasin, 1, 4)) %>%
  filter(huc8 %in% c("0808", "0804"))

## 404 Modeled Impacts to Wetlands -------------------------------------
orm_old <- read_excel(path = paste0(ORM_path, "FINAL_404_ImpactChanges_2021.09.23.v2.xlsx"), sheet = "Impacts_By_HUC12")
orm <- read.csv(paste0(ORM_path, "Impacts_By_HUC12_2022.07.28.csv"))


## Summarize HAWQS Wetland Areas by HUC12 -------------------------------------
# Wetland land-use codes; the forested wetlands are the AGWF/RIWF/UPWF subset.
# %in% replaces the long chain of `==` ... `|` comparisons (note: %in% treats
# an NA Land_Use as "not wetland" rather than propagating NA).
wetland_lu <- c("AGWT", "AGWF", "RIWF", "RIWN", "UPWF", "UPWN")
forest_lu  <- c("AGWF", "RIWF", "UPWF")

hru_wetland <- hawqs_hru %>%
  rename(HUC12 = Subbasin) %>%
  group_by(HUC12) %>%
  summarise(TotalWetlandArea = sum(HruArea[Land_Use %in% wetland_lu]),
            ForestWetlandArea = sum(HruArea[Land_Use %in% forest_lu]))

# Merge buffer data frames (left join: keep every 50-mile row)
buff_merge <- merge(buff_50_bind, buff_30_bind, by = c("Split_GEOID", "HUC12"), all.x = TRUE)

## Merge Datasets -------------------------------------------------------------
### Join summarized wetland areas to HUC12 boundaries by HUC12
# Left-join chain keyed on HUC12: buffer rows keep their wetland areas
# (hru_wetland) and modeled 404 impacts (orm); unmatched HUC12s get NAs.
hawqs_wetland_changes_merge <- Reduce(function(x, y) merge(x=x, y=y, by="HUC12", all.x=T),
       list(buff_merge, hru_wetland, orm))

# County-level (Split_GEOID) summary: each HUC12's contribution is weighted by
# the fraction of the HUC falling inside the 50- or 30-mile buffer.
# NOTE(review): Prop.Forested has no na.rm = TRUE while every other term does,
# so any NA wetland area makes Prop.Forested NA for that county -- confirm
# whether that is intentional before "fixing" it.
hawqs_wetland_changes_summary <- hawqs_wetland_changes_merge %>%
  group_by(Split_GEOID) %>%
  summarise(Prop.Forested = (sum((ForestWetlandArea*HUC_RATIO_50))/sum((TotalWetlandArea*HUC_RATIO_50))),
            Baseline.Wetlands_50 = sum(TotalWetlandArea*HUC_RATIO_50, na.rm = T),
            Baseline.Wetlands_30 = sum(TotalWetlandArea*HUC_RATIO_30, na.rm = T),
            Prop.local = sum((TotalWetlandArea*HUC_RATIO_30), na.rm = T)/sum((TotalWetlandArea*HUC_RATIO_50), na.rm = T),
            impacts.NWPR.perm = sum((PERM_TOTAL_NWPR*HUC_RATIO_50), na.rm = T),
            impacts.Rapanos.perm = sum((PERM_TOTAL_Rapanos*HUC_RATIO_50), na.rm = T),
            impacts.NWPR.temp = sum((TEMP_TOTAL_NWPR*HUC_RATIO_50), na.rm = T),
            impacts.Rapanos.temp = sum((TEMP_TOTAL_Rapanos*HUC_RATIO_50), na.rm = T)) %>%
  mutate(GEOID = substr(Split_GEOID, 1, 5)) # county FIPS = first 5 chars of Split_GEOID

### QA - how many HUC12s have missing wetland change data? Over half.
length(unique(hawqs_wetland_changes_merge$HUC12[is.na(hawqs_wetland_changes_merge$PERM_ACRES_CHANGE)]))/
  length(unique(hawqs_wetland_changes_merge$HUC12))

### QA - how many HUC12s have missing wetland data? Less than 3% but mostly in region 08; 0804 and 0808
length(unique(hawqs_wetland_changes_merge$HUC12[is.na(hawqs_wetland_changes_merge$TotalWetlandArea)]))/
  length(unique(hawqs_wetland_changes_merge$HUC12))
#*# AL: Created a data frame so you could investigate further.
wetland_area_issues <- hawqs_wetland_changes_merge[which(is.na(hawqs_wetland_changes_merge$TotalWetlandArea)), ]

### Join boundary polygons with census data by GEOID (inner join: keep only
### counties present in both the wetland summary and the ACS pull)
census_wetland_merge <- hawqs_wetland_changes_summary %>%
  merge(cen_data_wide, by = "GEOID") %>%
  mutate(STATEFP = substr(GEOID, 1, 2))

## Read in Woods and Poole Data -----------------------------------------------
wp_data_raw <- read.csv(paste0(wood_poole_path, "Woods_and_Poole_2021_CountyData_v2.csv"))

# Pull the GEOID and state abbreviation out of fixed positions at the end of
# the CountyName string.
# NOTE(review): assumes the trailing "... ST ... 12345 " layout of the Woods &
# Poole export -- verify if the export format ever changes.
wp_data <- wp_data_raw %>%
  mutate(
    GEOID = trimws(str_sub(CountyName, nchar(CountyName) - 6, nchar(CountyName) - 1)),
    State = trimws(str_sub(CountyName, nchar(CountyName) - 19, nchar(CountyName) - 17))
  )

# Counties that have census/wetland data but no Woods & Poole match
we <- census_wetland_merge$GEOID
wp <- wp_data$GEOID
ans <- setdiff(we, wp) # 51 missing FIPS codes from VA

### Join Region Crosswalk and Format Final Output -----------------------------
# Attach state/region flags, log median income, and drop counties with no
# Woods & Poole match (except Hawaii, which is kept deliberately).
census_wetland_region <- merge(census_wetland_merge, State_xwalk, by.x = "STATEFP", by.y = "st", all.x = TRUE) %>%
  mutate(`ln(Median Income)` = log(B19013_001)) %>%
  select(GEOID, Split_GEOID, stusps, `ln(Median Income)`, sagulf, nema, nmw, Prop.Forested, Prop.local, Baseline.Wetlands_50, Baseline.Wetlands_30, impacts.NWPR.perm,
         impacts.Rapanos.perm, impacts.NWPR.temp, impacts.Rapanos.temp) %>%
  filter(!GEOID %in% ans | stusps == "HI")

# Merge to get county names: regular counties from TIGER, oversized counties
# from the big-county shapefile; coalesce the two NAME columns afterwards.
output_named <- merge(census_wetland_region,
                      st_drop_geometry(TIGER_boundary) %>% select(GEOID, NAME),
                      by = "GEOID",
                      all.x = TRUE) %>%
  merge(., st_drop_geometry(big_count) %>% select(GEOID, NAME),
        by = "GEOID",
        all.x = TRUE) %>%
  distinct(Split_GEOID, .keep_all = TRUE) %>%
  mutate(NAME = ifelse(is.na(NAME.x), NAME.y, NAME.x)) %>%
  # BUG FIX: the original selected `Baseline.Wetlands`, a column that does not
  # exist (the summary creates Baseline.Wetlands_50 / _30), so this select()
  # errored at runtime. Select both baseline columns, matching
  # census_wetland_region above.
  select(GEOID, Split_GEOID, stusps, NAME, `ln(Median Income)`, sagulf, nema, nmw, Prop.Forested,
         Prop.local, Baseline.Wetlands_50, Baseline.Wetlands_30, impacts.NWPR.perm, impacts.Rapanos.perm,
         impacts.NWPR.temp, impacts.Rapanos.temp)

beepr::beep()

# QA 50 mile buffer -----------------------------------------------------------
# Compare the freshly computed 30-mile baseline wetlands against the
# previously published workbook. The comparison is intentionally exact (==):
# any numeric drift is flagged and then quantified via the difference column.
old_buff <- read.xlsx(paste0(Output_path, "WOTUS_Buffer_WetlandAnalysis_7-29-22.xlsx"))

QA_50mibuff <- merge(census_wetland_region, old_buff, by = "GEOID")

# All three counts should agree if the merge dropped nothing
length(unique(old_buff$GEOID)) # 3056
length(unique(census_wetland_region$GEOID)) # 3056
length(unique(QA_50mibuff$GEOID)) # 3056

# .x = freshly computed (carries units from st_area); .y = workbook values
new_30 <- units::drop_units(QA_50mibuff$Baseline.Wetlands_30.x)

QA_50mibuff$QA_30mile <- ifelse(new_30 == QA_50mibuff$Baseline.Wetlands_30.y,
                                "Same", "Different")

QA_50mibuff$QA_30mile_diff <- new_30 - as.numeric(QA_50mibuff$Baseline.Wetlands_30.y)

# Counties that disagree, with the total size of the disagreement
QA_50mibuff_filt <- QA_50mibuff %>%
  filter(QA_30mile == "Different") %>%
  group_by(GEOID) %>%
  summarise(diff_sum = sum(QA_30mile_diff))

# Findings:
# Only the oversized counties have differences in the 30 mile baseline wetlands
# Differences in oversized counties are from split being switched. The differences are equal between GEOIDs
# Dataset is equal to previous 
# Non-oversized counties are confirmed matching

# Write output files ----------------------------------------------------------
write.csv(census_wetland_region, paste0(Output_path, "WOTUS_50milebuffer_wetlandanalysis_10-05-22.csv"), row.names = FALSE)

write.xlsx(census_wetland_region, paste0(Output_path, "WOTUS_50milebuffer_wetlandanalysis_10-05-22.xlsx"))

# BUG FIX: the "_CountyNames_" files previously wrote census_wetland_region a
# second time; they are meant to carry output_named (the table with the NAME
# column merged in), which was otherwise never written anywhere.
write.csv(output_named, paste0(Output_path, "WOTUS_50milebuffer_wetlandanalysis_CountyNames_10-10-22.csv"), row.names = FALSE)

write.xlsx(output_named, paste0(Output_path, "WOTUS_50milebuffer_wetlandanalysis_CountyNames_10-10-22.xlsx"))

save.image(file = paste0(Output_path, "50mileWetlandAnalysis_10-5-22.RData"))

# prop local -----------------------------------------------------------------
# Average share of local (30-mile) wetlands per state.
# NOTE(review): no na.rm here, so a single NA Prop.local makes a state's
# average NA -- confirm whether that is the desired behavior.
prop.local.avg <- census_wetland_region %>%
  group_by(stusps) %>%
  summarise(avg.prop.local = mean(Prop.local))

# write.csv(prop.local.avg, paste0(Output_path, "prop_local_avg.csv"), row.names = F)

# State name <-> abbreviation lookup. Note this overwrites the earlier
# `states` vector of abbreviations used in the ACS call -- scratch work, fine
# only because it runs after the API pull.
states <- data.frame(state.name, state.abb)
