#########################################################################
#												                                                #
#				          Property Value Benefit Transfer                       #
#												                                                #
#########################################################################

# Authors: George Gardner, Mary Sluder
# Date Created: 06/23/23
# Last Updated: 12/04/23

# Purpose: estimates illustrative property value benefits of a 1 mg/L DO improvement in the
# specified zones of the Delaware River, accounting for values of single-family households 
# within 2 miles of the specified zones of the Delaware River. 

# See Netusil; see Appendix D in Alkire (page D-11)

rm(list=ls())
gc()

tic<-proc.time() #start stop watch

options(stringsAsFactors=FALSE)
pacman::p_load(dplyr, sf, tidycensus, tidyr, purrr, magrittr, ggplot2, nhdplusTools, measurements,
               foreign, tigris, openxlsx, terra, rgdal, readr, rgeos, sp, raster)

#Increase memory limit
memory.limit(size=10000)

# Variables to change before running the code --------------------------------

var <- "59229" # Change to user ID

outpath <- paste0("C:/Users/",var,"/ICF/CWA Anniversary & Econ Support - TD3_Delaware River WQS/Property Values/Property Value Benefit Transfer/Outputs/")
# has information on the FMA, Delaware River main stem and zones, sturgeon critical habitats.
EPA_Data <- paste0("C:/Users/",var,"/ICF/CWA Anniversary & Econ Support - TD3_Delaware River WQS/GIS/Data/")

# Census Data Prep
# Activate API Key
census_api_key("0d3711e1f7f03d54ea326708428a15bcec0c0621") # Key registered to Sam Ennett
years <- c(2021)
# Census Tract level data on: # occupied hh's, # total hh's, # 1-unit hh's (detached, attached), median housing value (owner occupied units)
# DP04_0001 = Total Housing Units
# DP04_0002 = Occupied Housing Units
# DP04_0007 = 1-Unit Detached Housing Units
# DP04_0008 = 1-Unit Attached Housing Units
# DP04_0089 = Median Housing Value (Owner-occupied Units)

Census_var <- c("DP04_0001", "DP04_0002", "DP04_0007", "DP04_0008", "DP04_0089")  

# STEP 1: Read in data ----------------------------------------------------------------

# Fish Maintenance Area
FMA <- st_read(paste0(EPA_Data,"Fish Maintenance Area/delriv_fma.shp")) |>
  st_transform("ESRI:102008")

# # Census Data for NY, DE, PA, and NJ
# Note: can adjust this call to pull any of the ACS data you need.

# get tract-level ACS data for NY, DE, NJ, and PA. 
Tract_df <- get_acs(geography = "tract",
             year = years,
             survey = "acs5",
             state = c("NY", "PA", "NJ","DE"),
             variables = Census_var,
             geometry = T) 
Tract_df <- Tract_df |>
  st_transform("ESRI:102008") # 56500

# NOTE: The test adds tract-level population ACS data and compares it to tract-level number of housing units.
# There are 205 tracts with 0 housing units but 153 tracts with 0 population. Of the 205 tracts with 0 households,
# 57 tracts have non-zero populations. Test signifies some accuracy issues with the ACS data.
#Test <- get_acs(geography = "tract",
#                year = years,
#                survey = "acs5",
#                state = c("NY", "PA", "NJ"),
#                variables = c(Census_var, "B01001_001"),
#                geometry = F) 
#Test <-   Test %>%
#  dplyr::select(-moe) %>%
#  pivot_wider(names_from = "variable", values_from = "estimate")

# Create table of Tract-level ACS statistics:
ACS_Stats <- Tract_df %>%
  st_drop_geometry() %>%
  dplyr::select(-moe) %>%
  pivot_wider(names_from = "variable", values_from = "estimate") # 11300
names(ACS_Stats) <- c("GEOID", "TRACT", "TOT_HH_UNITS", "OCC_HH_UNITS", "ONE_UNIT_DET",
                      "ONE_UNIT_ATT", "MED_HH_VAL")

# Fill in NAs for MED_HH_VAL with County statistics.
Temp_County <- get_acs(geography = "county",
                  year = years,
                  survey = "acs5",
                  state = c("NY", "PA", "NJ","DE"), 
                  variables = "DP04_0089",
                  geometry = F) 
Temp_County <- Temp_County[, c(1, 4)] # 153
names(Temp_County) <- c("GEOID2", "MED_HH_VAL2")

# merge tract data to county data.
ACS_Stats <- ACS_Stats %>%
  mutate(GEOID2 = substr(GEOID, 1, 5)) 
ACS_Stats <- merge(ACS_Stats, Temp_County, by.x = "GEOID2", by.y = "GEOID2", all.x = TRUE) #11300
# replace NA values.
ACS_Stats <- ACS_Stats %>%
  mutate(MED_HH_VAL = ifelse(is.na(MED_HH_VAL), MED_HH_VAL2, MED_HH_VAL)) 
# clean up data and remove tract and county dataframes.
ACS_Stats <- ACS_Stats[, c(2:8)]
rm(Temp_County, Tract_df)

# STEP 2: draw buffers around the specified zones of the Lower Delaware River (i.e., the FMA) ----------------
# 1/4 mile buffer
Buff_A <- st_union(st_buffer(FMA, conv_unit(0.25, "mi", "m")))
Buff_A <- st_sf(Buff_A)
# 1/2 mile buffer
Buff_B <- st_union(st_buffer(FMA, conv_unit(0.50, "mi", "m")))
Buff_B <- st_sf(Buff_B)
# 1 mile buffer
Buff_C <- st_union(st_buffer(FMA, conv_unit(1, "mi", "m")))
Buff_C <- st_sf(Buff_C)
# 2 mile buffer
Buff_D <- st_union(st_buffer(FMA, conv_unit(2, "mi", "m")))
Buff_D <- st_sf(Buff_D)

# visual confirmation of buffers.
ggplot() +
  geom_sf(data = Buff_D, fill = "black") +
  geom_sf(data = Buff_C, fill = "yellow") +
  geom_sf(data = Buff_B, fill = "green") +
  geom_sf(data = Buff_A, fill = "red") +
  geom_sf(data = FMA, fill = "blue")

st_erase = function(x, y) st_difference(x, st_union(st_combine(y))) # required because FMA zone has more than one feature and st_difference doesn't work

# Turn buffers into bands.
Band_A <- st_erase(Buff_A, FMA) # (0 - 0.25 mi) # Note removing overlapping FMA zone because buffer/band should start after FMA zone ends - at 0
Band_B <- st_difference(Buff_B, Buff_A) # (0.25 - 0.50 mi)
Band_C <- st_difference(Buff_C, Buff_B) # (0.50 - 1 mi)
Band_D <- st_difference(Buff_D, Buff_C) # (1 - 2 mi)

# visual confirmation of band.
ggplot() +
  geom_sf(data = Band_D, fill = "black") + # adding others to double check
  geom_sf(data = Band_C, fill = "yellow") + # note that running individually shows bands
  geom_sf(data = Band_B, fill = "green") +
  geom_sf(data = Band_A, fill = "red") +
  geom_sf(data = FMA, fill = "blue")

# drop buffers
rm(Buff_A, Buff_B, Buff_C, Buff_D)

# STEP 3: Find tracts whose centroids intersect with the bands ---------------------------------------
# First, get centroids of the tracts from ACS (pick one statistic so we don't have duplicate geometry)
Tract_df <- get_acs(geography = "tract",
                    year = years,
                    survey = "acs5",
                    state = c("NY", "PA", "NJ", "DE"), 
                    variables = "DP04_0089",
                    geometry = T) 
Tract_df <- Tract_df |>
  st_transform("ESRI:102008") |>
  st_centroid(Tract_df) |> 
  st_zm()

# Next, find the centroids of tracts that lie within each band
# tracts in band A (0 - 0.25 mi)
int_Tract_A <- st_filter(Tract_df, y = Band_A, .predicate = st_intersects) # 20
# tracts in band B (0.25 - 0.50 mi)
int_Tract_B <- st_filter(Tract_df, y = Band_B, .predicate = st_intersects) # 26
# tracts in band C (0.50 - 1 mi)
int_Tract_C <- st_filter(Tract_df, y = Band_C, .predicate = st_intersects) # 66
# tracts in band D (1 - 2 mi)
int_Tract_D <- st_filter(Tract_df, y = Band_D, .predicate = st_intersects) # 136

# visual confirmation of the tract centroids and buffers
ggplot() +
  geom_sf(data = Band_A, color = 'red') +
  geom_sf(data = int_Tract_A, color = "black")

# STEP 4: For each tract intersecting the bands, determine the number of single-family -------------
# residential homes and median housing value. ------------------------------------------------------

# merge ACS statistics dataframe with tracts within each band.
ACS_Tract_A <- merge(int_Tract_A, ACS_Stats) %>%
  st_drop_geometry()
ACS_Tract_A <- ACS_Tract_A[,c(1, 6:11)] # 20

ACS_Tract_B <- merge(int_Tract_B, ACS_Stats) %>%
  st_drop_geometry()
ACS_Tract_B <- ACS_Tract_B[,c(1, 6:11)] # 26

ACS_Tract_C <- merge(int_Tract_C, ACS_Stats) %>%
  st_drop_geometry()
ACS_Tract_C <- ACS_Tract_C[,c(1, 6:11)] # 66

ACS_Tract_D <- merge(int_Tract_D, ACS_Stats) %>%
  st_drop_geometry()
ACS_Tract_D <- ACS_Tract_D[,c(1, 6:11)] # 136

# calculate the number of single-family homes (i.e., owner-occupied one-unit attached and detached).
# Occupied households/Total Households)*(Number of 1-unit households [attached + detached])
ACS_Tract_A$SF_HOMES <- 0
ACS_Tract_A <- ACS_Tract_A %>%
  mutate(SF_HOMES = ifelse(TOT_HH_UNITS != 0 , (OCC_HH_UNITS/TOT_HH_UNITS)*(ONE_UNIT_DET+ONE_UNIT_ATT), TOT_HH_UNITS)) # 20      
ACS_Tract_A$SF_HOMES <- floor(ACS_Tract_A$SF_HOMES)

ACS_Tract_B$SF_HOMES <- 0
ACS_Tract_B <- ACS_Tract_B %>%
  mutate(SF_HOMES = ifelse(TOT_HH_UNITS != 0 , (OCC_HH_UNITS/TOT_HH_UNITS)*(ONE_UNIT_DET+ONE_UNIT_ATT), TOT_HH_UNITS)) # 26
ACS_Tract_B$SF_HOMES <- floor(ACS_Tract_B$SF_HOMES)

ACS_Tract_C$SF_HOMES <- 0
ACS_Tract_C <- ACS_Tract_C %>%
  mutate(SF_HOMES = ifelse(TOT_HH_UNITS != 0 , (OCC_HH_UNITS/TOT_HH_UNITS)*(ONE_UNIT_DET+ONE_UNIT_ATT), TOT_HH_UNITS)) # 66
ACS_Tract_C$SF_HOMES <- floor(ACS_Tract_C$SF_HOMES)

ACS_Tract_D$SF_HOMES <- 0
ACS_Tract_D <- ACS_Tract_D %>%
  mutate(SF_HOMES = ifelse(TOT_HH_UNITS != 0 , (OCC_HH_UNITS/TOT_HH_UNITS)*(ONE_UNIT_DET+ONE_UNIT_ATT), TOT_HH_UNITS)) # 136
ACS_Tract_D$SF_HOMES <- floor(ACS_Tract_D$SF_HOMES)

# STEP 5: Calculate a weighted average of the median home values in all Census tracts intersecting the bands ---------
# weighted average = sum of (Tract median home value * (number of 1-unit households in tract / total number ----------
# of 1-unit households across intersecting tracts)) across intersecting tracts for each. -----------------------------

# calculate the weights and the weighted median home values.
ACS_Tract_A$Weight <- (ACS_Tract_A$ONE_UNIT_DET + ACS_Tract_A$ONE_UNIT_ATT)/sum(ACS_Tract_A$ONE_UNIT_DET + ACS_Tract_A$ONE_UNIT_ATT)
ACS_Tract_A$WGT_MED_HH_VAL <- ACS_Tract_A$MED_HH_VAL * ACS_Tract_A$Weight

ACS_Tract_B$Weight <- (ACS_Tract_B$ONE_UNIT_DET + ACS_Tract_B$ONE_UNIT_ATT)/sum(ACS_Tract_B$ONE_UNIT_DET + ACS_Tract_B$ONE_UNIT_ATT)
ACS_Tract_B$WGT_MED_HH_VAL <- ACS_Tract_B$MED_HH_VAL * ACS_Tract_B$Weight

ACS_Tract_C$Weight <- (ACS_Tract_C$ONE_UNIT_DET + ACS_Tract_C$ONE_UNIT_ATT)/sum(ACS_Tract_C$ONE_UNIT_DET + ACS_Tract_C$ONE_UNIT_ATT)
ACS_Tract_C$WGT_MED_HH_VAL <- ACS_Tract_C$MED_HH_VAL * ACS_Tract_C$Weight

ACS_Tract_D$Weight <- (ACS_Tract_D$ONE_UNIT_DET + ACS_Tract_D$ONE_UNIT_ATT)/sum(ACS_Tract_D$ONE_UNIT_DET + ACS_Tract_D$ONE_UNIT_ATT)
ACS_Tract_D$WGT_MED_HH_VAL <- ACS_Tract_D$MED_HH_VAL * ACS_Tract_D$Weight

# sum the weighted median home values within each band to obtain the band-specific weighted average median home values.
WGT_AVG_MED_HH_VALUE_LIST <- c(sum(ACS_Tract_A$WGT_MED_HH_VAL), sum(ACS_Tract_B$WGT_MED_HH_VAL), 
                               sum(ACS_Tract_C$WGT_MED_HH_VAL), sum(ACS_Tract_D$WGT_MED_HH_VAL))

# STEP 6: Estimate the potential gain in average annual rental value (2021$) from DO improvements. -------------------
# Estimated effect on Property Sale Prices from a 1mg/L increase in Dissolved Oxygen (%) * Change in -----------------
# Dissolved Oxygen Levels under Scenarios * Average Annual Rental Value (2021$) --------------------------------------

# Calculate the average annual rental values within each band. Multiply weighted average median home value by 3% discount rate.
AVG_ANNUAL_RENTAL_VALUE_LIST <- as.data.frame(0.03*WGT_AVG_MED_HH_VALUE_LIST)

# Take the estimated effects on property sales price from DO changes in Netusil
# Burnt Bridge Creek:
# For 0 - 0.25 mi:    0.0123
# For 0.25 - 0.50 mi: 0.0449
# For 0.50 - 1 mi:    0.0295
# For 1 - 2 mi:       0.0317
Burnt_Bridge <- as.data.frame(c(0.0123, 0.0449, 0.0295, 0.0317))

# Johnson Creek:
# For 0 - 0.25 mi:    0.1371
# For 0.25 - 0.50 mi: 0.0705
# For 0.50 - 1 mi:    0.0818
# For 1 - 2 mi:       0.0312
Johnson_Creek <- as.data.frame(c(0.1371, 0.0705, 0.0818, 0.0312))

# Change in DO level (based on actual flows) under EPA standard = 0.7965 mg/L
# Updated to 0.799 on 12/4/2023 based on EPA guidance
DO_Change <- 0.799

Rental_value_Gain <- cbind(AVG_ANNUAL_RENTAL_VALUE_LIST, Burnt_Bridge, Johnson_Creek)
names(Rental_value_Gain) <- c("AVG_ANN_RENTAL_VAL", "PROP_EFFECT_BB", "PROP_EFFECT_JC")

# calculate potential gain in rental value
Rental_value_Gain <- Rental_value_Gain %>%
  mutate(RENTAL_GAIN_BB = AVG_ANN_RENTAL_VAL*PROP_EFFECT_BB*DO_Change) %>%
  mutate(RENTAL_GAIN_JC = AVG_ANN_RENTAL_VAL*PROP_EFFECT_JC*DO_Change)

# STEP 7: Estimate the total annual rental value change. ----------------------
# potential rental value gain * total number of 1-unit households across intersecting tracts in each revised buffer

# total number of one-unit households for tracts in each band.
TOT_ONE_UNIT_HH <- as.data.frame(c(sum(ACS_Tract_A$ONE_UNIT_DET + ACS_Tract_A$ONE_UNIT_ATT),
                                   sum(ACS_Tract_B$ONE_UNIT_DET + ACS_Tract_B$ONE_UNIT_ATT),
                                   sum(ACS_Tract_C$ONE_UNIT_DET + ACS_Tract_C$ONE_UNIT_ATT),
                                   sum(ACS_Tract_D$ONE_UNIT_DET + ACS_Tract_D$ONE_UNIT_ATT)))
names(TOT_ONE_UNIT_HH) <- c("TOT_ONE_UNIT_HH")

Rental_value_Gain <- cbind(Rental_value_Gain, TOT_ONE_UNIT_HH)

# calculate the total annual rental value change
Rental_value_Gain$TOT_AN_RENTAL_VAL_BB <- Rental_value_Gain$TOT_ONE_UNIT_HH * Rental_value_Gain$RENTAL_GAIN_BB
Rental_value_Gain$TOT_AN_RENTAL_VAL_JC <- Rental_value_Gain$TOT_ONE_UNIT_HH * Rental_value_Gain$RENTAL_GAIN_JC

# STEP 8: Potential Gain in Average Property Value Per Residence (2021$) ------------------------------------
# Estimated Effect on Property Sale Prices from a 1mg/L increase in Dissolved Oxygen (%) * Change in Dissolved 
# Oxygen Levels under Scenarios * Weighted average median home value (2021$)

WGT_AVG_MED_HH_VALUE_LIST <- as.data.frame(WGT_AVG_MED_HH_VALUE_LIST)

Prop_Value_Gain <- cbind(Rental_value_Gain[,c(2:3, 6)], WGT_AVG_MED_HH_VALUE_LIST)

Prop_Value_Gain <- Prop_Value_Gain %>%
  mutate(AVG_PROP_VAL_GAIN_BB = WGT_AVG_MED_HH_VALUE_LIST*PROP_EFFECT_BB*DO_Change) %>%
  mutate(AVG_PROP_VAL_GAIN_JC = WGT_AVG_MED_HH_VALUE_LIST*PROP_EFFECT_JC*DO_Change)

# STEP 9: Estimate the total property value change -----------------------------------------------------------
# Average Property Value Gain Per Residence * total number of 1-unit households 
Prop_Value_Gain$TOT_PROP_VAL_BB <- Prop_Value_Gain$AVG_PROP_VAL_GAIN_BB * Prop_Value_Gain$TOT_ONE_UNIT_HH
Prop_Value_Gain$TOT_PROP_VAL_JC <- Prop_Value_Gain$AVG_PROP_VAL_GAIN_JC * Prop_Value_Gain$TOT_ONE_UNIT_HH

# STEP 10: Convert from 2021$ to 2022$  ----------------------------------------------------------------------
# historical CPI-U taken from BLS - https://www.bls.gov/cpi/tables/supplemental-files/historical-cpi-u-202305.pdf
CPI <- 292.655/270.970 # 2022$/2021$

Rental_value_Gain_22 <- Rental_value_Gain %>%
  mutate(RENTAL_GAIN_BB = RENTAL_GAIN_BB*CPI) %>%
  mutate(RENTAL_GAIN_JC = RENTAL_GAIN_JC*CPI) %>%
  mutate(TOT_AN_RENTAL_VAL_BB = TOT_AN_RENTAL_VAL_BB*CPI) %>%
  mutate(TOT_AN_RENTAL_VAL_JC = TOT_AN_RENTAL_VAL_JC*CPI)
  
row.names(Rental_value_Gain_22) <- c("miles 0.00 - 0.25", "miles 0.25 - 0.50", "miles 0.50 - 1.00", "miles 1.00 - 2.00")

Prop_Value_Gain_22 <- Prop_Value_Gain %>%
  mutate(AVG_PROP_VAL_GAIN_BB = AVG_PROP_VAL_GAIN_BB*CPI) %>%
  mutate(AVG_PROP_VAL_GAIN_JC = AVG_PROP_VAL_GAIN_JC*CPI) %>%
  mutate(TOT_PROP_VAL_BB = TOT_PROP_VAL_BB*CPI) %>%
  mutate(TOT_PROP_VAL_JC = TOT_PROP_VAL_JC*CPI)

row.names(Prop_Value_Gain_22) <- c("miles 0.00 - 0.25", "miles 0.25 - 0.50", "miles 0.50 - 1.00", "miles 1.00 - 2.00")

datasets <- list('Rental Value Benefits' = Rental_value_Gain_22, 'Property Value Benefits' = Prop_Value_Gain_22)

# export as excel file.
write.xlsx(datasets, paste0(outpath,"Property_Value_Benefit_Transfer_",Sys.Date(),".xlsx"), rowNames = TRUE)

proc.time()-tic #capture run time