# This is for the pooled model with 52 observations, lumpsum = 0,
# and a 30-mile radius definition of local

################################################################################
# Predictions script for BT using MRM2_CAN_52obs_SSVS_v2 (52 obs pooled/local=original def)

# ##############################################################################
# % general structure of script:
#   %
# % for each test, we have 16 versions based on prov, reg, cult, and
# % volunt
# % each of these now needs to be combined with the 4 settings for q0 and q1,
# % plus corresponding settings for the dummy indicators for local and forest
# % this produces 16*4 = 64 predictions (each with r2 draws, of course)
# % then, for each of the four blocks of 16, generate the weighted sum to
# % obtain the six main predictions for WTP per HH and year.

# Preliminary housekeeping
################################################################################
# set working directory to keep file paths short
setwd('C:/Users/55338/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/Wetland MRM - R Code/')

rm(list = ls(all = TRUE)) #first, clear R's workspace
options(prompt = "R> ", digits = 4) # show 4 digits by default
options(continue=" ") #remove continuation prompt
#
set.seed(37)  #sets the random number generator so we can reproduce results
#
tic<-proc.time() #start stop watch
#
# load packages we'll need
############################
library(openxlsx)
library(readxl)
# Start with procedures that can be done outside the loop to avoid
# unnecessary repetition
################################################################################

################################################################################
################################################################################
# STEP 1: Load original data, re-construct X, and label each original
# variable in X
################################################################################
################################################################################
#
#load("./data/wetlanddataFeb2022.rda")  #already in R-format
#load("./data/wetlanddataJune2022.rda")  #already in R-format
load("./data/wetlanddataJune2022_corrected.rda")
#
#
# % contents of data
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# % 1   studyid         original unique study id
# % 2   obsid           original unique observation id
# % 3   lnwtp           log of wtp2021
# % 4   wlfresh         1=freshwater wetland, 0 = salt/coastal
# % 5   lnyear          log of yr of data collection - 1988
# % 6   lninc           log of inc2021
# % 7   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# % 8   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# % 9   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# % 10  CAN             1 = Canadian study
# % 11  local           f1= local scale (target pop at sub-state level)
# % 12  prov            1=provisioning function affected
# % 13  reg             1=regulating function affected
# % 14  cult            1=cultural function affected
# % 15  forest          1=forested wetland            
# % 16  q0              baseline acreage
# % 17  q1              policy acreage
# % 18  volunt          1= pay mech = voluntary contribution
# % 19  lumpsum         1=single payment
# % 20  ce              choice experiment
# % 21  nrev            1=not peer-reviewed
# % 22  median          1=orig. wtp based on econometric medians
# % 23  q0alt            alternative definition of baseline acres - see notes
# % 24  q1alt            alternative definition of policy acres - see notes

# Drop Rudd et al
################################
# data<-subset(data,data[,1]!=128)

# Drop Kim & Petrolia 2013 observations (replaced with Petrolia & Kim 2011)
################################
data<-subset(data,data[,1]!=110)

#attach(data)

# keep freshwater only
###############################
#data<-subset(data,data[,4]==1)

# define y and X
###############################
q0<-data[,16]/1000
q1<-data[,17]/1000

y<-as.matrix(data[,3]-log(q1-q0))
n<-length(y)

X<-as.matrix(cbind(rep(1,n),data[,5:15],(q0+q1)/2, data[,18:19]))
k<-ncol(X)

# % contents of X
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# % 1   const           constant
# % 2   lnyear          log of yr of data collection - 1988
# % 3   lninc           log of inc2021
# % 4   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# % 5   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# % 6   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# % 7   CAN             1 = Canadian study
# % 8   local           f1= local scale (target pop at sub-state level)
# % 9   prov            1=provisioning function affected
# % 10  reg             1=regulating function affected
# % 11  cult            1=cultural function affected
# % 12  forest          1=forested wetland
# % 13  (q0+q1)/2
# % 14  volunt          1= pay mech = voluntary contribution
# % 15  lumpsum         1=single payment


k1<-13 #context-defining
k2<-2 #nuisance
k3<-9 #interactions (I checked in stata to make sure these are identified)


# Interaction terms
#########################
fresh<-data[,4] # grabs the freshwater variable
nf<-matrix(1-fresh) # uses it to create the saltwater variable and creates a matrix
int<-matrix(rep(nf,k3),nrow(nf),) # creates a matrix that will be multiplied by the 9 interaction terms

# creates a matrix of the variables to be interacted with:
# constant, lnyear, lninc, sagulf, local, prov, reg, cult, ((q0+q1)/2) 
Xadd<-as.matrix(cbind(X[,1:4],X[,8:11],X[,13]))

add<-as.matrix(int*Xadd) # creates the interactions by taking the product of the two matrices.
X<-as.matrix(cbind(X,add)) # now redefines X to include the interactions

colnames(X)<- c("const","lnyear","lninc","sagulf","nema","nmw", "CAN","local",
                             "prov","reg","cult","forest","(q0+q1/2)","volunt","lumpsum",
                             "const*salt", "lnyear*salt", "lninc*salt", "sagulf*salt",
                             "local*salt", "prov*salt", "reg*salt", "cult*salt", "(q0+q1)/2*salt")

# mean check
tt<-as.matrix(colMeans(X)) #OK, identical to Matlab

# contents of augmented X
######################################################
# 1   const           constant
# 2   lnyear          log of yr of data collection - 1988
# 3   lninc           log of inc2021
# 4   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# 5   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# 6   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# 7   CAN             1 = Canadian study
# 8   local           f1= local scale (target pop at sub-state level)
# 9   prov            1=provisioning function affected
# 10  reg             1=regulating function affected
# 11  cult            1=cultural function affected
# 12  forest          1=forested wetland
# 13  (q0+q1)/2
# 14  volunt          1= pay mech = voluntary contribution
# 15  lumpsum         1=single payment

# 16  const*salt
# 17  lnyear*salt
# 18  lninc*salt
# 19  sagulf*salt
# 20  local*salt
# 21  prov*salt
# 22  reg*salt
# 23  cult*salt
# 24  (q0+q1)/2* salt

k<-ncol(X)

################################################################################
################################################################################
# STEP 2: Re-shuffle (conceptually) elements of X to separate variables
# with fixed BT settings from those over which we want to mix;
# Then load GS draws and re-shuffle to mirror this new ordering
################################################################################
################################################################################
  
# % Variables with specific settings in all 6 cases, not subject to mixing
# % (other than acres)
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# % 1   const           constant
# % 2   lnyear          log of yr of data collection - 1988
# % 3   lninc           log of inc2021
# % 4   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# % 5   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# % 6   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# % 7   CAN             1 = Canadian study
# % 14  volunt          1= pay mech = voluntary contribution
# % 15  lumpsum         1=single payment
# 
# % Variables for mixing in all 6 cases
# % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# % 9    prov            1=provisioning function affected
# % 10   reg             1=regulating function affected
# % 11   cult            1=cultural function affected

# load GS results
#################################
#load("./output using Feb22 data/MRM2_CAN_52obs_SSVS_v2.rda")
#load("./output using June22 data/MRM2_CAN_52obs_SSVS_v2.rda")
load("./output using June22 corrected data/MRM2_CAN_52obs_SSVS_30mile_v2.rda")

# The last 9 rows of betamat are for interactions and are discarded. 
# The first 15 rows go with the following X.

betamat<-betamat[1:15,]

# contents of augmented X
#################################
# 1   const           constant
# 2   lnyear          log of yr of data collection - 1988
# 3   lninc           log of inc2021
# 4   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# 5   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# 6   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# 7   CAN             1 = Canadian study
# 8   local           f1= local scale (target pop at sub-state level)
# 9   prov            1=provisioning function affected
# 10  reg             1=regulating function affected
# 11  cult            1=cultural function affected
# 12  forest          1=forested wetland
# 13  (q0+q1)/2
# 14  volunt          1= pay mech = voluntary contribution
# 15  lumpsum         1=single payment

bmatfix<-rbind(betamat[1:7,], betamat[14:15,])
bmatmix<-betamat[9:11,]

bmatq<-betamat[13,]
bmatloc<-betamat[8,]
bmatfor<-betamat[12,]

bmatnew<-rbind(bmatfix, bmatmix, bmatq, bmatloc, bmatfor) 
#reshuffled coefficient matrix from GS

k1<-nrow(bmatfix)
k2<-nrow(bmatmix)
k<-k1 + k2 + 3  # add acres, local, and forested
r2<-ncol(betamat)

#mean check for bmatnew: close to Matlab, OK

################################################################################
################################################################################
# STEP 3: Determine mixing weights for each unique combination of the
# mixing variables
################################################################################
################################################################################

source("./functions/graycode.R")  #call my graycode function - binaries of 0,1
Jmat<-as.matrix(graycode(3)) #8 by 3 combos of 0's and 1's

kver<-nrow(Jmat) #number of unique rows, here 8

pprov<- mean(X[,9]); # sample proportion of 1's
preg<-  mean(X[,10]);  # sample proportion of 1's
pcult<- mean(X[,11]); # sample proportion of 1's

porig<-matrix(c(pprov,preg,pcult),1,) #1 by 3
# replicate this row vector kver times in the row dimension
# see my "cheat sheet" repmat_help
int<-matrix(rep(porig,kver),kver,byrow=TRUE) #8 by 3 

# Below: since Jmat is a matrix of 0's and 1's, anything ^0 is 1 (replaces all 0's with 1's), 
# anything ^1 is itself (replaces 1's with porig). int must be same dimensions as Jmat,
# each element in int is taken to the correponding power of the element in Jmat.

int1<-int^Jmat #this replaces all 0's with 1's, and all 1's with porig

# Below: In (1-Jmat), all the 1's become 0's, and all the 0's become 1's. So now, relative
# to elements in Jmat, 0's become (1-orig) and 1's stay as 1's (b/c doing (1-orig)^0)

int2<-(1-int)^(1-Jmat) #this replaces all 0's with (1-orig), and leaves 1's unchanged

# Below: each row provides different combination of probabilities (e.g., pprov, !preg, !pcult)

int3<-int1*int2 #this gives the correct probability for each variable setting 
# OK, same as Matlab
# int3 is 8 by 3

# we want the product of each row (P(a, b, c))- giving an 8 by 1
pvec<-apply(int3,1,prod)
pvec<-as.matrix(pvec) # OK, same as Matlab
# check if it sums to 1 (since now have probability of each combination)
tt<-sum(pvec)  #OK!

#OK, pvec and Jmat identical to Matlab

################################################################################
################################################################################
# STEP 4: Determine truncation point, if desired, 
# draw error term
# load state-specific settings
################################################################################
################################################################################

# truncation point
###############################
tp<-0.99  #R requires fractions, not percent

# Prepare stochastic terms - only needed to do this once
########################################################
epmat<-rnorm(r2,mean=0, sd=sqrt(sig2mat))
epmat<-t(as.matrix(epmat))  # 1 by r2

#mean about 0, std very close to Mlab, OK

# Load State-specific data
##########################

load("./data/USdata_Rev5.rda")

# set Alaska baseline to 2 million
USdata[1, 8] <- 2000000

USdata <- USdata[, -c(13)]
names(USdata)[names(USdata)=="PERM_TOTAL_NWPR"] <- "NWPR_perm"
names(USdata)[names(USdata)=="TEMP_TOTAL_NWPR"] <- "NWPR_temp"
names(USdata)[names(USdata)=="PERM_TOTAL_Rapanos"] <- "Rapanos_perm"
names(USdata)[names(USdata)=="TEMP_TOTAL_Rapanos"] <- "Rapanos_temp"

sk<-nrow(USdata) # loop through states

# Note: for permanent impacts, have j start at year 1 and loop until year 5.
# for permanent + temporary impacts, have j start at year 6 and loop until year 20
j <- 1 # loop through years of permanent impacts
l <- 0 # loop through years of temporary impacts

# % contents of USdata:
#   %%%%%%%%%%%%%%%%%%%%%%
# % 1  statenum                running state id, 1-49
# % 2  lninc                   float   %9.0g                 
# % 3  sagulf                  1=south Atlantic / gulf state
# % 4  nema                    1= northeast / mid-Atlantic
# % 5  nmw                     1=northern / mid-west
# % 6  propfor                 fraction of forested acres
# % 7  proploc                 fraction of local acres - assumed at 0.0318 (NOT USED!!!)
# % 8  q0                      baseline acres

# NOTE: perm only is applied in years 2023, 2024, 2025, 2026, 2027.
# Need to run a loop 5 times using #9 and #11 below to get HH WTP for each year.
# perm + temp is applied in years 2028 - 2042.
# Need to run a loop 15 times using #9 and #11 below to get HH WTP for each year.

# % 9  NWPR_perm              permanent reduction in acres under NWPR
# % 10 NWPR_temp              temporary reduction in acres under NWPR
# % 11 Rapanos_perm           permanent reduction in acres under Rapanos
# % 12 Rapanos_temp           temporary reduction in acres under Rapanos
# % 13 avg.prop.local_100_30  average proportion of baseline wetlands within 30-miles out of 100-mile county buffers.
# % 14 avg.prop.local_100_50  average proportion of baseline wetlands within 50-miles out of 100-mile county buffers.
# % 15 avg.prop.local.impacts.NWPR.perm_100_30     average proportion of permanent impacts under NWPR within 30-miles out of 100-mile county buffers.
# % 16 avg.prop.local.impacts.Rapanos.perm_100_30  average proportion of permanent impacts under Rapanos within 30-miles out of 100-mile county buffers.
# % 17 avg.prop.local.impacts.NWPR.temp_100_30     average proportion of temporary impacts under NWPR within 30-miles out of 100-mile county buffers.
# % 18 avg.prop.local.impacts.Rapanos.temp_100_30  average proportion of temporary impacts under Rapanos within 30-miles out of 100-mile county buffers.
# % 19 avg.prop.local.impacts.NWPR.perm_100_50     average proportion of permanent impacts under NWPR within 50-miles out of 100-mile county buffers.
# % 20 avg.prop.local.impacts.Rapanos.perm_100_50  average proportion of permanent impacts under Rapanos within 50-miles out of 100-mile county buffers.
# % 21 avg.prop.local.impacts.NWPR.temp_100_50     average proportion of temporary impacts under NWPR within 50-miles out of 100-mile county buffers.
# % 22 avg.prop.local.impacts.Rapanos.temp_100_50  average proportion of temporary impacts under Rapanos within 50-miles out of 100-mile county buffers.

#OK, data compare to Matlab checks out

################################################################################
################################################################################
# STEP 6: Create matrices to eventually be filled in with low, mean, median, 
# and high total HH WTP for all wetland types (low and high from 95% HPDI)
################################################################################
################################################################################
# create a matrix to be filled in with HH WTP values (permanent only impacts in years
# 2023-2027, permanent+temp in years 2028-2042).

WTP_mat <- matrix(0,nrow(USdata),ncol=20)
colnames(WTP_mat) <- c("2023", "2024","2025","2026","2027","2028", "2029","2030","2031","2032","2033","2034","2035","2036",
                       "2037", "2038","2039","2040","2041","2042")

HH_WTP_MEAN_LOCAL <- WTP_mat
WETLAND_LOCAL <- WTP_mat
HH_WTP_MEAN_NONLOCAL <- WTP_mat
WETLAND_NONLOCAL <- WTP_mat
HH_WTP_MEAN_TOT <- WTP_mat
WETLAND_TOT <- WTP_mat
################################################################################
################################################################################
# STEP 7: Loop over counties and years
# to generate predictions (each row is a county and each column is a year)
################################################################################
################################################################################
while (j<=20) {
for (i in 1:sk) {
  
  xs<-as.matrix(USdata[i,]) #pick county-specific row
  # crucially important to immediately turn this into a matrix, else can't perform
  # matrix algebra on anything that includes xs or its elements down the line
  
  # recall settings for fixed coefficients
  ########################################
  # % 1   const           1
  # % 2   lnyear          log(2021-1988) = 3.4965
  # % 3   lninc           county-specific
  # % 4   sagulf          county-specific
  # % 5   nema            county-sepcific
  # % 6   nmw             county-specific
  # % 7   CAN             0
  # % 14  volunt          0
  # % 15  lumpsum         0 # preference for the annual payment approach
  
  
  xpfix<-matrix(c(1, 3.4965, xs[2], xs[3], xs[4], xs[5], 0, 0, 0),nrow=1)
  #1 by 9
  #replicate this row vector 4*kver times - "4" for the four forested / local
  #combos down the line, "kver" for the 8 combos of prov, reg, cult
  # see my "cheat sheet" repmat_help
  Xpfix<-matrix(rep(xpfix,4*kver),4*kver,byrow=TRUE) #32 identical rows
  
  #now stack Jmat 4 times in the row dimension
  # see my "cheat sheet" repmat_help
  Xpmix<-matrix(rep(t(Jmat),4),4*nrow(Jmat),byrow=TRUE)
  #Xpmix is 32 by 3
  
  Xp<-as.matrix(cbind(Xpfix,Xpmix)) 
  #32 by 12 - all possible combos of [prov,cult,reg] and [forested/local]
  # Xp identical to Matlab, OK
  
  # add stuff for (q0+q1)/2), local, forest, and offset (log(q1-q0))
  ############################################################################
  addmat<-matrix(0,4,3)  #4 by 3
  offsetvec<-matrix(0,4,1) #4 by 1

  q0T<-xs[8] #total baseline acres
  
  ##############################################################################
  # KEY step:
  # permanent or "temp. plus permanent" scenario
  ##############################################################################
  # NOTE: Remove comments below if using 50-mile definition of local.
  a<-xs[13] #proportion of local baseline acres (30-miles)
  #a<-xs[14] #proportion of local baseline acres (50-miles)
  b<-xs[6] #proportion of forested acres
  NWPR_perm <- xs[9]
  NWPR_temp <- xs[10]
  Rapanos_perm <- xs[11]
  Rapanos_temp <- xs[12]
  a_NWPR_perm <- xs[15] # proportion of NWPR perm local (30-miles)
  a_Rapanos_perm <- xs[16] # proportion of Rapanos perm local (30-miles)
  a_NWPR_temp <- xs[17] # proportion of NWPR temp local (30-miles)
  a_Rapanos_temp <- xs[18] # proportion of Rapanos temp local (30-miles)
  #a_NWPR_perm <- xs[19] # proportion of NWPR perm local (50-miles)
  #a_Rapanos_perm <- xs[20] # proportion of Rapanos perm local (50-miles)
  #a_NWPR_temp <- xs[21] # proportion of NWPR temp local (50-miles)
  #a_Rapanos_temp <- xs[22] # proportion of Rapanos temp local (50-miles)
  
  # 1) settings for local, forested acres, offset term
  ####################################################
  # gives proportion of baseline acres that is forested and local under NWPR
  q0<- a*b*q0T - a_NWPR_perm*b*NWPR_perm*j - a_NWPR_temp*b*NWPR_temp*l
  # gives proportion of policy acres that is forested and local under Rapanos
  q1<- a*b*q0T - a_Rapanos_perm*b*Rapanos_perm*j - a_Rapanos_temp*b*Rapanos_temp*l
  dL <- 1  #dummy for "local"
  dF <- 1  #dummy for "forested"
  
  addmat[1,]=c(((q0+q1)/2)/1000, dL, dF); # don't forget scaling by 1000
  offsetvec[1]=log((q1-q0)/1000)
  
  # 2) settings for local, non-forested acres, offset term
  ####################################################
  q0<- a*(1-b)*q0T - a_NWPR_perm*(1-b)*NWPR_perm*j - a_NWPR_temp*(1-b)*NWPR_temp*l
  q1<- a*(1-b)*q0T - a_Rapanos_perm*(1-b)*Rapanos_perm*j - a_Rapanos_temp*(1-b)*Rapanos_temp*l
  dL <- 1  #dummy for "local"
  dF <- 0  #dummy for "forested"
  
  addmat[2,]=c(((q0+q1)/2)/1000, dL, dF); # don't forget scaling by 1000
  offsetvec[2]=log((q1-q0)/1000)
  
  # 3) settings for non-local, forested acres, offset term
  ####################################################
  q0<- (1-a)*b*q0T - (1-a_NWPR_perm)*b*NWPR_perm*j - (1-a_NWPR_temp)*b*NWPR_temp*l
  q1<- (1-a)*b*q0T - (1-a_Rapanos_perm)*b*Rapanos_perm*j - (1-a_Rapanos_temp)*b*Rapanos_temp*l
  dL <- 0  #dummy for "local"
  dF <- 1  #dummy for "forested"
  
  addmat[3,]=c(((q0+q1)/2)/1000, dL, dF); # don't forget scaling by 1000
  offsetvec[3]=log((q1-q0)/1000)
  
  # 4) settings for non-local, non-forested acres, offset term
  ####################################################
  q0<- (1-a)*(1-b)*q0T - (1-a_NWPR_perm)*(1-b)*NWPR_perm*j - (1-a_NWPR_temp)*(1-b)*NWPR_temp*l
  q1<- (1-a)*(1-b)*q0T - (1-a_Rapanos_perm)*(1-b)*Rapanos_perm*j - (1-a_Rapanos_temp)*(1-b)*Rapanos_temp*l
  dL <- 0  #dummy for "local"
  dF <- 0  #dummy for "forested"
  
  addmat[4,]=c(((q0+q1)/2)/1000, dL, dF); # don't forget scaling by 1000
  offsetvec[4]=log((q1-q0)/1000)
  #OK, addmat and offsetvec identical to Matlab

  ##############################################################################
  # generate Posterior Predictive Distribution (PPD) for each possible combo of X
  ##############################################################################
  # replicate EACH ROW of addmat kver(=8) times
  # see my "cheat sheet" repmat_help
  intadd<-matrix(rep(addmat,each=kver),kver*nrow(addmat),byrow=FALSE)
  # OK, same as matlab, 32 by 3
  
  Xpscen<-cbind(Xp,intadd) #32 x 15
  # mean check with Matlab - OK
  
  # note Xpscen is now 32 x 15
  # bmatnew is 15 x r2
  # if we multiply the two we get a 32 x r2, where each row gives r2
  # predictions for a given mix
  
  Xbmat<-Xpscen %*% bmatnew
  
  # replicate EACH ELEMENT of offsetvec kver times
  # see my "cheat sheet" repmat_help
  intoff<-matrix(rep(offsetvec,each=kver),kver*nrow(offsetvec),)
  # OK, looks good, 32 x 1, same as Matlab
  

  # now replicate intoff r2 times in the column dimension
  # see my "cheat sheet" repmat_help
  intoff2<-matrix(rep(intoff,r2),nrow(intoff),)
  # OK, identical to Matlab
  
  # replicate error vector 4 * kver times in the row dimension
  # see my "cheat sheet" repmat_help
  epmat2<-matrix(rep(epmat,4*kver),4*kver,byrow=TRUE)
  
  # compute logged and un-logged predictions
  logy<-Xbmat + intoff2 + epmat2
  ypmat=exp(logy)
  
  # truncation step - need to truncate each row of ypmat at "tp" percentile
  #########################################################################
  ypmatt<-matrix(0,4*kver,r2*tp)  #pre-allocate, here 32 by 99000
  
  for (tpi in 1:(4*kver)){
    inti<-ypmat[tpi,]
    rr<-quantile(inti,tp) #get 99th percentile
    inti<-inti[inti<=rr] #find obs. that fall below quantile
    ypmatt[tpi,]<-inti
  }
  # OK, checked row means, similar to Matlab
  r2t<-ncol(ypmatt)
  ypmat<-ypmatt
  r2<-r2t
  
  ########################################################################
  # obtain probablity-weighted average prediction, 
  # for each of the four combos for forested and local
  ########################################################################
  # this implies we need to weigh each row in ypmat according to the
  # multiplicative weights for prov, reg, cult
  # then take average over each block of 8 rows
  
  # bring in pvec from above
  # recall pvec = 8 x 1
  
  # first stack 4 copies of pvec in the row dimension
  # see my "cheat sheet" repmat_help
  pvec2<-matrix(rep(pvec,4),4*nrow(pvec),)
  
  #now replicate "new r2" times in the column dimension
  # see my "cheat sheet" repmat_help
  pvec3<-matrix(rep(pvec2,r2),nrow(pvec2),)
  
  ypmatw=ypmat*pvec3  #still 32 by r2
  # OK, row means look similar to Matlab
  
  # Now sum draws for each forest / local combo - this produces a
  # weighted average
  #######################################################################
  yLF<-matrix(colSums(ypmatw[1:8,]),1,)
  yLG<-matrix(colSums(ypmatw[9:16,]),1,)
  yNF<-matrix(colSums(ypmatw[17:24,]),1,)
  yNG<-matrix(colSums(ypmatw[25:32,]),1,)
  
  # add up for sub-totals and total wtp
  yL=yLF+yLG; # all local
  #yF=yLF+yNF; # all forested
  yN=yNF+yNG; # all nonlocal
  #yG=yLG+yNG; # all non-forested
  yT=yLF+yLG+yNF+yNG; # grand total WTP
  
################################################################################
# Generate output tables
################################################################################

# generic stats
#############################
allmat<-matrix(rbind(yL,yN,yT),3,)
  
outm_all<-matrix(rowMeans(allmat),3,1)
    
#outm<-matrix(rowMeans(allmat),9,1)
#outmed<-matrix(apply(allmat, 1, median),9,1) #1 means "over rows"
#outsd<-matrix(apply(allmat, 1, sd),9,1) 
#outmin<-matrix(apply(allmat, 1, min),9,1)
#outmax<-matrix(apply(allmat, 1, max),9,1) 

#outstats<-matrix(cbind(outm,outmed,outsd,outmin,outmax),9,)


#outstats_df<-data.frame(outstats) 
#colnames(outstats_df)<-c("mean","median","std","min","max")
#rownames(outstats_df)<- c("local,forested","local,non-forested","non-local,forested",
#                          "non-local,non-forested","all local","all forested","all non-local",
#                          "all non-forested","all types")


# mean and 95% HPDI (main result)
##################################
#source("./functions/klaus_hpdi_v3.R")  #call the function

#lowvec<-matrix(0,9,1) #pre-assign vector for low bounds
#hivec<-matrix(0,9,1) #pre-assign vector for high bounds

#1000 bins should be enough for the HPDI function, 
#things don't change much boosting to 10,000, but MUCH slower

#int<-klaus_hpdi(yLF,0.05,5000)
#lowvec[1]<-int[[1]]  #note double-brackets since int is a list
#hivec[1]<-int[[2]]

#int<-klaus_hpdi(yLG,0.05,5000)
#lowvec[2]<-int[[1]]  #note double-brackets since int is a list
#hivec[2]<-int[[2]]

#int<-klaus_hpdi(yNF,0.05,5000)
#lowvec[3]<-int[[1]]  #note double-brackets since int is a list
#hivec[3]<-int[[2]]

#int<-klaus_hpdi(yNG,0.05,5000)
#lowvec[4]<-int[[1]]  #note double-brackets since int is a list
#hivec[4]<-int[[2]]

#int<-klaus_hpdi(yL,0.05,5000)
#lowvec[5]<-int[[1]]  #note double-brackets since int is a list
#hivec[5]<-int[[2]]

#int<-klaus_hpdi(yF,0.05,5000)
#lowvec[6]<-int[[1]]  #note double-brackets since int is a list
#hivec[6]<-int[[2]]

#int<-klaus_hpdi(yN,0.05,5000)
#lowvec[7]<-int[[1]]  #note double-brackets since int is a list
#hivec[7]<-int[[2]]

#int<-klaus_hpdi(yG,0.05,5000)
#lowvec[8]<-int[[1]]  #note double-brackets since int is a list
#hivec[8]<-int[[2]]

#int<-klaus_hpdi(yT,0.05,5000)
#lowvec[9]<-int[[1]]  #note double-brackets since int is a list
#hivec[9]<-int[[2]]

# capture q0, q1's for each case
################################################

# local wetland impact
q0L <- a*q0T - a_NWPR_perm*NWPR_perm*j - a_NWPR_temp*NWPR_temp*l
q1L <- a*q0T - a_Rapanos_perm*Rapanos_perm*j - a_Rapanos_temp*Rapanos_temp*l
d_qL <- q1L - q0L
# non-local wetland impact
q0N <- (1-a)*q0T - (1-a_NWPR_perm)*NWPR_perm*j - (1-a_NWPR_temp)*NWPR_temp*l
q1N <- (1-a)*q0T - (1-a_Rapanos_perm)*Rapanos_perm*j - (1-a_Rapanos_temp)*Rapanos_temp*l
d_qN <- q1N - q0N
# total wetland impact
q0_tot <- q0T - NWPR_perm*j - NWPR_temp*l
q1_tot <- q0T - Rapanos_perm*j - Rapanos_temp*l
d_q_tot <- q1_tot - q0_tot

#outq<-matrix(rbind(c(q0LF,q1LF),c(q0LG,q1LG),c(q0NF,q1NF),c(q0NG,q1NG)),4,2)
#outq_df<-data.frame(outq) 
#colnames(outq_df)<-c("q0","q1")
#rownames(outq_df)<- c("local,forested","local,non-forested","non-local,forested",
#                          "non-local,non-forested")

#collect everything in a list

################################################################################
# start to fill in matrix with low, mean, median, high HH WTP (low and high from 95% HPDI)
################################################################################
HH_WTP_MEAN_LOCAL[i,j] <- outm_all[1,1]
WETLAND_LOCAL[i,j] <- d_qL
HH_WTP_MEAN_NONLOCAL[i,j] <- outm_all[2,1]
WETLAND_NONLOCAL[i,j] <- d_qN
HH_WTP_MEAN_TOT[i,j] <- outm_all[3,1]
WETLAND_TOT[i,j] <- d_q_tot
 
r2<-ncol(betamat)  #re-set r2 since we re-defined it after truncation
} #end of loop over counties
  print(j)  #so we follow along as it loops
  j <- j + 1 # increment over years
  if (j > 5) {
    l <- l + 1 # start counting temporary impacts
  }
} # end of loop over years

# for writing HH WTP to csv
#write.csv(HH_WTP_MEAN,"./output final/2021 dollars/HH_WTP_52obs_State_mean_SS.csv")
#save(HH_WTP_MEAN, file = "./output final/2021 dollars/HH_WTP_52obs_State_mean_SS.rda")
#write.csv(HH_WTP_MEDIAN,"./output final/2021 dollars/HH_WTP_52obs_State_median_SS.csv")
#save(HH_WTP_MEDIAN, file = "./output final/2021 dollars/HH_WTP_52obs_State_median_SS.rda")

# create dataframes to place into sheets in excel
statename <- read_xlsx("./data/State_Numbering.xlsx") # to name the rows of data

HH_WTP_loc_df <- as.data.frame(cbind(statename, HH_WTP_MEAN_LOCAL))
HH_WTP_nonloc_df <- as.data.frame(cbind(statename, HH_WTP_MEAN_NONLOCAL))
HH_WTP_tot_df <- as.data.frame(cbind(statename, HH_WTP_MEAN_TOT))

wetland_loc_df <- as.data.frame(cbind(statename, WETLAND_LOCAL))
wetland_nonloc_df <- as.data.frame(cbind(statename, WETLAND_NONLOCAL))
wetland_tot_df <- as.data.frame(cbind(statename, WETLAND_TOT))

HH_WTPperacre_loc_df <- as.data.frame(cbind(statename, HH_WTP_MEAN_LOCAL/WETLAND_LOCAL))
HH_WTPperacre_nonloc_df <- as.data.frame(cbind(statename, HH_WTP_MEAN_NONLOCAL/WETLAND_NONLOCAL))
HH_WTPperacre_tot_df <- as.data.frame(cbind(statename, HH_WTP_MEAN_TOT/WETLAND_TOT))

wtp_names <- list('HH_WTPperacre_local' = HH_WTPperacre_loc_df, 'HH_WTP_local' = HH_WTP_loc_df, 'wetland_local' = wetland_loc_df, 
                  'HH_WTPperacre_nonlocal' = HH_WTPperacre_nonloc_df, 'HH_WTP_nonlocal' = HH_WTP_nonloc_df, 'wetland_nonlocal' = wetland_nonloc_df, 
                  'HH_WTPperacre_tot' = HH_WTPperacre_tot_df, 'HH_WTP_tot' = HH_WTP_tot_df, 'wetland_total' = wetland_tot_df)

openxlsx::write.xlsx(wtp_names, file = 'output final/2021 dollars/HH_WTP_all_52obs_30mi_State_mean_SS_11-2-22.xlsx')
save(HH_WTP_MEAN_LOCAL, HH_WTP_MEAN_NONLOCAL, HH_WTP_MEAN_TOT, WETLAND_LOCAL, WETLAND_NONLOCAL, WETLAND_TOT, file = "./output final/2021 dollars/HH_WTP_all_52obs_30mi_State_mean_SS_11-2-22.rda")

proc.time()-tic #capture run time

# RUNTIME = appx 10 minutes
#user  system elapsed 
#492.8   105.6   601.9

# load rda files so you don't need to run the loop again
#load("./output final/2021 dollars/HH_WTP_all_52obs_30mi_State_mean_SS_11-2-22.rda")

################################################################################
################################################################################
# STEP 8: Apply Woods and Pool or SEDAC Population Projections
################################################################################
################################################################################
#load("./data/HHdataFull_v2.rda")
# for SEDAC
load("./data/HHdataFull_SEDAC.rda")
pop_matrix <- HHpop

WTP_MEAN <- HH_WTP_MEAN_TOT*pop_matrix

################################################################################
################################################################################
# STEP 9: Apply TPV and Annualization
################################################################################
################################################################################
source("./functions/TPV_func.R")  #call the function
source("./functions/annualization.R")  #call the function

discount_3pct <- 0.03
discount_7pct <- 0.07
promulg_yr <- 2023
period <- 20
Year_vec = c(2023,2024,2025,2026,2027,2028,2029,2030,2031,2032,2033,
             2034,2035,2036,2037,2038,2039,2040,2041,2042)

#using mean WTP
TOT_BEN_MEAN_3pct <- tot_pv(WTP_MEAN, discount_3pct, Year_vec, promulg_yr)
AN_BEN_MEAN_3pct <- annualize(TOT_BEN_MEAN_3pct, discount_3pct, period, 1) 
TOT_BEN_MEAN_7pct <- tot_pv(WTP_MEAN, discount_7pct, Year_vec, promulg_yr)
AN_BEN_MEAN_7pct <- annualize(TOT_BEN_MEAN_7pct, discount_7pct, period, 1) 

WTP_mean_df <- as.data.frame(cbind(statename, TOT_BEN_MEAN_3pct, AN_BEN_MEAN_3pct, TOT_BEN_MEAN_7pct, AN_BEN_MEAN_7pct))

#define sheet names for each data frame
dataset_names <- list('Sheet1' = WTP_mean_df)

# for annualized benefits using SEDAC HH projections
openxlsx::write.xlsx(dataset_names, file = 'output final/2021 dollars/Total_Annualized_Benefits_52_obs_30mi_State_SS_SEDAC_11-2-22.xlsx')