################################################################################
################################################################################
# 
# MRM2 model including Canadian studies, incl. Rudd et al.
# SSVS
# Prep script for Chib method
################################################################################
################################################################################

# This is for the pooled model with 52 observations and local defined using
# a 30-mile threshold (can get MRM results with original local definition using 
# wetlanddataFeb2022.rda)
  
# Preliminary housekeeping
################################################################################
# set working directory to keep file paths short
setwd('C:/Users/55338/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/Wetland MRM - R Code/')

rm(list = ls(all = TRUE)) #first, clear R's workspace
options(prompt = "R> ", digits = 4) # show 4 digits by default
options(continue=" ") #remove continuation prompt
#
set.seed(37)  #sets the random number generator so we can reproduce results
#
tic<-proc.time() #start stop watch
#
# load packages we'll need
############################
library("xtable")
library("MASS")   #for draws from the multivariate normal
library("stats")  #for draws from the gamma
library("mvtnorm")#for draws from the multivariate t
library("MCMCpack") #for draws from the inverse gamma
library("openxlsx") #to copy output to excel

################################################################################
#Load and Prepare Data
################################################################################
#
# read tab-delimited text data (easiest way to load)
# header=TRUE also loads variable names
#load("./data/wetlanddataFeb2022_corrected.rda") # set 'local' = 1 for study ID 102 and 109 (the 30-mile definition case)
#load("./data/wetlanddataFeb2022.rda") # original 'local' definition

load("./data/wetlanddataJune2022_corrected.rda") # set 'local' = 1 for study ID 102 and 109 (the 30-mile definition case)
#load("./data/wetlanddataJune2022.rda") # original 'local' definition

# % contents of data
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# % 1   studyid         original unique study id
# % 2   obsid           original unique observation id
# % 3   lnwtp           log of wtp2021
# % 4   wlfresh         1=freshwater wetland, 0 = salt/coastal
# % 5   lnyear          log of yr of data collection - 1988
# % 6   lninc           log of inc2021
# % 7   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# % 8   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# % 9   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# % 10  CAN             1 = Canadian study
# % 11  local           f1= local scale (target pop at sub-state level)
# % 12  prov            1=provisioning function affected
# % 13  reg             1=regulating function affected
# % 14  cult            1=cultural function affected
# % 15  forest          1=forested wetland            
# % 16  q0              baseline acreage
# % 17  q1              policy acreage
# % 18  volunt          1= pay mech = voluntary contribution
# % 19  lumpsum         1=single payment
# % 20  ce              choice experiment
# % 21  nrev            1=not peer-reviewed
# % 22  median          1=orig. wtp based on econometric medians
# % 23  q0alt           alternative definition of baseline acres - see notes
# % 24  q1alt           alternative definition of policy acres - see notes

# only included in 'wetlanddataFeb2022_wGeo_corrected'.
# % 25  wetdens         affected waterbody area divided by area of geographic scope
# % 26  distdec         population-weighted age distance of all CBGs (or counties) in samples area to affected water body

# note: missing values for wetdens and distdec coded as "-999"

# Drop Rudd et al
################################
# data<-subset(data,data[,1]!=128)

# Drop Kim & Petrolia 2013 observations (replaced with Petrolia & Kim 2011)
################################
data<-subset(data,data[,1]!=110)

attach(data)

# keep freshwater only
###############################
#data<-subset(data,data[,4]==1)

# define y and X
###############################
q0<-data[,16]/1000
q1<-data[,17]/1000

y<-as.matrix(data[,3]-log(q1-q0))
n<-length(y)

X<-as.matrix(cbind(rep(1,n),data[,5:15],(q0+q1)/2, data[,18:19]))

# % contents of X
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# % 1   const           constant
# % 2   lnyear          log of yr of data collection - 1988
# % 3   lninc           log of inc2017
# % 4   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# % 5   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# % 6   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# % 7   CAN             1 = Canadian study
# % 8   local           f1= local scale (target pop at sub-state level)
# % 9   prov            1=provisioning function affected
# % 10  reg             1=regulating function affected
# % 11  cult            1=cultural function affected
# % 12  forest          1=forested wetland
# % 13  (q0+q1)/2
# % 14  volunt          1= pay mech = voluntary contribution
# % 15  lumpsum         1=single payment


k1<-13 #context-defining
k2<-2 #nuisance
k3<-9 #interactions (I checked in stata to make sure these are identified)


# Interaction terms
#########################
fresh<-data[,4] # grabs the freshwater variable
nf<-matrix(1-fresh) # uses it to create the saltwater variable and creates a matrix
int<-matrix(rep(nf,k3),nrow(nf),) # creates a matrix that will be multiplied by the 9 interaction terms

# creates a matrix of the variables to be interacted with:
# constant, lnyear, lninc, sagulf, local, prov, reg, cult, ((q0+q1)/2) 
Xadd<-as.matrix(cbind(X[,1:4],X[,8:11],X[,13]))

add<-as.matrix(int*Xadd) # creates the interactions by taking the product of the two matrices.
X<-as.matrix(cbind(X,add)) # now redefines X to include the interactions

# mean check
tt<-as.matrix(colMeans(X)) #OK, identical to Matlab

# contents of augmented X
######################################################
# 1   const           constant
# 2   lnyear          log of yr of data collection - 1988
# 3   lninc           log of inc2021
# 4   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# 5   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# 6   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# 7   CAN             1 = Canadian study
# 8   local           f1= local scale (target pop at sub-state level)
# 9   prov            1=provisioning function affected
# 10  reg             1=regulating function affected
# 11  cult            1=cultural function affected
# 12  forest          1=forested wetland
# 13  (q0+q1)/2
# 14  volunt          1= pay mech = voluntary contribution
# 15  lumpsum         1=single payment

# 16  const*salt
# 17  lnyear*salt
# 18  lninc*salt
# 19  sagulf*salt
# 20  local*salt
# 21  prov*salt
# 22  reg*salt
# 23  cult*salt
# 24  (q0+q1)/2* salt

k<-ncol(X)


################################################################################
# Starting values, priors, and tuners
################################################################################

r1<-0  #burn-ins - don't need any for Chib
r2<-100000  #keepers
R<-r1+r2
  
# generic OLS to get starting values
####################################
bols<-solve((t(X)) %*% X) %*% (t(X) %*% y)
e<-y-X%*%bols 
SSR<-(t(e)%*%e)
s2<-(t(e)%*%e)/(n-k) 
  
#PRIORS:
###################
#for beta:
mu0<-rep(0,k)
V0<-10*diag(k)

#for sig2:
v0<-1/2
tau0<-1/2


# Load GS results
######################
#load("./output using Feb22 corrected data/MRM2_CAN_52obs_SSVS_30mile_v2.rda")
#load("./output using Feb22 data/MRM2_CAN_52obs_SSVS_v2.rda")

load("./output using June22 corrected data/MRM2_CAN_52obs_SSVS_30mile_v2.rda")
#load("./output using June22 data/MRM2_CAN_52obs_SSVS_v2.rda")
################################################################################
# call Chib function
################################################################################
#I generally adopt the name of the original GS and add "_chib"
source("./functions/gs_normal_independent_chib.R")  #call the function

int<-gs_normal_independent_chib(X,y,n,k,r1,r2,mu0,V0,betadraw,v0,tau0,sig2draw,betamat,sig2mat)

# contents of int:
# logpmarg
# lp
# llf
# lpost


################################################################################
# output table
################################################################################
# coefficients, followed by sig2
chibout<-as.data.frame(as.matrix(int)) #4 by 1
colnames(chibout)<-c("chib output")
rownames(chibout)<- c("log-marg-lh","log prior", "log-lh", "log-posterior")

# you can now open the table in the "Global Environment" window and copy/paste it
# to Excel etc.

################################################################################
# save your output for further processing
################################################################################
#save(chibout,file = "./output using Feb22 corrected data/MRM2_CAN_52obs_SSVS_30mile_chib_v2.rda")

################################################################################
# OR send directly to excel
################################################################################
#write.xlsx(chibout,"./output using Feb22 corrected data/MRM2_CAN_52obs_SSVS_30mile_chib_v2.xlsx",row.names=TRUE)
#write.xlsx(chibout,"./output using Feb22 data/MRM2_CAN_52obs_SSVS_chib_v2.xlsx",row.names=TRUE)

write.xlsx(chibout,"./output using June22 corrected data/MRM2_CAN_52obs_SSVS_30mile_chib_v2.xlsx",row.names=TRUE)
#write.xlsx(chibout,"./output using June22 data/MRM2_CAN_52obs_SSVS_chib_v2.xlsx",row.names=TRUE)

proc.time()-tic #capture run time

