################################################################################
################################################################################
# 
# MRM2 model including Canadian studies, incl. Rudd et al.
# SSVS
################################################################################
################################################################################

# This is for the pooled model with 52 observations and local defined using
# a 30-mile threshold. Can get MRM results with original local definition using 
# wetlanddataFeb2022.rda. To do this, swap lines 39 for 40, 262 for 263, and 285-6
# for 288-9.
  
# Preliminary housekeeping
################################################################################
# set working directory to keep file paths short
setwd('C:/Users/55338/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/Wetland MRM - R Code/')

rm(list = ls(all = TRUE)) #first, clear R's workspace
options(prompt = "R> ", digits = 4) # show 4 digits by default
options(continue=" ") #remove continuation prompt
#
set.seed(37)  #sets the random number generator so we can reproduce results
#
tic<-proc.time() #start stop watch
#
# load packages we'll need
############################
library("xtable")
library("MASS")   #for draws from the multivariate normal
library("stats")  #for draws from the gamma
library("mvtnorm")#for draws from the multivariate t
library("MCMCpack") #for draws from the inverse gamma
library("openxlsx") #to copy output to excel

################################################################################
#Load and Prepare Data
################################################################################
#
#load("./data/wetlanddataFeb2022_corrected.rda") # set 'local' = 1 for study ID 102 and 109 (the 30-mile definition case)
#load("./data/wetlanddataFeb2022.rda") # original 'local' definition

load("./data/wetlanddataJune2022_corrected.rda") # set 'local' = 1 for study ID 102 and 109 (the 30-mile definition case)
#load("./data/wetlanddataJune2022.rda") # original 'local' definition

# % contents of data
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# % 1   studyid         original unique study id
# % 2   obsid           original unique observation id
# % 3   lnwtp           log of wtp2021
# % 4   wlfresh         1=freshwater wetland, 0 = salt/coastal
# % 5   lnyear          log of yr of data collection - 1988
# % 6   lninc           log of inc2021
# % 7   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# % 8   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# % 9   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# % 10  CAN             1 = Canadian study
# % 11  local           f1= local scale (target pop at sub-state level)
# % 12  prov            1=provisioning function affected
# % 13  reg             1=regulating function affected
# % 14  cult            1=cultural function affected
# % 15  forest          1=forested wetland            
# % 16  q0              baseline acreage
# % 17  q1              policy acreage
# % 18  volunt          1= pay mech = voluntary contribution
# % 19  lumpsum         1=single payment
# % 20  ce              choice experiment
# % 21  nrev            1=not peer-reviewed
# % 22  median          1=orig. wtp based on econometric medians
# % 23  q0alt           alternative definition of baseline acres - see notes
# % 24  q1alt           alternative definition of policy acres - see notes

# only included in 'wetlanddataFeb2022_wGeo_corrected'.
# % 25  wetdens         affected waterbody area divided by area of geographic scope
# % 26  distdec         population-weighted age distance of all CBGs (or counties) in samples area to affected water body

# note: missing values for wetdens and distdec coded as "-999"

# Drop Rudd et al
################################
# data<-subset(data,data[,1]!=128)

# Drop Kim & Petrolia 2013 observations (replaced with Petrolia & Kim 2011)
################################
data<-subset(data,data[,1]!=110)

attach(data)

# keep freshwater only
###############################
#data<-subset(data,data[,4]==1)

# define y and X
###############################
q0<-data[,16]/1000
q1<-data[,17]/1000

y<-as.matrix(data[,3]-log(q1-q0))
n<-length(y)

X<-as.matrix(cbind(rep(1,n),data[,5:15],(q0+q1)/2, data[,18:19]))
k<-ncol(X)

# % contents of X
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# % 1   const           constant
# % 2   lnyear          log of yr of data collection - 1988
# % 3   lninc           log of inc2017
# % 4   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# % 5   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# % 6   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# % 7   CAN             1 = Canadian study
# % 8   local           f1= local scale (target pop at sub-state level)
# % 9   prov            1=provisioning function affected
# % 10  reg             1=regulating function affected
# % 11  cult            1=cultural function affected
# % 12  forest          1=forested wetland
# % 13  (q0+q1)/2
# % 14  volunt          1= pay mech = voluntary contribution
# % 15  lumpsum         1=single payment


k1<-13 #context-defining
k2<-2 #nuisance
k3<-9 #interactions (I checked in stata to make sure these are identified)


# Interaction terms
#########################
fresh<-data[,4] # grabs the freshwater variable
nf<-matrix(1-fresh) # uses it to create the saltwater variable and creates a matrix
int<-matrix(rep(nf,k3),nrow(nf),) # creates a matrix that will be multiplied by the 9 interaction terms

# creates a matrix of the variables to be interacted with:
# constant, lnyear, lninc, sagulf, local, prov, reg, cult, ((q0+q1)/2) 
Xadd<-as.matrix(cbind(X[,1:4],X[,8:11],X[,13]))

add<-as.matrix(int*Xadd) # creates the interactions by taking the product of the two matrices.
X<-as.matrix(cbind(X,add)) # now redefines X to include the interactions

# mean check
tt<-as.matrix(colMeans(X)) #OK, identical to Matlab

# contents of augmented X
######################################################
# 1   const           constant
# 2   lnyear          log of yr of data collection - 1988
# 3   lninc           log of inc2017
# 4   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# 5   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# 6   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# 7   CAN             1 = Canadian study
# 8   local           f1= local scale (target pop at sub-state level)
# 9   prov            1=provisioning function affected
# 10  reg             1=regulating function affected
# 11  cult            1=cultural function affected
# 12  forest          1=forested wetland
# 13  (q0+q1)/2
# 14  volunt          1= pay mech = voluntary contribution
# 15  lumpsum         1=single payment

# 16  const*salt
# 17  lnyear*salt
# 18  lninc*salt
# 19  sagulf*salt
# 20  local*salt
# 21  prov*salt
# 22  reg*salt
# 23  cult*salt
# 24  (q0+q1)/2* salt

k<-ncol(X)

Xf<-X[,1:k1] # context variables
Me<-X[,(k1+1):(k1+k2)] # nuisance variables
Z<-X[,(k1+k2+1):k] # interactions

################################################################################
# Starting values, priors, and tuners
################################################################################

r1<-500000  #burn-ins
r2<-100000  #keepers
R<-r1+r2
  
# generic OLS to get starting values
####################################
bols<-solve((t(X)) %*% X) %*% (t(X) %*% y)
e<-y-X%*%bols 
SSR<-(t(e)%*%e)
s2<-(t(e)%*%e)/(n-k) 


#SSVS tuners
####################################
p<-0.5 #prior probability that interaction term comes from a "healthy" distribution
t2<-0.01 #variance of degenerate (close-to-zero) distribution
c2<-10/t2 #c2 * t2 = 10 = variance of the healthy distribution
  
#PRIORS for coefficients:
##########################
#for beta:
mu0<-rep(0,k) #prior mean for all coefficients
Vx<-as.matrix(rep(1,k1+k2)*(c2*t2)) #prior variance for fixed elements of X

gamvec<-as.matrix(rbinom(k3,1,p)) #draw a 0/1 value for each interaction coefficient
int<-gamvec*c2*t2 + (1-gamvec)*t2 #set starting variance for each interaction term
dvec<-as.vector(rbind(Vx,int)) #combine prior variances for main effects and interactions
# note: "as.vector" is crucial here, else diag() EXTRACTS a diagonal, instead of creating a diag. matrix
# stupid R-ism...
V0<-diag(dvec) #looks OK, k by k, will be updated in the GS

#for sig2:
#######################################
v0<-1/2
tau0<-1/2


# STARTING VALUES
######################
betadraw<-bols
sig2draw<-as.vector(s2)

################################################################################
# call Gibbs Sampler function and run sampler
################################################################################

source("./functions/gs_MRM2_ssvs.R")  #call the function

int<-gs_MRM2_ssvs(X,y,k1,k2,k3,k,n,r1,r2,mu0,Vx,V0,betadraw,v0,tau0,sig2draw,p,c2,t2)
# run the sampler - you should see a flurry of counters in the console,
# in steps of 1000's

betamat<-int[[1]]
sig2mat<-int[[2]]
phimat<-int[[3]]


################################################################################
# get convergence diagnostics
################################################################################

M<-rbind(betamat,sig2mat)

source("./functions/kdiagnostics_greater0.R")  #call the function
diagnostics=kdiagnostics_greater0(M) #apply the function

################################################################################
# output table
################################################################################
# coefficients, followed by sig2
#################################
metaout_df<-data.frame(rbind(diagnostics[1:(k1+k2),],diagnostics[(k+1),])) 
colnames(metaout_df)<-c("post. mean","post.std","p(>0)","nse","IEF","M*","CD")
rownames(metaout_df)<- c("const","lnyear","lninc","sagulf","nema","nmw", "CAN","local",
                         "prov","reg","cult","forest","(q0+q1/2)","volunt","lumpsum","sig2")

# includes interaction terms (for tables)
metaout_df_tbl<-data.frame(diagnostics[1:(k+1),]) 
colnames(metaout_df_tbl)<-c("post. mean","post.std","p(>0)","nse","IEF","M*","CD")
rownames(metaout_df_tbl)<- c("const","lnyear","lninc","sagulf","nema","nmw", "CAN","local",
                  "prov","reg","cult","forest","(q0+q1/2)","volunt","lumpsum",
                  "const*salt", "lnyear*salt", "lninc*salt", "sagulf*salt",
                  "local*salt", "prov*salt", "reg*salt", "cult*salt", "(q0+q1)/2*salt",
                  "sig2")
#write.xlsx(metaout_df_tbl,"./output using Feb22 corrected data/MRM2_CAN_52obs_SSVS_30mile_main_tbl_v2.xlsx", row.names=TRUE)
#write.xlsx(metaout_df_tbl,"./output using Feb22 data/MRM2_CAN_52obs_SSVS_main_tbl_v2.xlsx", row.names=TRUE)

#write.xlsx(metaout_df_tbl,"./output using June22 corrected data/MRM2_CAN_52obs_SSVS_30mile_main_tbl_v2.xlsx", row.names=TRUE)
write.xlsx(metaout_df_tbl,"./output using June22 data/MRM2_CAN_52obs_SSVS_main_tbl_v2.xlsx", row.names=TRUE)

#inclusion probabilities
############################
phiout_df<-data.frame(rowMeans(phimat))
colnames(phiout_df)<-c("prob(in)")
rownames(phiout_df)<- c("const*salt","lnyear*salt","lninc*salt",
                        "sagulf*salt","local*salt","prov*salt",
                        "reg*salt","cult*salt","(q0+q1)/2* salt")

################################################################################
# save your output for further processing
################################################################################
#save(X,y,betamat,sig2mat,phimat,metaout_df,phiout_df,file = "./output using Feb22 corrected data/MRM2_CAN_52obs_SSVS_30mile_v2.rda")
#save(X,y,betamat,sig2mat,phimat,metaout_df,phiout_df,file = "./output using Feb22 data/MRM2_CAN_52obs_SSVS_v2.rda")

save(X,y,betamat,sig2mat,phimat,metaout_df,phiout_df,file = "./output using June22 corrected data/MRM2_CAN_52obs_SSVS_30mile_v2.rda")
#save(X,y,betamat,sig2mat,phimat,metaout_df,phiout_df,file = "./output using June22 data/MRM2_CAN_52obs_SSVS_v2.rda")

################################################################################
# OR send directly to excel
################################################################################
#write.xlsx(metaout_df,"./output using Feb22 corrected data/MRM2_CAN_52obs_SSVS_30mile_main_v2.xlsx", row.names=TRUE)
#write.xlsx(phiout_df,"./output using Feb22 corrected data/MRM2_CAN_52obs_SSVS_30mile_phi_v2.xlsx", row.names=TRUE)

#write.xlsx(metaout_df,"./output using Feb22 data/MRM2_CAN_52obs_SSVS_main_v2.xlsx", row.names=TRUE)
#write.xlsx(phiout_df,"./output using Feb22 data/MRM2_CAN_52obs_SSVS_phi_v2.xlsx", row.names=TRUE)

write.xlsx(metaout_df,"./output using June22 corrected data/MRM2_CAN_52obs_SSVS_30mile_main_v2.xlsx", row.names=TRUE)
write.xlsx(phiout_df,"./output using June22 corrected data/MRM2_CAN_52obs_SSVS_30mile_phi_v2.xlsx", row.names=TRUE)

#write.xlsx(metaout_df,"./output using June22 data/MRM2_CAN_52obs_SSVS_main_v2.xlsx", row.names=TRUE)
#write.xlsx(phiout_df,"./output using June22 data/MRM2_CAN_52obs_SSVS_phi_v2.xlsx", row.names=TRUE)


proc.time()-tic #capture run time
