################################################################################
################################################################################
# 
# MRM2 model including Canadian studies, but not Rudd et al.
# Prep script for Chib method
################################################################################
################################################################################

# Note: to run the entire script, select everything, then click on "Run"
# otherwise "Run" will execute only the selected line
  
# Preliminary housekeeping
################################################################################
# set working directory to keep file paths short
setwd('C:/Users/55338/ICF/WOTUS Reconsideration (ICF Internal Site) - General/Wetland Meta-Analysis/Wetland MRM - R Code/')

rm(list = ls(all = TRUE)) #first, clear R's workspace
options(prompt = "R> ", digits = 4) # show 4 digits by default
options(continue=" ") #remove continuation prompt
#
set.seed(37)  #sets the random number generator so we can reproduce results
#
tic<-proc.time() #start stop watch
#
# load packages we'll need
############################
library("xtable")
library("MASS")   #for draws from the multivariate normal
library("stats")  #for draws from the gamma
library("mvtnorm")#for draws from the multivariate t
library("MCMCpack") #for draws from the inverse gamma
library("openxlsx") #to copy output to excel

################################################################################
#Load and Prepare Data
################################################################################
#
# read tab-delimited text data (easiest way to load)
# header=TRUE also loads variable names
#data<- read.table('./data/wetlanddataFeb2022.txt', sep="\t", header=TRUE)
data<- read.table('./data/wetlanddataJune2022_corrected.txt', sep="\t", header=TRUE)
#
# save in R-format, for future loading
#save(data, file = "./data/wetlanddataFeb2022.rda")
save(data, file = "./data/wetlanddataJune2022.rda")

# % contents of data
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# % 1   studyid         original unique study id
# % 2   obsid           original unique observation id
# % 3   lnwtp           log of wtp2021
# % 4   wlfresh         1=freshwater wetland, 0 = salt/coastal
# % 5   lnyear          log of yr of data collection - 1988
# % 6   lninc           log of inc2021
# % 7   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# % 8   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# % 9   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# % 10  CAN             1 = Canadian study
# % 11  local           f1= local scale (target pop at sub-state level)
# % 12  prov            1=provisioning function affected
# % 13  reg             1=regulating function affected
# % 14  cult            1=cultural function affected
# % 15  forest          1=forested wetland            
# % 16  q0              baseline acreage
# % 17  q1              policy acreage
# % 18  volunt          1= pay mech = voluntary contribution
# % 19  lumpsum         1=single payment
# % 20  ce              choice experiment
# % 21  nrev            1=not peer-reviewed
# % 22  median          1=orig. wtp based on econometric medians
# % 23  q0alt           alternative definition of baseline acres - see notes
# % 24  q1alt           alternative definition of policy acres - see notes
# % 25  wetdens         affected waterbody area divided by area of geographic scope
# % 26  distdec         population-weighted age distance of all CBGs (or counties) in samples area to affected water body

# note: missing values for wetdens and distdec coded as "-999"

# Drop Rudd et al observations
################################
#data<-subset(data,data[,1]!=128)

# Drop Hindsley and Yoskowitz observations (remove comment to drop)
################################
#data<-subset(data,data[,1]!=130)

# Drop Nijhum thesis observations (remove comment to drop)
################################
#data<-subset(data,data[,1]!=132)

# keep freshwater only
###############################
data<-subset(data,data[,4]==1)

# define y and X
###############################
q0<-data[,16]/1000
q1<-data[,17]/1000

y<-as.matrix(data[,3]-log(q1-q0))
n<-length(y)

X<-as.matrix(cbind(rep(1,n),data[,5:15],(q0+q1)/2, data[18:19]))
k<-ncol(X)

# % contents of X
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# % 1   const           constant
# % 2   lnyear          log of yr of data collection - 1988
# % 3   lninc           log of inc2021
# % 4   sagulf          1 = S-Atlantic/Gulf, study involves AL,GA,SC,LA
# % 5   nema            1 = ME/mid-Atlantic, study involves DE,MD,NJ,PA,RI
# % 6   nmw             N/Mid-West, study involves KY,MI,NE,OH,WI
# % 7   CAN             1 = Canadian study
# % 8   local           f1= local scale (target pop at sub-state level)
# % 9   prov            1=provisioning function affected
# % 10  reg             1=regulating function affected
# % 11  cult            1=cultural function affected
# % 12  forest          1=forested wetland
# % 13  (q0+q1)/2
# % 14  volunt          1= pay mech = voluntary contribution
# % 15  lumpsum         1=single payment

################################################################################
# Starting values, priors, and tuners
################################################################################

r1<-0  #burn-ins - don't need any for Chib
r2<-100000  #keepers
R<-r1+r2
  
# generic OLS to get starting values
####################################
bols<-solve((t(X)) %*% X) %*% (t(X) %*% y)
e<-y-X%*%bols 
SSR<-(t(e)%*%e)
s2<-(t(e)%*%e)/(n-k) 
  
#PRIORS:
###################
#for beta:
mu0<-rep(0,k)
V0<-10*diag(k)

#for sig2:
v0<-1/2
tau0<-1/2


# Load GS results
######################
#load("./output using Feb22 data/MRM2_CAN_32obs.rda")
load("./output using June22 data/MRM2_CAN_32obs.rda")


################################################################################
# call Chib function
################################################################################
#I generally adopt the name of the original GS and add "_chib"
source("./functions/gs_normal_independent_chib.R")  #call the function

int<-gs_normal_independent_chib(X,y,n,k,r1,r2,mu0,V0,betadraw,v0,tau0,sig2draw,betamat,sig2mat)

# contents of int:
# logpmarg
# lp
# llf
# lpost


################################################################################
# output table
################################################################################
# coefficients, followed by sig2
chibout<-as.data.frame(as.matrix(int)) #4 by 1
colnames(chibout)<-c("chib output")
rownames(chibout)<- c("log-marg-lh","log prior", "log-lh", "log-posterior")

# you can now open the table in the "Global Environment" window and copy/paste it
# to Excel etc.

################################################################################
# save your output for further processing
################################################################################
#save(chibout,file = "./output using Feb22 data/MRM2_CAN_32obs_chib.rda")
save(chibout,file = "./output using June22 data/MRM2_CAN_32obs_chib.rda")

################################################################################
# OR send directly to excel
################################################################################
#write.xlsx(chibout,"./output using Feb22 data/MRM2_CAN_32obs_chib.xlsx",row.names=TRUE)
write.xlsx(chibout,"./output using June22 data/MRM2_CAN_32obs_chib.xlsx",row.names=TRUE)

proc.time()-tic #capture run time

