## clean and commented
dochlmod <- function(dat.merge, modat0, fitout = NULL, runmod = T) {

    require(mgcv)
    require(rMR)
    require(mgcv)
    load("modairtemp.rda")  # model of mean air temperature from tempmod.R
    load("mod.watertemp.rda")
    load("modmintemp.rda")

    dat.merge$site.id.c <- factor(dat.merge$site.id.c)

    ## set number of MCMC chains here
    nchains <- 6

    ## parameters for criteria calculation
    lat.site <- 41.41
    lon.site <- -84.32
    elev.site <- 1350
    crittemp <- 19
    doc.site <- 7.2
    depth.site <- 13
    credint <- 0.8
    refuge.depth <- 0.3
    DOtarget <- 4

    ## function to compute saturation DO concentration
    DO.sat <- function (temp.C, elevation.m = NULL) {
        tk <- 273.15 + temp.C
        bar.press.atm <- exp(-9.80665*0.0289644*elevation.m/(8.31447*288.15))
        A1 <- -139.34411
        A2 <- 157570.1
        A3 <- 66423080
        A4 <- 1.2438e+10
        A5 <- 862194900000
        DO <- exp(A1 + (A2/tk) - (A3/(tk^2)) + (A4/(tk^3)) -
            (A5/(tk^4)))

        theta <- 0.000975 - temp.C * 1.426e-05 + (temp.C^2) * 6.436e-08
        u <- exp(11.8571 - (3840.7/tk) - (216961/(tk^2)))
        Fp <- ((bar.press.atm - u) * (1-(theta * bar.press.atm)))/
            ((1 - u) * (1 - theta))
        Cp <- DO *  Fp
        return(Cp)
    }

    ## Prepare raw data
    # drop littoral duplicates
    dat.merge <- subset(dat.merge, sample.type == "MICX")
    dat.merge.sav <- dat.merge
    # select variables used in the model
    dat.merge <- dat.merge[, c("site.id.c", "doc.result", "therm.sav", "chl",
                               "domean", "tempmean","tbot",
                               "do.hyp.fail", "do.hyp.pass",
                               "yday.x", "st.nla2012","year", "tmax",
                               "index.lat.dd", "index.lon.dd",
                               "index.site.depth",
                               "elevation", "area.ha", "tmean.pt",
                               "tmin.pt")]
    sitemean <- function(x, site, varname,dolog = F) {
        if (! dolog) {
            xm <- tapply(x, site, mean, na.rm = T)
        }
        else {
            xm <- tapply(x, site, function(x) mean(log(x), na.rm = T))
        }
        return(xm)
    }
    ## compute mean values of lake characteristics and merge
    varlist <- c("doc.result", "index.lat.dd", "tmean.pt", "tmin.pt",
                 "index.lon.dd", "elevation", "area.ha", "index.site.depth")
    matout <- matrix(NA, ncol = length(varlist),
                     nrow = length(levels(dat.merge$site.id.c)))
    dolog <- c(T, rep(F, times = 7))
    for (i in 1:length(varlist)) {
        matout[,i] <- sitemean(dat.merge[, varlist[i]],
                               dat.merge$site.id.c, dolog = dolog[i])
    }
    dat.mean <- data.frame(levels(dat.merge$site.id.c), matout)
    names(dat.mean) <- c("site.id.c", "docmean", "Lat", "Temp", "TempMin",
                         "Lon", "Elev", "Area", "Depth")
    dat.mean$LakeRatio <- (dat.mean$Area*10000)^0.25/dat.mean$Depth

    ## model tempmin and fill in missing values
    df2 <- na.omit(dat.mean[, c("Elev", "Lat", "Lon", "TempMin")])
    mod <- gam(TempMin ~ s(Lat, Lon, k = 50) + Elev, data = df2)
    predout <- predict(mod, dat.mean[, c("Lat", "Lon", "Elev")])
    dat.mean$TempMin <- predout
    dat.merge<-merge(dat.merge, dat.mean,by = "site.id.c")

    ## compute mean depth below thermocline
    dat.merge$hyp.depth <-dat.merge$Depth - dat.merge$therm.sav
    incvec <- dat.merge$hyp.depth < 0
    incvec[is.na(incvec)] <- F
    dat.merge$hyp.depth[incvec] <- NA # drop sites with negative hyp depth
    hypdepth.mn <- tapply(dat.merge$hyp.depth, dat.merge$site.id.c, mean,
                          na.rm = T)
    dftemp <- data.frame(site.id.c = names(hypdepth.mn),
                         hypdepth.mn = as.vector(hypdepth.mn))
    dat.merge <- merge(dat.merge, dftemp, by = "site.id.c")

    ## drop missing data
    incvec <- ! is.na(dat.merge$hyp.depth) & ! is.na(dat.merge$domean) &
        ! is.na(dat.merge$chl) & ! is.na(dat.merge$docmean)
    dat.merge <- dat.merge[incvec,]

    ## set bottom temperature less than 4 to 4 for the purposes
    ## of calculating beginning DO
    incvec <- dat.merge$TempMin < 4
    incvec[is.na(incvec)] <- F
    dat.merge$TempMin[incvec] <- 4

    ## get beginning DO concentration at turnover
    dat.merge$maxDO <- DO.sat(dat.merge$TempMin, elevation.m = dat.merge$Elev)

    ## select lakes that seasonally stratify based on lake ratio
    dat.merge <- subset(dat.merge, LakeRatio < 3)

    ## drop 7 partial profiles depth > 25 m and number of measurements < 16
    incvec <- (dat.merge$do.hyp.pass + dat.merge$do.hyp.fail < 16) &
        dat.merge$hyp.depth > 25
    dat.merge <- dat.merge[!incvec,]

    ## ND alkaline lake dropped, WA super saturated lake, IN metalimnetic DO max
    ## these are big outliers in the model
    droplist <- c("NLA12_ND-R17", "NLA12_WA-R10", "NLA12_IN-131")
    for (i in droplist) {
        incvec <- dat.merge$site.id.c == i
        if (sum(incvec) == 1){
            dat.merge <- dat.merge[!incvec,]
        }
    }

    ## compute adjusted latitude for finding dimictic lakes
    lat0 <- seq(0, 90, by = 10)
    adj <- c(0.27, 0.31, 0.34, 0.39, 0.46, 0.54, 0.68, 0.89, 1.3, 2.4)
    dat.merge$adjloc <- approx(lat0, adj, dat.merge$Lat)$y
    dat.merge$adjlat <- dat.merge$Lat + dat.merge$adjloc*dat.merge$Elev/100

    dat.merge <- subset(dat.merge, adjlat > 40) # drop monomictic lakes

    ## standardize variables for model
    dat.merge$chl <- log(dat.merge$chl)
    varlist <- c("hypdepth.mn", "docmean", "chl", "yday.x", "Temp")
    mnsav <- apply(dat.merge[, varlist], 2, mean)
    sdsav <- apply(dat.merge[, varlist], 2, sd)
    for (i in varlist) {
        dat.merge[,paste(i, "sc", sep = ".")] <- (dat.merge[,i] - mnsav[i])/sdsav[i]
    }

    dat.merge$site.id.c <- factor(dat.merge$site.id.c)
    dat.merge$sitenum <- as.numeric(dat.merge$site.id.c)

    ## sitedata
    dfsite <- unique.data.frame(dat.merge[, c("sitenum", "hypdepth.mn.sc",
                                           "docmean.sc","maxDO",
                                           "Lat", "Temp.sc")])
    dfsite <- dfsite[order(dfsite$sitenum),]

    # output chl and DOm data
    dffull0 <- dat.merge
#    save(dffull0, file = "dffull0.rda")

    ## split data into missing (DO < 2) or observed
    incvec <- dat.merge$domean < 2
    dftemp1 <- dat.merge[!incvec,]
    dftemp2 <- dat.merge[incvec,]

    df1 <- modat0

    ## get mean air temperature at MO lakes using Lat, Lon, and Elev
    df1$Lon <- df1$Long
    df1$Elev <- df1$elev*0.3048
    df1$Temp <- predict(modairtemp, df1)

    ## standardize MO data by nla vals
    df1$chlmean.sc <- (df1$chlmean - mnsav["chl"])/sdsav["chl"]
    df1$docmean.sc <- (df1$docmean - mnsav["docmean"])/sdsav["docmean"]
    df1$yday.sc <- (df1$yday - mnsav["yday.x"])/sdsav["yday.x"]
    df1$depth.hm.sc<- (df1$depth.hm - mnsav["hypdepth.mn"])/sdsav["hypdepth.mn"]
    df1$Temp.sc <- (df1$Temp - mnsav["Temp"])/sdsav["Temp"]

    ## get state average value for Temp and elev to inform maxDO for MO data
    dftemp.state <- unique.data.frame(dat.merge.all[, c("site.id.c",
                                                        "st.nla2012")])
    dftemp.state <-merge(dftemp.state, dat.mean[, c("site.id.c", "TempMin",
                                                    "Elev")])
    incvec <- dftemp.state$st.nla2012 == "MO"
    mo.mn <- apply(dftemp.state[incvec, c("TempMin", "Elev")], 2, mean)
    mo.maxDO <- DO.sat(mo.mn["TempMin"], elevation.m = mo.mn["Elev"])

    ## set up site ids
    df1$id <- factor(as.character(df1$id))
    df1$sitenum <- as.numeric(df1$id)
    df1$idyear <- factor(df1$idyear)
    df1$siteyrnum <- as.numeric(df1$idyear)

    lookup0 <- unique.data.frame(df1[, c("siteyrnum", "Temp.sc")])
    lookup0 <- lookup0[order(lookup0$siteyrnum),]

    dfsite0 <- unique.data.frame(df1[, c("sitenum", "depth.hm.sc", "chlmean.sc",
                                         "docmean.sc", "Temp.sc")])
    dfsite0 <- dfsite0[order(dfsite0$sitenum),]

    datstan <- list(n = c(nrow(dftemp1), nrow(dftemp2), nrow(dat.merge)),
                    nsite = nrow(dfsite),
                    chl = dat.merge$chl.sc,
                    temp = dfsite$Temp.sc,
                    depth = dfsite$hypdepth.mn.sc,
                    doc = dfsite$docmean.sc,
                    dmax = dfsite$maxDO,
                    t1 = dftemp1$yday.x.sc,
                    t2 = dftemp2$yday.x.sc,
                    domean = dftemp1$domean,
                    sitenum = dat.merge$sitenum,
                    sitenum1 = dftemp1$sitenum,
                    sitenum2 = dftemp2$sitenum,
                    n0 = nrow(df1),
                    t0 = df1$yday.sc,
                    domean0 = df1$domean.all,
                    dmax0 = mo.maxDO,
                    nsite0 = max(dfsite0$sitenum),
                    chl0 = dfsite0$chlmean.sc,
                    temp0 = as.vector(lookup0$Temp.sc),
                    depth0 = dfsite0$depth.hm.sc,
                    doc0 = dfsite0$docmean.sc,
                    sitenum0 = df1$sitenum,
                    nsiteyr0 = max(df1$siteyrnum),
                    siteyrnum0 = df1$siteyrnum)


    print(str(datstan))

    ## bayesian model code
    modstan <- '
         data {
             int n[3];                    // N above 2, below 2, and all
             int nsite;                   // number of samples
             vector[n[3]] chl;            // sample Chl

             vector[nsite] temp;          // mean air temperature
             vector[nsite] depth;         // depth below thermocline
             vector[nsite] doc;           // mean doc
             vector[nsite] dmax;          // maximum DO

             vector[n[1]] t1;             // sample day above 2s
             vector[n[2]] t2;             // sample day below 2s
             vector[n[1]] domean;         // mean DO
             int sitenum[n[3]];           // sitenums for different samples
             int sitenum1[n[1]];
             int sitenum2[n[2]];

             int n0;                      // N samples for MO
             vector[n0] t0;               // sampling day for MO
             vector[n0] domean0;          // mean DO for MO
             real dmax0;                  // maximum DO for MO
             int nsite0;                  // N sites
             int nsiteyr0;                // N site-year
             vector[nsiteyr0] temp0;      // Air temperature
             vector[nsite0] chl0;         // Chl
             vector[nsite0] depth0;       // depth below thermocline
             vector[nsite0] doc0;         // mean DOC
             int sitenum0[n0];
             int siteyrnum0[n0];

         }
         parameters {
             real muchl;                       // model for mean site chl
             real<lower =0> sigchl[2];         // variance terms for chl model
             vector[nsite] etachl;

             real mud[4];                      // Coefficients for VOD model
             vector<upper = 2>[n[2]] domean2;  // Estimates for censored DO

             real<upper = 0> b[2];             // Coefficients for strat day model
             real<lower = 0> sigt;             // Variance in strat day model
             vector[nsite] etat;
             vector[nsiteyr0] etat0;
             real<lower = 0> sigt0;

             real<lower = 0> sig;              // variance in predicted DO
             real<lower = 0> sig0;             // variance in predicted DO MO
             real<lower = 0> sigd;            // variance in VOD
             vector[nsite0] etad;
         }
         transformed parameters {
             vector[nsite] chlsite;
             vector[nsite] tstart;
             vector[n[1]] dopred1;
             vector[n[2]] dopred2;
             vector[nsiteyr0] tstart0;

             vector[n0] dopred0;
             vector[nsite0] d2;

             chlsite = muchl + etachl*sigchl[1];

             tstart = b[1] + b[2]*temp + etat*sigt;  // strat day model
             tstart0 = b[1] + b[2]*temp0 + etat0*sigt0; // strat day for MO

             // VOD model in MO
             d2 = -exp(mud[1]) + mud[2]*chl0 + mud[3]*depth0+mud[4]*doc0 + etad*sigd;

             // Predictions of mean DO in MO
             dopred0 = dmax0 +d2[sitenum0] .* (t0 - tstart0[siteyrnum0]);
             // Eqn 10 and 12, model for VOD and DO
             dopred1 = dmax[sitenum1] + (-exp(mud[1]) + mud[2]*chlsite[sitenum1] +
                            mud[3]*depth[sitenum1] + mud[4]*doc[sitenum1]).*
                            (t1 - tstart[sitenum1]);
             // Same model equation for DO < 2mg/L
             dopred2 = dmax[sitenum2] + (-exp(mud[1]) + mud[2]*chlsite[sitenum2] +
                            mud[3]*depth[sitenum2] + mud[4]*doc[sitenum2]).*
                            (t2 - tstart[sitenum2]);
         }
         model {
             muchl ~ normal(0,3);
             sigchl ~ cauchy(0,3);
             etachl ~ normal(0,1);

             sig ~ cauchy(0,3);

             // Priors for strat day based on Demers and Kalff coef scaled
             // Scaled by centering applied to mean annual temeprature
             b[1] ~ normal(-3.6, 2);
             b[2] ~ normal(-0.71, 0.4);
             // Priors for variance in strat day based on Demers and Kalff
             // scaled
             sigt ~ normal(0.78, 0.3);
             sigt0 ~ normal(0.78, 0.3);

             etat ~ normal(0,1);
             etat0 ~ normal(0,1);

             mud ~ normal(0,2);

             etad ~ normal(0,1);
             sigd ~ cauchy(0,2);

             sig0 ~ cauchy(0,3);

             // sampling statements
             chl ~ normal(chlsite[sitenum], sigchl[2]);
             domean ~ normal(dopred1,  sig);
             domean2 ~ normal(dopred2, sig);
             domean0 ~ normal(dopred0, sig0);

         }
         generated quantities {
             vector[n[1]] dop;     // predictive posterior national
             vector[n0] do0p;      // predictive posterior MO
             for (i in 1:n[1]) dop[i] = normal_rng(dopred1[i], sig);
             for (i in 1:n0) do0p[i] = normal_rng(dopred0[i], sig0);
         }
     '


    if (runmod) {
        require(rstan)
        rstan_options(auto_write = TRUE)
        nchains <- 6
        options(mc.cores = nchains)

        fit <- stan(model_code = modstan, data = datstan,
                    iter = 900, chains = nchains, warmup = 300,
                    control = list(max_treedepth = 15,
                                   adapt_delta = 0.9), thin = 2)

        return(fit)
    }
    else {
        ## plot median posterior prediction vs. observation
        varout.p <- extract(fitout, pars = c("do0p", "dop"))
        varout <- extract(fitout, pars = c("b", "mud"))
        dopmed <- apply(varout.p$dop, 2, median)
        do0pmed <- apply(varout.p$do0p, 2, median)
        dev.new()
        par(mar = c(4,4,1,1), mfrow = c(1,2), mgp = c(2.3,1,0),
            bty = "l")
        plot(dopmed, dftemp1$domean, xlab = "Predicted DO",
             ylab = "Observed DO", pch = 21, col = "grey39",
             bg = "white", main = "NLA")
        abline(0,1)
        plot(do0pmed, df1$domean.all, xlab = "Predicted DO",
             ylab = "Observed DO", pch = 21, col= "grey39",
             bg = "white", main = "MO")
        abline(0,1)

        ## Computations to derive a Chl criteria.
        ## Requires lake location to estimate stratification start
        ## date, release of temperature constraint in surface waters
        ##
        ## compute spring stratification data from air temp based on
        ## lake location and model parameters
        new.data <- data.frame(Lat = lat.site, Lon = lon.site,
                               Elev = elev.site)
        airtemp.raw<- predict(modairtemp, new.data)
        airtemp <- (airtemp.raw  - mnsav["Temp"])/sdsav["Temp"]
        ## compute estimate of spring strat day
        stratstart.sc <- varout$b[,1] +varout$b[,2]*as.vector(airtemp)

        ## get predicted surface water temperature by day for selected location
        ## only computing temps when temps start declining
        yday.all <- seq(204, 290, by = 1)
        new.data <- data.frame(yday.x = yday.all)
        new.data$Lat <- lat.site
        new.data$Lon <- lon.site
        new.data$Elev <- elev.site
        predout.all <- predict(mod.watertemp, new.data, se.fit = T)
        up <- predout.all$fit + predout.all$se.fit
        dn <- predout.all$fit - predout.all$se.fit
        endday <- approx(predout.all$fit,yday.all, crittemp)$y
        endday.up <- approx(up, yday.all, crittemp)$y
        endday.dn <- approx(dn, yday.all, crittemp)$y
        ## estimate of SD from half of +/- se.fit
        val <- c(endday, 0.5*(endday.up - endday.dn))

        endday.sc <- (endday -  mnsav["yday.x"])/sdsav["yday.x"]
        endday.se <- val[2]/sdsav["yday.x"]
        ## scaled number of days between start of stratification
        ## and temperature release in surface layer
        ## Distribution of values available from posterior coefs for
        ## stratstart.  Create a normal distribution for
        ## predicted temperature release based on mean = endday.sc
        ## and sd = endday.se
        nsamp <- length(stratstart.sc)
        drange <- rnorm(nsamp, mean = endday.sc, sd = endday.se) - stratstart.sc

        ## compute initial DO from location
        new.data <- data.frame(Lat = lat.site, Lon = lon.site,
                               Elev = elev.site)
        tempmin <- predict(modmintemp, new.data)
        maxDO <- DO.sat(as.vector(tempmin), elevation.m = elev.site)

        # convert other model inputs to scaled values
        doc.pick <- (log(doc.site) - mnsav["docmean"])/sdsav["docmean"]
        depth.pick <- (depth.site-mnsav["hypdepth.mn"])/
            sdsav["hypdepth.mn"]

        x.sc <- seq(min(dat.merge$chl.sc), max(dat.merge$chl.sc), length = 40)
        predout <- matrix(NA, ncol = 3, nrow = length(x.sc))

        for (k in 1:length(x.sc)) {
            y <- maxDO + (-exp(varout$mud[,1]) + varout$mud[,2]*x.sc[k]+
                               varout$mud[,3]*depth.pick +
                                   varout$mud[,4]*doc.pick)*drange

            predout[k,] <- quantile(y, prob = c(0.5*(1-credint), 0.5,
                                           1-0.5*(1-credint)))
        }

        ## compute depth-averaged DO target from DO threshold
        ## and refugia depth
        ## DO in surface layer based on critical temperature
        maxDO <- DO.sat(crittemp, elev.site)
        DOtarg.da <- 0.5*maxDO^2/(maxDO-DOtarget)*refuge.depth/depth.site
        DOtarg.da

        dev.new()
        par(mar = c(4,4,1,1), mgp = c(2.3,1,0), bty = "l")
        xraw <- x.sc*sdsav["chl"] + mnsav["chl"]
        plot(xraw, predout[,2], type = "n", ylim = c(0, max(predout)),
             axes = F, xlab = expression(Chl~italic(a)~(mu*g/L)),
             ylab = expression(DO[m]~(mg/L)))
        logtick.exp(0.001, 10, c(1), c(F,F))
        axis(2)
        polygon(c(xraw, rev(xraw)),
                c(predout[,1], rev(predout[,3])), col = "grey", border = NA)
        lines(xraw, predout[,2])
        abline(h = DOtarg.da, lty = "dashed")
        chlcrit <- approx(predout[,1], xraw, DOtarg.da)$y
        cat("Chl criterion:", round(exp(chlcrit), digits = 1), "\n")


    }
}

## runmod variable set to T to run simulation and set to F to
##  run post processing.
#fitout <- dochlmod(dat.merge.all, modat0, runmod = T)

## post process
dochlmod(dat.merge.all, modat0, fitout, runmod = F)
