## 12.9.2019
## Cleaned and commented
## Inputs:
## dat.merge.all: master data file
chlzoopmodel <- function(df1, varout = NULL, runmod = T) {

    require(rstan)
    nchains <- 6           # number of chains for mcmc simulation

    ## specify parameters here to calculate criteria
    credint <- 0.99         # credible interval for criteria derivation
    targ <- 0.0              # targeted threshold
    j0 <- 3             # select depth class for output (1-3)

   # source("binv.R")
   # source("logtick.exp.R")
    ## drop zero chl
    incvec <- df1$chl == 0
    incvec[is.na(incvec)] <- F
    df1$chl[incvec] <- NA

    ## drop samples missing phyt biov, chl, or zoop biomass
    incvec <- ! is.na(df1$chl) & ! is.na(df1$biov.tot) & ! is.na(df1$zbiomass)
    df1 <- df1[incvec,]

    ## drop samples missing depth
    incvec <- is.na(df1$index.site.depth)
    df1 <- df1[!incvec,]

    ## drop HI (not in conterminous US)
    incvec <- df1$st.nla2012 == "HI"
    df1 <- df1[!incvec,]

    ## drop very shallow lakes
    incvec <- df1$index.site.depth > 1.6
    df1<- df1[incvec,]

    ## compute the mean zbiomass and zdensity at each
    ## site.id.c and visit no. Cleans out duplicate entries of zoop
    ## at littoral and index samples while
    ## retaining samples in which only littoral zone phytoplankton is
    ## available with an index zoop sample
    df1$sitevis <- paste(df1$site.id.c, df1$visit.no, sep = "--")
    zbiomass.mn <- tapply(df1$zbiomass, df1$sitevis, mean)
    zdensity.mn <- tapply(df1$zdensity, df1$sitevis, mean)
    df2 <- data.frame(sitevis = names(zbiomass.mn),
                      zbiomass = as.vector(zbiomass.mn),
                      zdensity = as.vector(zdensity.mn))

    # recover site.id.c and visit.no in df2
    w <- regexpr("--", as.character(df2$sitevis))
    df2$site.id.c <- substring(as.character(df2$sitevis), 1, w-1)
    nch0 <- nchar(as.character(df2$sitevis))
    df2$visit.no <- as.numeric(substring(as.character(df2$sitevis),
                                         nch0, nch0))

    # log transform and center
    df1$biov.log <- log(df1$biov.tot)
    mn.biov <- mean(df1$biov.log)
    df1$biov.log <- df1$biov.log - mn.biov

    df1$chl.log <- log(df1$chl)
    mn.chl <- mean(df1$chl.log)
    df1$chl.log <- df1$chl.log - mn.chl

    df2$zbiomass <- log(df2$zbiomass)
    mn.zbiomass <- mean(df2$zbiomass)
    df2$zbiomass <- df2$zbiomass - mn.zbiomass

    df2$zdensity <- log(df2$zdensity)
    mn.zdensity <- mean(df2$zdensity)
    df2$zdensity <- df2$zdensity - mn.zdensity

    mnval <- c(mn.biov, mn.chl, mn.zdensity, mn.zbiomass)
    names(mnval) <- c("biov", "chl", "zdensity", "zbiomass")

    ## set sitenums
    df1$site.id.c <- factor(df1$site.id.c)
    df1$sitenum <- as.numeric(df1$site.id.c)
    dflookup <- unique.data.frame(df1[, c("site.id.c", "sitenum")])

    ## merge sitenums back into zoop data
    df2 <- merge(df2, dflookup[, c("site.id.c", "sitenum")], by = "site.id.c")

    ## define depth classes
    dfdeep <- unique.data.frame(df1[, c("sitenum", "index.site.depth")])
    ngrp <- 3
    cutp <- quantile(dfdeep$index.site.depth, prob = seq(0, 1, length =ngrp + 1))
    cutm <- 0.5*(cutp[-1] + cutp[-length(cutp)])
    dfdeep$depthclass <- cut(dfdeep$index.site.depth, cutp, include.lowest = T)
    dfdeep$depthnum <- as.numeric(dfdeep$depthclass)
    dfdeep <- dfdeep[order(dfdeep$sitenum),]
    print("Depth classes and N within each")
    print(table(dfdeep$depthclass))

    zoopdat <- merge(df2, dfdeep, by = "sitenum")
    ## save files for shiny
    save(zoopdat, mnval, cutp, file = "shinydat.rda")

    xnew <- seq(from = min(df1$chl.log), to = max(df1$chl.log), length = 60)

    datstan <- list(nsamp = nrow(df1),
                    nsite = max(df1$sitenum),
                    sitenum = df1$sitenum,
                    biov = df1$biov.log,
                    chl = df1$chl.log,
                    nz = nrow(df2),
                    zoop = df2$zdensity,
                    zoopb = df2$zbiomass,
                    sitenumz = df2$sitenum,
                    ngrp = max(dfdeep$depthnum),
                    depthf = dfdeep$depthnum)


    print(str(datstan))

    modstan <- '
        data {
           int nsite;              // number of distinct sites
           int nsamp;              // number of distinct samples
           vector[nsamp] chl;      // chl
           vector[nsamp] biov;     // biovolume
           int sitenum[nsamp];     // sitenums associated with each sample
           int nz;                 // number of zoop samples
           vector[nz] zoop;        // zoop density
           vector[nz] zoopb;       // zoop biomass
           int sitenumz[nz];       // zoop sitenum
           int depthf[nsite];      // depth class for each site
           int ngrp;               // number of depth classes
        }
        parameters {
           real<lower = 0> sigk;     // sd of chl about biov
           real<lower = 0> sig[2];   // biov sd among site, biov sd within site
           vector[nsite] eta1;

           real<lower = 0> sigz[2]; //zoop density and biomass sd

           matrix[ngrp,3] a;
           vector[ngrp] cp;
           matrix[ngrp,3] f;
           vector[ngrp] b;
        }
        transformed parameters {
           vector[nsite] biov_site;   // estimate mean phyto biov at site
           vector[nsite] zoopmean;    // estimated mean site z density
           vector[nsite] zoopbmean;   // estimate mean site z biomass
           vector[ngrp] g;

           biov_site = sig[1]*eta1;

           g = f[,3] - f[,2];

           // Eqn (6)
           for (i in 1:nsite) zoopmean[i] = a[depthf[i],1] +
                     a[depthf[i],2]*log(1+exp(-(biov_site[i] -
                       cp[depthf[i]])/exp(b[depthf[i]]))) +
                     a[depthf[i],3]*biov_site[i];
          // Eqn (5)
          for (i in 1:nsite) zoopbmean[i] = f[depthf[i],1] +
                      f[depthf[i], 3]*biov_site[i] +
                      g[depthf[i]]*exp(b[depthf[i]])*log(1+exp(-(biov_site[i]-
                        cp[depthf[i]])/exp(b[depthf[i]])));
        }
        model {
           b ~ normal(0, 1);
           sigk ~ cauchy(0,3);
           eta1 ~ normal(0,1);
           sig ~ normal(0,2);

           cp[1] ~ normal(2,0.5); //priors for breakpoints from explor analysis
           cp[2] ~ normal(2, 0.5);
           cp[3] ~ normal(-0.5, 0.5);
           for (i in 1:3) a[,i] ~ normal(0,3);

           f[,1] ~ normal(0,3);
           f[,2] ~ normal(1, 0.2); // prior for initial slope is based on
                                   // z/p = k in oligo lakes
           f[,3] ~ normal(0,3);
           sigz ~ cauchy(0,3);

           biov ~ student_t(4,biov_site[sitenum], sig[2]);  // Eqn (3)
           chl ~ normal(biov_site[sitenum], sigk);  // Eqn (1)
           zoopb ~ student_t(4,zoopbmean[sitenumz], sigz[2]); // Eqn (8)
           zoop ~ student_t(4,zoopmean[sitenumz], sigz[1]); // Eqn (7)
        }
    '


    if (runmod) {

        rstan_options(auto_write = TRUE)
        options(mc.cores = nchains)

        fit <- stan(model_code = modstan, data = datstan,
                    chains = nchains, iter = 2000, warmup = 500,
                    control = list(adapt_delta = 0.95,
                        max_treedepth = 12), thin = 5)
        return(fit)
    }


    grey.t <- adjustcolor("grey39", alpha.f = 0.5)

    ## compute site mean values for z biomass and density for plotting
    zbiomass <- tapply(df2$zbiomass, df2$sitenum, mean) + mnval["zbiomass"]
    zdensity <- tapply(df2$zdensity, df2$sitenum, mean) + mnval["zdensity"]

    ## compute mean estimated site biov
    chl.site.obs <- tapply(df1$chl.log, df1$sitenum, mean)
    biov.site.obs <- tapply(df1$biov.log, df1$sitenum, mean)

    dobiom <- F  # set to true to plot Chl and biov comparisons (Fig 5)
    if (dobiom) {
        png(width = 4.5, height = 2.25, pointsize = 7, units = "in", res = 600,
            file = "chl.p.png")
        biov.site <- apply(varout$biov_site, 2, mean)
        par(mar = c(4,4,1,1), mfrow = c(1,2), mgp = c(2.3,1,0))
        plot(biov.site + mnval["biov"] + log(1.e-6),
             biov.site.obs + mnval["biov"] + log(1.e-6),
             pch = 21, col = "grey", bg = "white", axes = F,
         xlab = expression(Mean~biovolume~(mm^3/L)),
             ylab = expression(Measured~biovolume~(mm^3/L)))
        logtick.exp(0.001, 10, c(1,2), c(F,F))
        abline(0,1)
        plot(biov.site + mnval["biov"] + log(1.e-6), chl.site.obs + mnval["chl"],
             pch = 21, col = "grey", bg = "white", axes = F,
             xlab = expression(Mean~biovolume~(mm^3/L)),
             ylab = expression(Chl~italic(a)~(mu*g/L)))
        logtick.exp(0.001, 10, c(1,2), c(F,F))
        abline(-mnval["biov"] + mnval["chl"] - log(1.e-6), 1)
        dev.off()
    }

    dftemp <- data.frame(biov.site = biov.site.obs,
                         zbiomass = zbiomass,
                         zdensity = zdensity,
                         sitenum = 1:length(zbiomass),
                         chl.site = chl.site.obs)
    dftemp <- merge(dftemp, dfdeep, by = "sitenum")

    nit <- nrow(varout$cp)

    zooppred <- matrix(NA, ncol = nit, nrow = length(xnew))
    biomasspred <- matrix(NA, ncol = nit, nrow = length(xnew))

    # compute binned data for each group
    bout1 <- as.list(rep(NA, times = ngrp))
    bout2 <- as.list(rep(NA, times = ngrp))
    for (j in 1:ngrp) {
        incvec <- dftemp$depthnum == j
        bout1[[j]] <- binv(dftemp$chl.site[incvec] + mnval["chl"],
                           dftemp$zbiomass[incvec], 5) # average 5 measurements for each bin
        bout2[[j]] <- binv(dftemp$chl.site[incvec] + mnval["chl"],
                           dftemp$zdensity[incvec], 5)

    }
    xlim1 <- range(sapply(bout1, function(x) range(x$xb)))
    ylim1 <- range(sapply(bout1, function(x) range(x$yb)))
    xlim2 <- range(sapply(bout2, function(x) range(x$xb)))
    ylim2 <- range(sapply(bout2, function(x) range(x$yb)))


    png(width = 6.5*2/3, height = 2, pointsize = 8, units = "in", res = 600,
        file = "zoop.orig.png")
#    dev.new()  # Figure 6 in document
    par(mar = c(4,4,1,1), mfrow = c(1,2), mgp = c(2.3,1,0))

    for (j in j0) {
        # compute predictions
        for (i in 1:nrow(zooppred)) {
            zooppred[i,] <- varout$a[,j,1] +
                varout$a[,j,2]*log(1+exp(-(xnew[i] - varout$cp[,j])/
                                         exp(varout$b[,j]))) +
                    varout$a[,j,3]*xnew[i]
            biomasspred[i,] <- varout$f[,j,1] +
                varout$f[,j,3]*xnew[i] +
                (varout$f[,j,3] - varout$f[,j,2])*exp(varout$b[,j])*
                    log(1+exp(-(xnew[i] - varout$cp[,j])/exp(varout$b[,j])))
        }

        zoop.q <- apply(zooppred, 1, quantile, prob = c(0.5*(1-credint), 0.5,
                                                   1-0.5*(1-credint))) +
            mnval["zdensity"]

        zoopb.q <- apply(biomasspred, 1, quantile, prob = c(0.5*(1-credint),
                                               0.5, 1- 0.5*(1-credint))) +
            mnval["zbiomass"]

        ynew.raw <- xnew +  mnval["chl"]
        xnew.raw <- xnew + mnval["chl"]

        ## plot zoop density vs. Chl
        dodens <- F
        if (dodens) {
            plot(bout2[[j]]$xb, bout2[[j]]$yb, xlim=xlim2, ylim = ylim2,
                 xlab = expression(Chl~italic(a)~(mu*g/L)),
                 ylab = "Zooplankton abundance (count/L)", axes = F,
                 pch = 21, col = "grey", bg = "white")
            logtick.exp(0.00001, 10, c(1,2), c(F,F))
            polygon(c(ynew.raw,rev(ynew.raw)), c(zoop.q[1,],rev(zoop.q[3,])),
                    col = grey.t, border = NA)
            lines(ynew.raw,zoop.q[2,])
        }
        ## plot zoop biomass vs. Chl
        plot(bout1[[j]]$xb, bout1[[j]]$yb,xlim = xlim1,ylim = ylim1,
             xlab = expression(Chl~italic(a)~(mu*g/L)),
             ylab = expression(Zooplankton~biomass~(mu*g/L)), axes = F,
             pch = 21, col = "grey", bg = "white", type = "p")
        logtick.exp(0.00001, 10, c(1,2), c(F,F))
        polygon(c(ynew.raw,rev(ynew.raw)), c(zoopb.q[1,],rev(zoopb.q[3,])),
                col = grey.t, border = NA)
        lines(ynew.raw,zoopb.q[2,])

        ## plot slope vs.Chl
        delt <- xnew.raw[2]-xnew.raw[1]
        zoopgrad <- matrix(NA, ncol = nit, nrow = length(xnew)-1)
        for (i in 1:nrow(zoopgrad)) {
            zoopgrad[i,] <- (biomasspred[i+1,] - biomasspred[i,])/delt
        }

        xmean <- 0.5*(xnew.raw[-1] + xnew.raw[-length(xnew.raw)])
        ymean <- 0.5*(ynew.raw[-1] + ynew.raw[-length(ynew.raw)])
        zoopgq <- apply(zoopgrad, 1, quantile,prob = c(0.5*(1-credint),
                                                  0.5, 1-0.5*(1-credint)))

        plot(ymean, zoopgq[2,],  ylim = range(zoopgq),
             type = "n", axes = F,
             ylab = expression(Slope~(Delta~log(zoop)/Delta~log(phyt))),
             xlab = expression(Chl~italic(a)~(mu*g/L)))
        logtick.exp(0.0001, 10, c(1), c(F,F))
        axis(2)
        lines(ymean, zoopgq[1,], lty = "dashed")
        lines(ymean, zoopgq[3,], lty = "dashed")
        lines(ymean,zoopgq[2,])
        abline(h = targ, lty = "dotted")


        critout <- exp(approx(zoopgq[1,], ymean, targ)$y)

        cat("Criterion value:", critout, "\n")

    }
    dev.off()
    return()

}

## uncomment next statement to run MCMC sim
fitout <- chlzoopmodel(dat.merge.all, runmod = T)

## uncomment next two statements to post process
#varout <- extract(fitout, pars = c("a",  "f",  "cp", "biov_site", "b"))
#chlzoopmodel(dat.merge.all, varout, runmod = F)

