# 12.9.2019
# Cleaned and commented
# Inputs:
# dat.merge.all: master data file
chlzoopmodel <- function(df1, fit = NULL, runmod = T) {
    ## Set the number of chains for MCMC simulation. This also sets the
    ## number of processors used to run the simulation (see below).
    nchains <- 6

    require(rstan)
    # select one of the following parameters as true to make a six panel plot
    # of the model results
    domassplot <- T     # for biomass plots
    dodensityplot <- F   # for zoop density plots
    doslpplot <- T # for biomass slope plots

    # drop zero chl
    incvec <- df1$chl == 0
    incvec[is.na(incvec)] <- F
    df1$chl[incvec] <- NA

    # drop samples missing phyt biov, chl, or zoop biomass
    incvec <- ! is.na(df1$chl) & ! is.na(df1$biov.tot) & ! is.na(df1$zbiomass)
    df1 <- df1[incvec,]

    # drop samples missing depth
    incvec <- is.na(df1$index.site.depth)
    df1 <- df1[!incvec,]

    # compute the mean zbiomass and zdensity at each
    # site.id.c and visit no. Cleans out duplicate entries of zoop
    # at littoral and index samples while
    # retaining samples in which only littoral zone phytoplankton is
    # available with an index zoop sample
    df1$sitevis <- paste(df1$site.id.c, df1$visit.no, sep = "--")
    zbiomass.mn <- tapply(df1$zbiomass, df1$sitevis, mean)
    zdensity.mn <- tapply(df1$zdensity, df1$sitevis, mean)
    df2 <- data.frame(sitevis = names(zbiomass.mn),
                      zbiomass = as.vector(zbiomass.mn),
                      zdensity = as.vector(zdensity.mn))

    # recover site.id.c and visit.no in df2
    w <- regexpr("--", as.character(df2$sitevis))
    df2$site.id.c <- substring(as.character(df2$sitevis), 1, w-1)
    nch0 <- nchar(as.character(df2$sitevis))
    df2$visit.no <- as.numeric(substring(as.character(df2$sitevis),
                                         nch0, nch0))

    # log transform and center
    df1$biov.log <- log(df1$biov.tot)
    mn.biov <- mean(df1$biov.log)
    df1$biov.log <- df1$biov.log - mn.biov

    df1$chl.log <- log(df1$chl)
    mn.chl <- mean(df1$chl.log)
    df1$chl.log <- df1$chl.log - mn.chl

    df2$zbiomass <- log(df2$zbiomass)
    mn.zbiomass <- mean(df2$zbiomass)
    df2$zbiomass <- df2$zbiomass - mn.zbiomass

    df2$zdensity <- log(df2$zdensity)
    mn.zdensity <- mean(df2$zdensity)
    df2$zdensity <- df2$zdensity - mn.zdensity

    mnval <- c(mn.biov, mn.chl, mn.zdensity, mn.zbiomass)
    names(mnval) <- c("biov", "chl", "zdensity", "zbiomass")

    # set sitenums
    df1$site.id.c <- factor(df1$site.id.c)
    df1$sitenum <- as.numeric(df1$site.id.c)
    dflookup <- unique.data.frame(df1[, c("site.id.c", "sitenum")])

    # merge sitenums back into zoop data
    df2 <- merge(df2, dflookup[, c("site.id.c", "sitenum")], by = "site.id.c")

    # define depth classes
    dfdeep <- unique.data.frame(df1[, c("sitenum", "index.site.depth")])
    ngrp <- 3
    cutp <- quantile(dfdeep$index.site.depth, prob = seq(0, 1, length =ngrp + 1))
    cutm <- 0.5*(cutp[-1] + cutp[-length(cutp)])
    dfdeep$depthclass <- cut(dfdeep$index.site.depth, cutp, include.lowest = T)
    dfdeep$depthnum <- as.numeric(dfdeep$depthclass)
    print("Depth classes and N within each")
    print(table(dfdeep$depthclass))

    zoopdat <- merge(df2, dfdeep, by = "sitenum")
    # save files for shiny
    save(zoopdat, mnval, cutp, file = "shinydat.rda")

    xnew <- seq(from = min(df1$chl.log), to = max(df1$chl.log), length = 60)

    datstan <- list(nsamp = nrow(df1),
                    nsite = max(df1$sitenum),
                    sitenum = df1$sitenum,
                    chl = df1$chl.log,
                    nz = nrow(df2),
                    zoop = df2$zdensity,
                    zoopb = df2$zbiomass,
                    sitenumz = df2$sitenum,
                    ngrp = max(dfdeep$depthnum),
                    depthf = dfdeep$depthnum,
                    b1 = 1,
                    b2 = 1)

    print(str(datstan))

    modstan <- '
        data {
           int nsite;              // number of distinct sites
           int nsamp;              // number of distinct samples
           vector[nsamp] chl;      // chl
           int sitenum[nsamp];     // sitenums associated with each sample
           int nz;                 // number of zoop samples
           vector[nz] zoop;        // zoop density
           vector[nz] zoopb;       // zoop biomass
           int sitenumz[nz];       // zoop sitenum
           int depthf[nsite];      // depth class for each site
           int ngrp;               // number of depth classes
           real b1;
           real b2;
        }
        parameters {
           real<lower = 0> sigk;     // sd of chl about biov
           real<lower = 0> sig;   // biov sd among site
           vector[nsite] eta1;

           real<lower = 0> sigz[2]; //zoop density and biomass sd

           matrix[ngrp,3] a;
           vector[ngrp] cp;
           matrix[ngrp,3] f;
        }
        transformed parameters {
           vector[nsite] biov_site;   // estimate mean phyto biov at site
           vector[nsite] zoopmean;    // estimated mean site z density
           vector[nsite] zoopbmean;   // estimate mean site z biomass

           biov_site = sig*eta1;

           for (i in 1:nsite) zoopmean[i] = a[depthf[i],1] +
                     a[depthf[i],2]*log(1+exp(-(biov_site[i] -
                       cp[depthf[i]])/b1)) +
                     a[depthf[i],3]*biov_site[i];

          for (i in 1:nsite) zoopbmean[i] = f[depthf[i],1] +
                      f[depthf[i], 3]*biov_site[i] +
                      f[depthf[i], 2]*b2*log(1+exp(-(biov_site[i]-
                        cp[depthf[i]])/b2));
        }
        model {
           sigk ~ cauchy(0,3);
           eta1 ~ normal(0,1);
           sig ~ normal(0,2);

           cp ~ normal(0,0.5);
           for (i in 1:3) a[,i] ~ normal(0,3);

           f[,1] ~ normal(0,2);
           f[,2] ~ normal(-1, 0.5); // prior for initial slope is based on
                                   // z/p = k in oligo lakes
           f[,3] ~ normal(0,3);
           sigz ~ cauchy(0,3);

           chl ~ normal(biov_site[sitenum], sigk);
           zoopb ~ normal(zoopbmean[sitenumz], sigz[2]);
           zoop ~ normal(zoopmean[sitenumz], sigz[1]);
        }
    '


    if (runmod) {

        rstan_options(auto_write = TRUE)
        options(mc.cores = nchains)

        fit <- stan(model_code = modstan, data = datstan,
                    chains = nchains, iter = 900, warmup = 300,
                    control = list(adapt_delta = 0.9,
                        max_treedepth = 12), thin = 2)
        return(fit)
    }

    source("logtick.exp.R")
    source("binv.R")

    varout <- extract(fit, pars = c("a", "f",  "cp", "biov_site"))

    grey.t <- adjustcolor("grey39", alpha.f = 0.5)

    # compute site mean values for z biomass and density for plotting
    zbiomass <- tapply(df2$zbiomass, df2$sitenum, mean) + mnval["zbiomass"]
    zdensity <- tapply(df2$zdensity, df2$sitenum, mean) + mnval["zdensity"]

    # compute mean estimated site biov
    biov.site <- tapply(df1$chl.log, df1$sitenum, mean)

    dftemp <- data.frame(biov.site = biov.site,
                         zbiomass = zbiomass,
                         zdensity = zdensity,
                         sitenum = 1:length(zbiomass))
    dftemp <- merge(dftemp, dfdeep, by = "sitenum")

    nit <- nrow(varout$cp)

    zooppred <- matrix(NA, ncol = nit, nrow = length(xnew))
    biomasspred <- matrix(NA, ncol = nit, nrow = length(xnew))

    # compute binned data for each group
    source("binv.R")
    bout1 <- as.list(rep(NA, times = ngrp))
    bout2 <- as.list(rep(NA, times = ngrp))
    for (j in 1:ngrp) {
        incvec <- dftemp$depthnum == j
        bout1[[j]] <- binv(dftemp$biov.site[incvec] + mnval["chl"],
                           dftemp$zbiomass[incvec], 5) # average 5 measurements for each bin
        bout2[[j]] <- binv(dftemp$biov.site[incvec] + mnval["chl"],
                           dftemp$zdensity[incvec], 5)

    }
    xlim1 <- range(sapply(bout1[[j]]$xb, range))
    ylim1 <- range(sapply(bout1[[j]]$yb, range))
    xlim2 <- range(sapply(bout2[[j]]$xb, range))
    ylim2 <- range(sapply(bout2[[j]]$yb, range))


#    png(width = 6.5, height = 2, pointsize = 8, units = "in", res = 600,
#        file = "zoopchl.fig.png")
    dev.new()
    par(mar = c(4,4,1,1), mfrow = c(2,3), mgp = c(2.3,1,0))

    critout <- matrix(NA, ncol = 2, nrow = ngrp) # storage space for candidate criteria

    b1 <- 1
    b2 <- 1

    for (j in 1:ngrp) {
        # compute predictions
        for (i in 1:nrow(zooppred)) {
            zooppred[i,] <- varout$a[,j,1] +
                varout$a[,j,2]*log(1+exp(-(xnew[i] - varout$cp[,j])/b1)) +
                    varout$a[,j,3]*xnew[i]
            biomasspred[i,] <- varout$f[,j,1] +
                varout$f[,j,3]*xnew[i] +
                varout$f[,j,2]*b2*
                    log(1+exp(-(xnew[i] - varout$cp[,j])/b2))
        }

        zoop.q <- apply(zooppred, 1, quantile, prob = c(0.1, 0.5, 0.9)) +
            mnval["zdensity"]

        zoopb.q <- apply(biomasspred, 1, quantile, prob = c(0.1, 0.5, 0.9)) +
            mnval["zbiomass"]

        ynew.raw <- xnew +  mnval["chl"]
        xnew.raw <- xnew + mnval["chl"]

        if (dodensityplot) {
            plot(bout2[[j]]$xb, bout2[[j]]$yb,
                 xlab = expression(Chl~italic(a)~(mu*g/L)),
                 ylab = "Zooplankton abundance (count/L)", axes = F,
                 pch = 21, col = "grey", bg = "white")
            logtick.exp(0.00001, 10, c(1,2), c(F,F))
            polygon(c(ynew.raw,rev(ynew.raw)), c(zoop.q[1,],rev(zoop.q[3,])),
                    col = grey.t, border = NA)
            lines(ynew.raw,zoop.q[2,])
        }

        if (domassplot) {
            plot(bout1[[j]]$xb, bout1[[j]]$yb,
                 xlab = expression(Chl~italic(a)~(mu*g/L)),
                 ylab = expression(Zooplankton~biomass~(mu*g/L)), axes = F,
                 pch = 21, col = "grey", bg = "white", type = "p")
            logtick.exp(0.00001, 10, c(1,2), c(F,F))
            polygon(c(ynew.raw,rev(ynew.raw)), c(zoopb.q[1,],rev(zoopb.q[3,])),
                    col = grey.t, border = NA)
            lines(ynew.raw,zoopb.q[2,])
        }
        if (doslpplot) {

            delt <- xnew.raw[2]-xnew.raw[1]
            zoopgrad <- matrix(NA, ncol = nit, nrow = length(xnew)-1)
            for (i in 1:nrow(zoopgrad)) {
                zoopgrad[i,] <- (biomasspred[i+1,] - biomasspred[i,])/delt
            }

            xmean <- 0.5*(xnew.raw[-1] + xnew.raw[-length(xnew.raw)])
            ymean <- 0.5*(ynew.raw[-1] + ynew.raw[-length(ynew.raw)])
            zoopgq <- apply(zoopgrad, 1, quantile,prob = c(0.10, 0.5, 0.75))

            plot(ymean, zoopgq[2,],  ylim = range(zoopgq),
                 type = "n", axes = F,
                 ylab = expression(Slope~(Delta~log(zoop)/Delta~log(phyt))),
                 xlab = expression(Chl~italic(a)~(mu*g/L)))
            logtick.exp(0.0001, 10, c(1), c(F,F))
            axis(2)
            lines(ymean, zoopgq[1,], lty = "dashed")
            lines(ymean, zoopgq[3,], lty = "dashed")
            lines(ymean,zoopgq[2,])
            abline(h = 0.0, lty = "dotted")

            targ <- 0
            critout[j,1] <- exp(approx(zoopgq[1,], ymean, targ)$y)
            critout[j,2] <- exp(approx(zoopgq[2,], ymean, targ)$y)

        }
    }
    if (doslpplot) {
        print(critout)
    }

#    dev.off()




}


## run model and save to fitout
## runmod variable set to T to run simulation and set to F to
##  run post processing.
fitout <- chlzoopmodel(dat.merge.all, runmod = T)

## post process
chlzoopmodel(dat.merge.all, fitout, runmod = F)
