################################################
################################################
########## Hierarchical bayesian model #########
##### J. Papaix, E. Walker and V. Journé #######
################################################
################################################
################################################

#clean space
rm(list  = ls())

#specify workind dir 
#setwd("/home/vjourne/Documents/Upload/")
#setwd("/media/journe/DDlabo/Thesis/Bayesian/modele/version4_CASTANEA/Upload/")
#Library bayesian analysis
library(rjags) #here Jags 4.10
library(coda)
#for small analysis of HBM 
library(forcats)
library(ggmcmc)

########################
#load dataset
#see metadatafile for variable description and the main text article
#data from F. Courbet and F. Lefevre - INRAE URFM
#simulation castanea from H. Davi - INRAE URFM
data <- read.csv("Dataset/Dataset_cedrus_atlantica.csv",header=T,sep=";")

########################
#create JAGS model file
#see main text and supporting information for parameters description
#note: Jags use precision rather than variance (sigma might differ than tau in main article and supporting information)

cat("
    model{
        
        ##prior for the mod1 mod 2 selection 
        Y ~ dbern(pY)
        pY ~dunif(0,1)
        
        ##a priori distribution
        ##some are for each density stands with [1] and [2]
        delta_1[1] ~ dnorm(0,0.001)
        delta_1[2] ~ dnorm(0,0.001)
        tau_BAI <- 1/(sd_BAI*sd_BAI) 
        sd_BAI ~ dunif(0,10)


        #for reproductive pool energetic sink, PG is for sexual type 
        PG_bar ~ dnorm(0,0.0001)
        tauPG <- 1/(sigma*sigma)
        sigma ~ dunif(0,10)

        
        tau_proba <- 1/(sigma_proba*sigma_proba)
        sigma_proba ~ dunif(0,50) 
        
        gamma_1[1] ~ dnorm(0,0.001)
        beta_1[1] ~ dnorm(0,0.001)
        gamma_1[2] ~ dnorm(0,0.001)
        beta_1[2] ~ dnorm(0,0.001)
        
        #other intercept option, if assumption no energy = no growth no reproduction 
        delta0 <- 0
        gamma0 <- 0
        beta0 <- -10 #old version : beta0 ~ dnorm(0,0.001)
        
        
        
        #probability to reproduce
        pr_X ~ dunif(0,1)
        #rFC ~ dgamma(0.01,0.01) #check with negative binomial as reviewer suggestions

        #two model, here mod1 and mod 2 (respectively modA andmodB in the main text)
        for (i in 1:3){
            mu.epsmod1[i] <- 0
            mu.epsmod2[i] <- 0
        }
        Id[1,1] <- 1
        Id[2,2] <- 1
        Id[3,3] <- 1
        Id[1,2] <- 0
        Id[1,3] <- 0
        Id[2,1] <- 0
        Id[2,3] <- 0
        Id[3,1] <- 0
        Id[3,2] <- 0

        inv.SIGMAmod1 ~ dwish(Id,4)
        inv.SIGMAmod2 ~ dwish(Id,4)
        
        for (i in 1:(Nind-1)){#Nind){#
            epsmod1[1:3,i] ~ dmnorm(mu.epsmod1, inv.SIGMAmod1)
            epsmod2[1:3,i] ~ dmnorm(mu.epsmod2, inv.SIGMAmod2)
        }
        epsmod1[1,Nind] <- -sum(epsmod1[1,1:(Nind-1)])
        epsmod1[2,Nind] <- -sum(epsmod1[2,1:(Nind-1)])
        epsmod1[3,Nind] <- -sum(epsmod1[3,1:(Nind-1)])
        epsmod2[1,Nind] <- -sum(epsmod2[1,1:(Nind-1)])
        epsmod2[2,Nind] <- -sum(epsmod2[2,1:(Nind-1)])
        epsmod2[3,Nind] <- -sum(epsmod2[3,1:(Nind-1)])
        SIGMAmod1 <- inverse(inv.SIGMAmod1)
        SIGMAmod2 <- inverse(inv.SIGMAmod2)
        
	for(k in 1:3){
            for (k.prime in 1:3){
                rhomod1[k,k.prime] <- SIGMAmod1[k,k.prime]/sqrt(SIGMAmod1[k,k]*SIGMAmod1[k.prime,k.prime])
                rhomod2[k,k.prime] <- SIGMAmod2[k,k.prime]/sqrt(SIGMAmod2[k,k]*SIGMAmod2[k.prime,k.prime])
            }
	}
        

        ##likelihood
        for (i in 1:Nind){

           
            X[i,1] ~ dbern(pr_X)
            IB[i,1] ~ dpois(X[i,1]*meanIB[i,1])
            log(meanIB[i,1]) <- (gamma0 + (gamma_1[dens[i]] + Y*epsmod1[2,i])*NPP[i,1]+(1-Y)*epsmod2[2,i])
            PG_bartot[i,1] ~ dnorm(PG_bar,tauPG)
            logit(PG[i,1]) <- PG_bartot[i,1] #PG is for sexual type

            for (t in 2:Ntemps){
                
                mu_BAI[i,t] <- delta0 + (delta_1[dens[i]] + Y*epsmod1[1,i])*NPP[i,t]+(1-Y)*epsmod2[1,i]
                BAI[i,t] ~ dnorm(mu_BAI[i,t],tau_BAI)
                
                ##male cones -- male flower
                #first sexual type then male cone notation with Multinomial
                PG_bartot[i,t] ~ dnorm(PG_bar,tauPG)
                logit(PG[i,t]) <- PG_bartot[i,t]
                X[i,t] ~ dbern(pr_X)
                IB[i,t] ~ dpois(X[i,t]*meanIB[i,t])
                log(meanIB[i,t]) <- (gamma0 + (gamma_1[dens[i]] + Y*epsmod1[2,i])*NPP[i,t] + (1-Y)*epsmod2[2,i])

                pr_IMC[i,t] <- PG[i,t]*IB[i,t]
                proba[i,t,1] <- pnorm(10,pr_IMC[i,t],tau_proba)
                proba[i,t,2] <- pnorm(20,pr_IMC[i,t],tau_proba)-pnorm(10,pr_IMC[i,t],tau_proba)
                proba[i,t,3] <- pnorm(30,pr_IMC[i,t],tau_proba)-pnorm(20,pr_IMC[i,t],tau_proba)
                proba[i,t,4] <- pnorm(40,pr_IMC[i,t],tau_proba)-pnorm(30,pr_IMC[i,t],tau_proba)
                proba[i,t,5] <- 1-pnorm(40,pr_IMC[i,t],tau_proba)
                M[i,t,1:5] ~ dmulti(proba[i,t,1:5],1)
                
                ###female cones -- female flowers (based on previous year and current year)
                logit(pi[i,t]) <- beta0 + (beta_1[dens[i]] + Y*epsmod1[3,i])*NPP[i,t] + (1-Y)*epsmod2[3,i]
                mFC[i,t] <- pi[i,t]*(1-PG[i,t-1])*IB[i,t-1]
                FC[i,t] ~ dpois(mFC[i,t])
                #FC[i,t] ~ dnegbin(rFC/(rFC+mFC[i,t]),rFC)#check model with neg bin as reviewer suggestions
                
                ##simulating data
                BAI.rep[i,t] ~ dnorm(mu_BAI[i,t],tau_BAI)
                eval.BAI[i,t] <- mu_BAI[i,t]
                E.BAI[i,t] <- pow(BAI[i,t]-eval.BAI[i,t],2)/(eval.BAI[i,t]+0.5)
                Erep.BAI[i,t] <- pow(BAI.rep[i,t]-eval.BAI[i,t],2)/(eval.BAI[i,t]+0.5)
                
                M.rep[i,t,1:5] ~ dmulti(proba[i,t,1:5],1)
                eval.M[i,t,1:5] <- proba[i,t,1:5]
                E.M1[i,t] <- pow(-M[i,t,1]+eval.M[i,t,1],2)     #/(eval.M[i,t,1]+0.5)
                Erep.M1[i,t] <- pow(-M.rep[i,t,1]+eval.M[i,t,1],2)  #/(eval.M[i,t,1]+0.5)
                E.M2[i,t] <- pow(-M[i,t,2]+eval.M[i,t,2],2)     #/(eval.M[i,t,2]+0.5)
                Erep.M2[i,t] <- pow(-M.rep[i,t,2]+eval.M[i,t,2],2)  #/(eval.M[i,t,2]+0.5)
                E.M3[i,t] <- pow(-M[i,t,3]+eval.M[i,t,3],2)     #/(eval.M[i,t,3]+0.5)
                Erep.M3[i,t] <- pow(-M.rep[i,t,3]+eval.M[i,t,3],2)  #/(eval.M[i,t,3]+0.5)
                E.M4[i,t] <- pow(-M[i,t,4]+eval.M[i,t,4],2)     #/(eval.M[i,t,4]+0.5)
                Erep.M4[i,t] <- pow(-M.rep[i,t,4]+eval.M[i,t,4],2)  #/(eval.M[i,t,4]+0.5)
                E.M5[i,t] <- pow(-M[i,t,5]+eval.M[i,t,5],2)     #/(eval.M[i,t,5]+0.5)
                Erep.M5[i,t] <- pow(-M.rep[i,t,5]+eval.M[i,t,5],2)  #/(eval.M[i,t,5]+0.5)
                                
                FC.rep[i,t] ~ dpois(pi[i,t]*(1-PG[i,t-1])*IB[i,t-1])
                eval.FC[i,t] <- mFC[i,t]
                E.FC[i,t] <- pow(FC[i,t]-eval.FC[i,t],2)/(eval.FC[i,t]+0.5)
                Erep.FC[i,t] <- pow(FC.rep[i,t]-eval.FC[i,t],2)/(eval.FC[i,t]+0.5)
                
            }
        }

        ##simulating data: needed for model validation (see model evaluation in SI)
        fit.BAI <- sum(E.BAI[,2:8])
        fitrep.BAI <- sum(Erep.BAI[,2:8])
        fit.M1 <- sum(E.M1[,4:7])/(Nind*4)
        fitrep.M1 <- sum(Erep.M1[,4:7])/(Nind*4)
        fit.M2 <- sum(E.M2[,4:7])/(Nind*4)
        fitrep.M2 <- sum(Erep.M2[,4:7])/(Nind*4)
        fit.M3 <- sum(E.M3[,4:7])/(Nind*4)
        fitrep.M3 <- sum(Erep.M3[,4:7])/(Nind*4)
        fit.M4 <- sum(E.M4[,4:7])/(Nind*4)
        fitrep.M4 <- sum(Erep.M4[,4:7])/(Nind*4)
        fit.M5 <- sum(E.M5[,4:7])/(Nind*4)
        fitrep.M5 <- sum(Erep.M5[,4:7])/(Nind*4)
        fit.FC <- sum(E.FC[,4:7])
        fitrep.FC <- sum(Erep.FC[,4:7])
        
    }
", file="Bayesian_model_studycase.jags")

########################
#define and convert observations
#individual, year and obs
Nind <- length(unique(data$tree_ID))
Ntemps <- length(unique(data$year))
castNPP<-matrix(data$resources_NPP_castanea,ncol=Ntemps,byrow=T)
castNPP <- castNPP/max(castNPP) #range(1/(1+exp(-(0+0*as.vector(castNPP)))))

BAI <- matrix(data$BAI_growth,ncol=Ntemps,byrow=T)
M <- matrix(data$pollen_index,ncol=Ntemps,byrow=T)
FC <- matrix(data$cone_count,ncol=Ntemps,byrow=T)

#for density 
dens <-matrix(data$density_plot,ncol=Ntemps,byrow=T)
dens <- as.numeric(as.factor(dens[,1]))

#convert pollen dataset
Mmatrice <- array(NA,dim=c(nrow(M),ncol(M),5))
Mmatrice[,,1] <- (M==0)+0
Mmatrice[,,2] <- (M==1)+0
Mmatrice[,,3] <- (M==2)+0
Mmatrice[,,4] <- (M==3)+0
Mmatrice[,,5] <- (M==4)+0

X <- ((M+cbind(FC[,-1],rep(NA,Nind)))>0)+0
Xinit <- matrix(NA,ncol=ncol(X),nrow=nrow(X))
Xinit[,3] <- (FC[,4]>0)+0 #to avoid problem non numerical initial values

########################
#model implementation 
#can take an hour and half (with Intel Xeon CPU E3-1240 3.50GHz, memory system 16G)
n.chains<-3
n.iter<-100000
n.burn<-100000
n.thin<-100 #in the article I've presented n.thin at 5, but saved object can be too big to be analyzed

#check time begining
time0=Sys.time()

########################
#run model and select parameters of interest
modele <- jags.model(file="Bayesian_model_studycase.jags", 
                     data=list(Nind=Nind,Ntemps=Ntemps,BAI=BAI,M=Mmatrice,FC=FC,NPP=castNPP,X=X,dens=dens), 
                     inits=list(PG_bartot=matrix(0,ncol=Ntemps,nrow=Nind),X=Xinit), 
                     n.chains=n.chains, quiet=FALSE)

update(modele, n.iter=n.burn)
RES <- coda.samples(model=modele, variable.names=c("pY","Y", "delta_1","sd_BAI","pr_X","PG_bar","PG","sigma","beta0","gamma_1","beta_1","rhomod1","rhomod2","SIGMAmod1","SIGMAmod2","SIGMA","epsmod1","epsmod2", "fit.BAI","fitrep.BAI","fit.M1","fitrep.M1","fit.M2","fitrep.M2","fit.M3","fitrep.M3","fit.M4","fitrep.M4","fit.M5","fitrep.M5","fit.FC","fitrep.FC","sigma_proba","FC.rep","BAI.rep"),n.iter=n.iter,thin=n.thin)

########################
#save results
save(RES,file="results_HBM.rda")

#Time difference of 1.3 hours
print(Sys.time()-time0)



########################
#short check model 
########################
load("results_HBM.rda") #load model parameters outputs and chains 
#select the 3 chains 
C1 <- as.matrix(RES[[1]])
C2 <- as.matrix(RES[[2]])
C3 <- as.matrix(RES[[3]])
plot(C1[,"rhomod2[2,3]"],type="l")
lines(C2[,"rhomod2[2,3]"],col=2)
lines(C3[,"rhomod2[2,3]"],col=3)

plot(RES[,"rhomod2[1,2]"])
plot(RES[,"rhomod2[1,3]"])
plot(RES[,"rhomod2[2,3]"])
plot(RES[,"pr_X"])
plot(RES[,"sigma_proba"])
plot(RES[,"sigma"])
plot(RES[,"PG_bar"])
plot(RES[,"sd_BAI"])
#check differences between stand densities 
#for growth 
plot(RES[,"delta_1[1]"])#density 1 
plot(RES[,"delta_1[2]"])#plot density 2
#for bud initiation
plot(RES[,"gamma_1[1]"])#density 1 
plot(RES[,"gamma_1[2]"])#plot density 2
#for cone maturation
plot(RES[,"beta_1[1]"])#density 1 
plot(RES[,"beta_1[2]"])#plot density 2


#check model 
res <- ggs(RES) #transform to ggmcmc object
mod1iteration <- res %>% filter(Parameter %in% c("rhomod2[1,2]", "rhomod2[1,3]", "rhomod2[2,3]")) %>% 
    ggs_traceplot()
mod1iteration

#GRB plot 

GRBcheck <- res %>% filter(!grepl('rep', Parameter) & 
                               !grepl('eps', Parameter) & 
                               !grepl('fit', Parameter) & 
                               !grepl('prop', Parameter) & 
                               !grepl('mod1', Parameter)& 
                               !grepl('Y', Parameter) &
                               !grepl('gamma0', Parameter) & 
                               !grepl('delta0', Parameter) & 
                               !grepl('beta0', Parameter)) %>% 
    filter(Parameter != "rhomod2[3,3]" & Parameter != "rhomod2[1,1]"& Parameter != "rhomod2[2,2]")%>% 
    ggs_grb() #version_rhat = "BG98"
GRBcheck

Effectivesample <- res %>% filter(!grepl('rep', Parameter) & 
                   !grepl('eps', Parameter) & 
                   !grepl('fit', Parameter) & 
                   !grepl('prop', Parameter) & 
                   !grepl('mod1', Parameter)& 
                   !grepl('Y', Parameter) &
                   !grepl('gamma0', Parameter) & 
                   !grepl('delta0', Parameter) & 
                   !grepl('beta0', Parameter)) %>% 
    filter(Parameter != "rhomod2[3,3]" & Parameter != "rhomod2[1,1]"& Parameter != "rhomod2[2,2]")%>% 
    ggs_effective()
Effectivesample