#***************************************************************************----
# Packages and basic properties of the case study ----
library(ggplot2);library(gridExtra)
library(coda);library(evd);library(mvtnorm);library(trend)
library(RSTooDs) # https://github.com/STooDs-tools/RSTooDs
library(BFunk) # https://github.com/benRenard/BFunk
period=1918:2017 # Analyzed period
SCInames=c('NINO','DMI','SAM')
nCIdef=length(SCInames)

#***************************************************************************----
# Functions for building the model ----

#' Generic function to build a process with a Gaussian hyper-distribution
#' @param pname string, name of the process
#' @param dim dimension object, dimension associated with the process
#' @param init numeric, inital value used for the hyper-mean process
#' @return a process object
getGaussianProcess <- function(pname,dim,init=0){ 
  # define hyperparameter names
  m.name <- paste0(pname,'_m') # hyper-mean
  s.name <- paste0(pname,'_s') # hyper-standard-deviation
  r.name <- paste0(pname,'_r') # hyper-range
  # create parameter objects
  m <- parameter(name=m.name,init=init)
  s <- parameter(name=s.name,init=0.1,priorDist='FlatPrior+')
  r <- parameter(name=r.name,init=100,priorDist='Exponential',priorPar=c(0,1000))
  # Formula for computing the covariance matrix
  fCovar <- paste0('(',s.name,'^2)','*exp(-D',dim$name,'/',r.name,')')
  # Create process object
  lambda <- process(name=pname,dim=dim,init=init,dist='Gaussian',par=list(m,s,r),
                    fMean=m.name,fCovar=fCovar)
  return(lambda)
}

#' Generic function to create the building blocks of a model (parentDist, process, par and formulas)
#' @param what string, name of the variable (Qd, Pd, Tn or Tx)
#' @param dim dimension object, spatial dimension associated with this variable
#' @param nCI integer, number of HCIs/SCIs used in the model 
#' @return a list containing the parent distributions, the list of processes,
#'   the list of parameters and the formulas
getModelComponents <- function(what,dim,nCI=nCIdef){
  # initialize
  parentDist=NULL;plist=NULL;formula=NULL;parlist=NULL
  # try all available options
  if(what %in% c('Qd','Pd')){
    parentDist='Gaussian'
    plist=vector(mode='list',length=2+nCI)
    plist[[1]] <- getGaussianProcess(pname=paste0(what,'_m'),dim=dim) # intercept for mean
    plist[[2]] <- getGaussianProcess(pname=paste0(what,'_s'),dim=dim) # intercept for sdev
    if(nCI>0){
      for(i in 1:nCI){
        plist[[2+i]] <- getGaussianProcess(pname=paste0(what,'_m',i),dim=dim) # effect of ith CI
      }
    }
    formula_s=paste0(what,'_scale=exp(',what,'_s)')
    formula_m=paste0(what,'_loc=',what,'_m')
    if(nCI>0){
      formula_m=paste0(formula_m,'+',
                       paste0(paste0(what,'_m',1:nCI,'*tau',1:nCI),collapse='+'))
    }
    formula=c(formula_m,formula_s)
  }
  if(what %in% c('Tx')){
    parentDist='GPD'
    parlist=vector(mode='list',length=1)
    parlist[[1]] <- parameter(name=paste0(what,'_m'),init=0,priorDist='FIX')
    plist=vector(mode='list',length=2+nCI)
    plist[[1]] <- getGaussianProcess(pname=paste0(what,'_s'),dim=dim) # intercept for scale
    plist[[2]] <- getGaussianProcess(pname=paste0(what,'_x'),dim=dim) # intercept for shape
    if(nCI>0){
      for(i in 1:nCI){
        plist[[2+i]] <- getGaussianProcess(pname=paste0(what,'_s',i),dim=dim) # effect of ith CI
      }
    }
    formula_m=paste0(what,'_loc=',what,'_m')
    formula_x=paste0(what,'_shape=',what,'_x')
    formula_s=paste0(what,'_scale=exp(',what,'_s')
    if(nCI>0){
      formula_s=paste0(formula_s,'+',
                       paste0(paste0(what,'_s',1:nCI,'*tau',1:nCI),collapse='+'),
                       ')')
    }
    formula=c(formula_m,formula_s,formula_x)
  }
  if(what %in% c('Tn')){
    parentDist='Poisson'
    plist=vector(mode='list',length=1+nCI)
    plist[[1]] <- getGaussianProcess(pname=paste0(what,'_m'),dim=dim) # intercept
    if(nCI>0){
      for(i in 1:nCI){
        plist[[1+i]] <- getGaussianProcess(pname=paste0(what,'_m',i),dim=dim) # effect of ith CI
      }
    }
    formula_m=paste0(what,'_loc=exp(',what,'_m')
    if(nCI>0){
      formula_m=paste0(formula_m,'+',
                       paste0(paste0(what,'_m',1:nCI,'*tau',1:nCI),collapse='+'),
                       ')')
    } else {
      formula_m=paste0(formula_m,')')
    }
    formula=c(formula_m)
  }
  return(list(parentDist=parentDist,plist=plist,formula=formula,parlist=parlist))
}

#' Get the name of components being treated as fixed in the stepwise inference
#' @param varnames string vector, variables names
#' @param kCHI integer, the index of the HCI being inferred
#' @return a string vector containing the requested names
getStepwiseComponents <- function(varnames,kHCI){
  out=paste0('tau',kHCI)
  for(j in 1:length(varnames)){
    w=varnames[j]
    letter = ifelse(w %in% c('Tx'),'s','m')
    if(kHCI==1) out=c(out,paste0(w,'_',letter))
    out=c(out,paste0(w,'_',letter,kHCI))
  }
  return(out)
}

#***************************************************************************----
# Functions for applying the model ----

#' Get values simulated from a GPD. Needed because 'rgpd' does not allow 
#' for a vector shape parameter. 
#' https://en.wikipedia.org/wiki/Generalized_Pareto_distribution#Generating_GPD_random_variables
#' @param nsim integer, number of simulated values
#' @param loc,scale,shape numeric vectors of size nsim, parameters
#' @return a vector containing nsim simulated values
myrgpd <- function(nsim,loc,scale,shape){
  u=runif(nsim)
  z=loc+(scale*(u^(-1*shape)-1))/shape
  if(any(shape==0)){
    mask=shape==0
    z[mask]=loc[mask]-scale[mask]*log(u[mask])
  }
  return(z)
}

#' Get values simulated from a distribution
#' @param dist string, distribution name
#' @param var string, variable name
#' @param par matrix or data.frame, nsim*npar
#' @return a vector containing nsim simulated values
getSimulatedValues <- function(dist,var,par){
  nsim=NROW(par)
  out <- switch(dist,
                Gaussian=rnorm(nsim,mean=par[,1],sd=par[,2]),
                GPD=myrgpd(nsim,loc=par[,1],scale=par[,2],shape=-1*par[,3]), # NOTE: shape is multiplied by -1 because STooDs use the 'hydrologist' parameterization where negative shape = heavy right tail. 
                Poisson=rpois(nsim,lambda=par[,1]),
                rep(NA,nsim))
  # apply censoring
  if(var %in% c('Qd','Pd')){
    out[out<0] <- 0
    out[out>1] <- 1
  }
  return(out)
}

#' Get the names of all inferred quantities associated with a variable at a site and a time step
#' @param varname string, variable name
#' @param site integer, site number
#' @param tstep integer, time step. If NULL, tau-related names are not returned 
#' @param nCI integer, number of climate indices
#' @return a list containing two string vectors: the requested names as they appear in the
#'   MCMC samples and a same names as they should be used in the formulas (i.e. site/time step removed)
getEnvNames <- function(varname,site,tstep=NULL,nCI=nCIdef){
  inF=NULL;inMCMC=NULL
  # define names depending on the variable
  # Qd, Pd: location m and scale s
  if(varname %in% c('Qd','Pd')){
    inF=c(paste0(varname,'_m',c('',1:nCI)),paste0(varname,'_s'))
    inMCMC=paste0(inF,'_',site) # site is added
  }
  # Tn: location m
  if(varname %in% c('Tn')){
    inF=c(paste0(varname,'_m',c('',1:nCI)))
    inMCMC=paste0(inF,'_',site) # site is added
  } 
  # Tx: location m, scale s and shape c
  if(varname %in% c('Tx')){
    inF=c(paste0(varname,'_m'),paste0(varname,'_s',c('',1:nCI)),paste0(varname,'_x'))
    inMCMC=c(paste0(varname,'_m'),paste0(inF[-1],'_',site)) # site is added except for threshold Tx_m which is fixed at zero
  } 
  if(!is.null(tstep)){
    # Add taus
    inF=c(inF,paste0('tau',1:nCI))
    inMCMC=c(inMCMC,paste0('tau',1:nCI,'_',tstep))
  }
  return(list(inMCMC=inMCMC,inF=inF))
}

#' Compute the parent parameters of a variable, using the formulas of the model and values for CIs and their effects
#' @param varname string, variable name
#' @param mod model object, the model containing in particular the formulas
#' @param CIs named vector, CI values. Typically: getCIs()[timestep,] 
#' @param effects named vector, CI effects Typically: getCIeffects(varname)[site,] 
#' @return a numeric vector containing the parent parameters
getParentPar <- function(varname,mod,CIs,effects){
  nCI=NCOL(CIs)
  v=(which(mod$varName==varname)) # index of the variable
  isHidden=any(getNames(mod$process) %in% paste0('tau',1:nCI))
  indx=unlist(getFormulaIndices(mod)[v])
  f=applyFormula(mod,cbind(CIs,effects))
  param=as.numeric(f[indx])
  return(param)
}

#' Alternative computation of the parent parameters of a variable, 
#' allowing the use of MCMC ensembles rather than point-estimates for CIs and their effects
#' @param mcmc dataframe, mcmc samples
#' @param varname string, variable name
#' @param site integer, site index
#' @param tstep integer, time step
#' @param mod model object, the model containing in particular the formulas
#' @return a dataframe containing the parent parameters
getParentPar2 <- function(mcmc,varname,site,tstep,mod,nCI=nCIdef){
  lookup=getEnvNames(varname,site,tstep,nCI)
  env=mcmc[,names(mcmc) %in% lookup$inMCMC]
  # reorder and rename columns to use the environment in formulas
  env=env[,lookup$inMCMC] 
  names(env) <- lookup$inF
  out=getParentParFromEnv(env,varname,mod)
  return(out)
}

#' Engine to get parent parameters from an environment 
#' @param env dataframe, containing MCMC samples from all variables needed to apply formulas.
#'   It has been built outside of this function and should be named correctly - no check here.
#' @param varname string, variable name
#' @param mod model object, the model containing in particular the formulas
#' @return a dataframe containing the parent parameters
getParentParFromEnv <- function(env,varname,mod){
  v=(which(mod$varName==varname)) # index of the variable
  indx=unlist(getFormulaIndices(mod)[v])
  out <- data.frame(matrix(NA,nrow=NROW(env),ncol=length(indx)))
  for(i in 1:length(indx)){
    out[,i]=tryCatch(eval(parse(text=mod$formula[indx[i]]),envir=env),
                     error=function(e) NA)
    names(out)[i] <- strsplit(x=mod$formula[i],split='=')[[1]][1]
  }
  return(out)
}

#' Generate univariate predictions for a variable
#' @param varname string, variable name
#' @param mod STooDs model object
#' @param nrep integer, number of replicaates 
#' @param CIs data frame, CI values. If NULL, will be retrieved from getCI()
#' @param effects data frame, CI effects If NULL, will be retrieved from getCIeffects(varname)
#' @return a list containing a data frame and a matrix. The matrix 'uni' contains all 
#'   generated values, with each row corresponding to a combination site*tstep, and 
#'   each column is a replicate. The data frame uniID identifies each row.
generateUniPred <- function(varname,mod,nrep=1000,CIs=NULL,effects=NULL){
  v=(which(mod$varName==varname)) # index of the variable
  isHidden=any(getNames(mod$process) %in% paste0('tau',1:3))
  if(is.null(CIs)){
    taus=getCIs(isHidden)
  } else {
    taus=CIs
  }
  if(is.null(effects)){
    lambdas=getCIeffects(varname,isHidden)
  } else {
    lambdas=effects
  }
  nsite=NROW(lambdas)
  nt=NROW(taus)
  nr=nsite*nt
  uni=matrix(NA,nr,nrep)
  uniID=data.frame(indx=integer(nr),site=integer(nr),tstep=integer(nr),stringsAsFactors=F)
  # Loop
  k=0
  message(paste0('*********** VAR: ',varname,' ***********'))
  for(site in 1:nsite){
    message(paste0('site ',site,' / ',nsite))
    for(tstep in 1:nt){
      k=k+1
      uniID[k,]=data.frame(indx=k,site=site,tstep=tstep)
      param=getParentPar(varname,mod,taus[tstep,],lambdas[site,])
      parrep=t(matrix(param,ncol=nrep,nrow=length(param)))
      uni[k,]=getSimulatedValues(mod$parentDist[v],varname,parrep)
    }
  }
  return(list(uni=uni,uniID=uniID))
}

#' Alternative method to generate univariate predictions for a variable, 
#' allowing the use of MCMC samples rather than point-estimates.
#' @param varname string, variable name
#' @param mod STooDs model object
#' @param mcmc dataframe, mcmc samples 
#' @return a list containing a data frame and a matrix. The matrix 'uni' contains all 
#'   generated values, with each row corresponding to a combination site*tstep, and 
#'   each column is a replicate. The data frame uniID identifies each row.
generateUniPred2 <- function(varname,mod,mcmc){
  v=(which(mod$varName==varname)) # index of the variable
  isHidden=any(substr(getNames(mod$process),1,3)=='tau')
  CI=getCIs(isHidden=F)
  nt=NROW(CI);nCI=NCOL(CI)
  if(isHidden){ # can use mcmc as is
    M=mcmc
  } else { # need to add columns for SCIs
    nucols=c()
    for(i in 1:nCI){
      foo=CI[,i];names(foo)=paste0('tau',i,'_',1:NROW(foo))
      nucols=c(nucols,foo)
    }
    z=data.frame(t(matrix(nucols,nrow=length(nucols),ncol=NROW(mcmc))))
    names(z) <- names(nucols)
    M=cbind(mcmc,z)
  }
  nrep=NROW(M)
  nsite=getNsite(mod)[v]
  nr=nsite*nt
  uni=matrix(NA,nr,nrep)
  uniID=data.frame(indx=integer(nr),site=integer(nr),tstep=integer(nr),stringsAsFactors=F)
  # Loop
  k=0
  message(paste0('*********** VAR: ',varname,' ***********'))
  for(site in 1:nsite){
    message(paste0('site ',site,' / ',nsite))
    for(tstep in 1:nt){
      k=k+1
      uniID[k,]=data.frame(indx=k,site=site,tstep=tstep)
      param=getParentPar2(M,varname,site,tstep,mod,nCI)
      uni[k,]=getSimulatedValues(mod$parentDist[v],varname,param)
    }
  }
  return(list(uni=uni,uniID=uniID))
}

#***************************************************************************----
# Functions for managing results and observations ----

#' Get the number of sites associated with each variable
#' @param mod STooDs model object
#' @return a named vector with numbers of site for each variable
getNsite <- function(mod){
  vnames=mod$varName
  nv=length(vnames)
  nsite=vector('numeric',nv)
  names(nsite) <- mod$varName
  for(v in 1:nv){
    s=get(paste0('space',substr(vnames[v],1,1)))
    nsite[v]=NROW(s)
  }
  return(nsite)
}

#' Get the time series of obs for a variable at a site
#' @param varname string, variable name
#' @param site integer, site number
#' @param mod STooDs model object
#' @return a x-y dataframe containing the requested time series
getObs <- function(varname,site,mod){
  vn=paste0('ispace',substr(varname,1,1))
  ix=which(mod$dataset$var==varname & mod$dataset$iDim[vn]==site)
  x=mod$dataset$iDim$itime[ix]
  if(is.null(x)) { 
    # SCI model does not have 'itime' stored in data Y, but has it in covariate X
    x=mod$dataset$X$jtime[ix]
  }
  obs=data.frame(x=x,y=mod$dataset$Y$value[ix])
  return(obs)
}

#' Analyze MCMC samples
#' @param folder string, folder containing the MCMC chains
#' @param burnFactor num in [0,1), burn factor
#' @param slimFactor integer, slimming factor  
#' @param doTrace logical, produce trace file? (a bit long)
#' @param readOnly logical, if true will only read MCMC files and exit
#' @return a list containing the MCMC samples (merged in an unique data frame) and the Gelman-Rubin diagnostics
analyseMCMC <- function(folder,burnFactor=0.5,slimFactor=5,doTrace=FALSE,readOnly=FALSE){
  # load model
  load(file.path(folder,'model.RData'))
  # Get subfolders for chains
  f=list.files(folder,pattern='chain')
  # read each chain
  sam=c() # used to store all samples from all chains
  chains=vector(mode='list',length=length(f))
  nsim=rep(0,length(f)) # length of each chain
  for(i in 1:length(f)){
    chains[[i]]=readMCMC(file=file.path(folder,f[i],'MCMC.txt'),model=mod,burnFactor=burnFactor,slimFactor=slimFactor)
    nsim[i]=NROW(chains[[i]])
    sam=rbind(sam,chains[[i]])
  }
  if(readOnly){return(sam)}
  # Make each chain identical length (should already be the case), remove constant columns and transform into coda object 
  coco=vector(mode='list',length=length(f))
  for(i in 1:length(f)){
    keep=apply(chains[[i]],2,sd)>0
    coco[[i]]=coda::mcmc(chains[[i]][1:min(nsim),keep])
  }
  # Get Gelman diagnostics
  foo=gelman.diag(mcmc.list(coco),autoburnin=FALSE,multivariate=FALSE)
  GR=foo$psrf[,1]
  # save posterior median, quantiles and max
  estimate <- apply(sam,2,median)
  save(estimate,file=file.path(folder,'postmed.RData'))
  estimate <- apply(sam,2,quantile,0.05)
  save(estimate,file=file.path(folder,'postq5.RData'))
  estimate <- apply(sam,2,quantile,0.95)
  save(estimate,file=file.path(folder,'postq95.RData'))
  estimate <- sam[which.max(sam$post),]
  save(estimate,file=file.path(folder,'postmax.RData'))
  # do report
  if(doTrace){traceFile=file.path(folder,"MCMCtrace.pdf")} else {traceFile=NULL}
  MCMCreport(mcmc=sam,model=mod,addWorld=TRUE,
             traceFile = traceFile,
             parFile = file.path(folder,"MCMCpar.pdf"),
             processFile1 = file.path(folder,"MCMCprocess1.pdf"),
             processFile2=file.path(folder,"MCMCprocess2.pdf"))
  return(list(mcmc=sam,GR=GR))
}

#' Aggregate estimates from the stepwise procedure used for HCI models
#' @param estimType string, postmed or postmax?
#' @return a named vector with all needed estimates
aggregateHCIestimates <- function(estimType='postmed'){
  # Aggregate estimates from each step of the stepwise procedure
  agg=c();estim=c()
  for(i in seq(length(HCIdir),1,-1)){ # start from the last HCI
    load(file.path(HCIdir[i],paste0(estimType,'.RData')))
    # Do not add components of estimate that are already there
    indx=which(!(names(estimate) %in% agg))
    est=estimate[indx]
    estim=c(estim,est)
    agg=c(agg,names(est))
  }
  return(estim)
}

#' Get climate indices, hidden or not
#' @param isHidden logical, if TRUE HCIs will be returned, otherwise SCIs
#' @return a dataframe containing the CIs
getCIs <- function(isHidden=TRUE){
  if(isHidden){
    CI=matrix(1,nrow=length(period),ncol=length(HCIdir))
    for(i in 1:NCOL(CI)){
      load(file.path(HCIdir[i],'postmed.RData'))
      mask=names(estimate) %in% paste0('tau',i,'_',1:length(period))
      CI[,i]=estimate[mask]
    }
  } else {
    CI=matrix(1,nrow=length(period),ncol=length(SCInames))
    load(file.path(SCIdir,'model.RData'))
    for(i in 1:length(period)){
      CI[i,]=as.numeric(mod$dataset$X[which(mod$dataset$X$jtime==i)[1],1:length(SCInames)])
    }
  }
  out=as.data.frame(CI)
  names(out) <- paste0('tau',1:NCOL(out))
  return(out)
}

#' Get the effects of climate indices (hidden or not)
#' @param varname string, variable name
#' @param isHidden logical, if TRUE HCI effects will be returned, otherwise SCI effects
#' @return a dataframe containing the CI effects
getCIeffects <- function(varname,isHidden=TRUE){
  # get or create estimate vector
  if(isHidden){
    # Aggregate estimates from each step of the stepwise procedure
    estimate=aggregateHCIestimates('postmed')
  } else{
    load(file.path(SCIdir,'postmed.RData'))
  }
  # Get formula parameters at all sites
  allnames=names(estimate)
  DF=data.frame()
  nsite=NROW(get(paste0('space',substr(varname,1,1))))
  for(site in 1:nsite){
    localnames=getEnvNames(varname,site)$inMCMC
    indx=match(localnames, allnames)
    DF=rbind(DF,estimate[indx])
  }
  names(DF) <- getEnvNames(varname,site=1)$inF
  return(DF)
}

#' Compute the "variance explained" from univariate predictions.
#' @param uni numeric matrix, resulting from a call to generateUniPred()
#' @param uniID dataframe, resulting from a call to generateUniPred()
#' @param varOrder string vector, ordering of the variables
#' @param title string, plot title
#' @return a list containg: (i) a dataframe with columns: variable, site, 
#'   and variance explained; (ii) a ggplot object.
varExplained <- function(uni,uniID,varOrder=c('ALL',levels(uniID$var)),title=''){
  varnames=as.character(unique(uniID$var))
  nsite=integer(length(varnames))
  for(i in 1:length(varnames)){
    nsite[i]=max(uniID$site[uniID$var==varnames[i]])
  }
  vx=data.frame(var=character(sum(nsite)),site=integer(sum(nsite)),
                explained=numeric(sum(nsite)),stringsAsFactors=F)
  k=0
  for(i in 1:length(varnames)){
    for(j in 1:nsite[i]){
      indx=which(uniID$var==varnames[i] & uniID$site==j)
      y=uni[indx,]
      mv=mean(apply(y,1,var));vm=var(apply(y,1,mean))
      explained=vm/(vm+mv)
      k=k+1
      vx[k,]=data.frame(var=varnames[i],site=j,explained=explained,stringsAsFactors=F)
    }
  }
  foo=vx;foo$var='ALL'
  vx=rbind(foo,vx)
  vx$var=factor(vx$var,levels=varOrder)
  # plot
  g=ggplot(data=vx,aes(x=var,y=explained,fill=var))
  g=g+geom_violin(color='black',scale='width')+scale_fill_brewer('Variable',type='qual')
  g=g+labs(title=title,x='Variable',y='Sharpness ratio')
  g=g+ylim(0,1)+theme_bw()+theme(aspect.ratio=1)
  return(list(df=vx,g=g))
}

#' Compute the PIT values from observations and univariate predictions.
#' @param obs dataframe, with columns named value, var, site, tstep
#' @param uni numeric matrix, resulting from a call to generateUniPred()
#' @param uniID dataframe, resulting from a call to generateUniPred()
#' @param varOrder string vector, ordering of the variables
#' @param title string, plot title
#' @param g0 ggplot object, to which the plot is added to
#' @return a list containg: (i) a dataframe with columns: var, site,
#'   tstep and PIT; (ii) a ggplot object.
PITvalues <- function(obs,uni,uniID,varOrder=c('ALL',levels(uniID$var)),title='',g0=ggplot()){
  varnames=as.character(unique(obs$var))
  nsite=integer(length(varnames))
  for(i in 1:length(varnames)){
    nsite[i]=max(obs$site[obs$var==varnames[i]])
  }
  n=NROW(obs)
  # init
  pit=data.frame(var=character(n),site=integer(n),
                 tstep=integer(n),PIT=numeric(n),
                 stringsAsFactors=F)
  compt=0 # counter
  for(i in 1:length(varnames)){
    for(j in 1:nsite[i]){
      # get obs
      indx=which(obs$var==varnames[i] & obs$site==j)
      o=obs[indx,]
      if(NROW(o)>0) {
        # get corresponding rows in uni
        mask = (uniID$var==varnames[i] & uniID$site==j)
        # loop on each value
        for(k in 1:NROW(o)){
          indx=which(mask & uniID$tstep==o$tstep[k])
          foo=c(o$value[k],uni[indx,])
          pval=(rank(foo,ties.method='random')[1]-0.5)/(length(foo))
          compt=compt+1
          pit[compt,]=data.frame(var=varnames[i],site=j,tstep=o$tstep[k],
                                 PIT=pval,stringsAsFactors=F)
        }
      }
    }
  }
  # remove NA's
  pit=pit[!is.na(pit$var),]
  out=cbind(pit,freq=(rank(pit$PIT)-0.5)/NROW(pit))
  out$var='ALL'
  for(i in 1:length(varnames)){
    mask = pit$var==varnames[i]
    out=rbind(out,cbind(pit[mask,],freq=(rank(pit$PIT[mask])-0.5)/sum(mask)))
  }
  # plot
  out$var=factor(out$var,levels=varOrder) # force variable order
  g=g0+geom_line(data=out,aes(x=freq,y=PIT,colour=var))
  g=g+scale_color_brewer('Variable',type='qual')
  g=g+labs(title=title,x='Frequency',y='PIT value')
  g=g+theme_bw()+coord_fixed()
  return(list(df=out,g=g))
}

#' Compare observed vs. predicted values (value + interval for the latter)
#' @param obs dataframe, with columns named value, var, site, tstep
#' @param uni numeric matrix, resulting from a call to generateUniPred()
#' @param uniID dataframe, resulting from a call to generateUniPred()
#' @param varname string, name of the studied variable
#' @param probs numeric vector of size 3, probabilities to compute (lower,median,upper) predictions.
#' @param g0 ggplot object, to which the plot is added to
#' @return a list containg: (i) a dataframe column-binding obs and (lower,median,upper) predictions.
#' (ii) a ggplot object.
ObsVsPred <- function(obs,uni,uniID,varname,probs=c(0.05,0.5,0.95),g0=ggplot()){
  n=NROW(obs)
  intervals=data.frame(lower=numeric(n),median=numeric(n),upper=numeric(n))
  for(i in 1:NROW(obs)){
    indx=which(uniID$var==as.character(obs$var[i]) & uniID$site==obs$site[i] & uniID$tstep==obs$tstep[i])
    intervals[i,]=quantile(uni[indx,],probs=probs)
  }
  DF=cbind(obs,intervals)
  DF=DF[!is.na(DF$median),]
  g=g0+geom_linerange(data=DF,aes(x=value,ymin=lower,ymax=upper,color=year),alpha=0.5)
  g=g+geom_point(data=DF,aes(x=value,y=median,color=year))
  g=g+scale_color_distiller(palette='Spectral')
  g=g+geom_abline(slope=1)
  g=g+labs(x=paste('Observed',varname),y=paste('Predicted',varname))
  g=g+theme_bw()+coord_fixed()
  return(list(df=DF,g=g))
}

#' Compute DIC criterion
#' @param mcmc dataframe, MCMC samples
#' @param v2 logical, if TRUE returns the version by Pooley and Marion
#' @return the DIC.
DIC <- function(mcmc,v2=TRUE){
  D=-2*mcmc$lkh
  if(v2==TRUE){
    out=mean(D)+0.5*var(D)
  } else {
    ix=which.max(mcmc$post)
    out=2*mean(D)-D[ix]
  }
  return(out)
}

#***************************************************************************----
# Plotting Functions  ----

#' plot data availability in time
#' @param dat data frame, data
#' @param varnames string vector, names of variables used to count the number of available stations
#' @return a ggplot object
plotAvailability <- function(dat,varnames=c('Tn','Pd','Qd')){
  avail=c()
  for(i in 1:length(varnames)){
    for(j in min(dat$year):max(dat$year)){
      mask= dat$var==varnames[i] & dat$year==j
      nsite=length(unique(dat$site[mask]))
      avail=rbind(avail,data.frame(year=j,n=nsite,var=substr(varnames[i],1,1)))
    }
  }
  g=ggplot(avail, aes(x=year, y=n, fill=var)) + 
    geom_area(colour=NA)+scale_fill_brewer("Variable",palette="Set1")+
    labs(x='Year',y='Available stations')+theme_bw()
  return(g)
}

#' Map measurement networks.
#' @param networks list, list of dataframes describing the measurement networks.
#'   Each data frame should contain at least lon and lat columns.
#' @param colors vector of colors, vector of colors associated with each network
#' @param fill color, color used to fill Australia map
#' @param alpha,size numeric, properties of the points representing stations
#' @return a ggplot object
mapNetwork <- function(networks,colors,fill="gray80",alpha=0.6,size=1){
  gs=vector(mode='list',length=length(networks))
  # Australia map
  world = map_data("world")
  bb=BFunk::getBBox("Australia")
  g=ggplot()
  g=g+geom_polygon(data=world[world$region=='Australia',],aes(long,lat,group=group),
                   fill=fill,color=fill,size=0.1)
  g=g+coord_fixed(ratio=1,xlim=bb[1:2],ylim=bb[3:4],expand=FALSE)
  g=g+theme_void()
  g=g+theme(panel.border=element_rect(fill=NA))
  # Add measurement networks
  for(i in 1:length(networks)){
    gs[[i]]=g+geom_point(data=networks[[i]],aes(x=lon,y=lat),color=colors[i],alpha=alpha,size=size)
  }
  return(gs)
}

#' Plot (and save) HCIs with uncertainty intervals.
#' @param folder string, folder where HCI runs can be found 
#' @param HCInames string vector, name given to each HCI in the plot 
#' @return a ggplot object
plotHCIs <- function(folder,HCInames=paste(c('First','Second','Third'),'HCI')){
  # list "HCI" folders
  f=list.dirs(folder,recursive=FALSE);f=f[grepl('HCI', f)]
  DF=data.frame()
  for(i in 1:length(f)){
    load(file.path(f[i],'postmed.RData'))
    mask=grepl(paste0('tau',i),names(estimate),fixed=T) & # Columns starting with 'tau'...
         !(names(estimate)%in%paste0('tau',i,c('_m','_sd'))) # ... but not those of hyperparameters.
    med=estimate[mask]
    load(file.path(f[i],'postq5.RData'))
    lower=estimate[mask]
    load(file.path(f[i],'postq95.RData'))
    upper=estimate[mask]
    DF=rbind(DF,data.frame(year=period,med=med,lower=lower,upper=upper,var=HCInames[i]))
  }
  # save
  save(DF,file=file.path(folder,'HCIs.RData'))
  # plot
  g=ggplot(DF,aes(x=year))
  g=g+geom_ribbon(aes(ymin=lower,ymax=upper),fill='red',alpha=0.7)
  g=g+geom_line(aes(y=med))
  g=g+facet_wrap(vars(var),ncol=1)
  g=g+theme_bw()+labs(y='value')
  return(g)
}

#' Plot HCI effects.
#' @param version integer, which version of the plot?
#' @param limits numeric vector, upper limit for each effect
#' @return a ggplot object
plotHCIsEffects <- function(version=1,limits=NULL){
  oneMap <- function(DF,legend=FALSE,limit=NULL){
    if(is.null(limit)){lim=max(abs(DF$effect))} else {lim=limit}
    g=getWorldMap(region='Australia',fill = "gray20")
    g=g+geom_point(data=DF,aes(x=lon,y=lat,color=effect),alpha=0.8,size=2)
    g=g+scale_color_gradient2(low='#2b83ba',mid='#ffffbf',high='#d7191c',limits=c(-lim,lim),
                              breaks=c(-lim,0,lim),labels = c('-','0','+'),)
    g=g+coord_cartesian(xlim=c(134,154),ylim=c(-43.7,-29.5),expand=FALSE)
    if(!legend) {g=g+theme(legend.position='none')}
    g=g+theme(panel.border=element_rect(fill=NA),
              panel.background=element_rect(fill='white'))
    return(g)
  }
  get_legend<-function(a.gplot){
    tmp <- ggplot_gtable(ggplot_build(a.gplot))
    leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
    legend <- tmp$grobs[[leg]]
    legend
  }
  allPlots=list()
  for(v in 1:length(varnames)){
    g=list()
    k=0
    allEffects=data.frame()
    # Get space data frame for this variable (spaceP, spaceQ or spaceT)
    # which should already exist in the environment
    w=varnames[v];letter=substr(w,1,1)
    coord=get(paste0('space',letter))
    if(is.null(limits)){lim=NULL} else {lim=limits[v]}
    # HCIs
    for(i in 1:length(HCIdir)){
      # get HCI effects 
      load(file.path(HCIdir[i],'postmed.RData'))
      string=paste0(w,'_',ifelse(w %in% c('Tx'),'s','m'),i,'_')
      mask=grepl(string,names(estimate),fixed=T)
      val=estimate[mask]
      DF=cbind(coord,effect=val[1:NROW(coord)])
      # do map
      k=k+1;g[[k]]=oneMap(DF,limit=lim)
      allEffects=rbind(allEffects,data.frame(effect=DF$effect,CI=paste0('HCI',i)))
      if(i==1 & v==1) {
        foo=oneMap(DF,legend=TRUE)
        foo=foo+theme(legend.direction='horizontal',
                      legend.title = element_blank())
        leg=get_legend(foo)
      }
    }
    # SCIs
    load(file.path(SCIdir,'postmed.RData'))
    for(i in 1:length(SCInames)){
      string=paste0(w,'_',ifelse(w %in% c('Tx'),'s','m'),i,'_')
      mask=grepl(string,names(estimate),fixed=T)
      val=estimate[mask]
      DF=cbind(coord,effect=val[1:NROW(coord)])
      k=k+1;g[[k]]=oneMap(DF,limit=lim)
      allEffects=rbind(allEffects,data.frame(effect=DF$effect,CI=SCInames[i]))
    }
    maps=grid.arrange(grobs=g,widths=rep(1,6),heights=1)
    # Boxplots
    if(version==1) allEffects$effect=abs(allEffects$effect)
    maxi=max(allEffects$effect)
    if(version==1) {mini=0} else {mini=min(allEffects$effect)}
    if(version==1) {exp=FALSE} else {exp=TRUE}
    if(version==1) {yl=paste0('Absolute effects on ',varnames[v])} else {yl=paste0('Effect on ',varnames[v])}
    if(version==1) {xl=c(0.5,6.5)} else {xl=c(1,6)}
    if(version==1) {bottom=-1*maxi} else {bottom=mini-0.5*(maxi-mini)}
    bx=ggplot(data=allEffects,aes(x=CI,y=effect))
    if(version==1) {
      bx=bx+geom_boxplot(color='black',fill='black',outlier.shape=NA)
    } else {
      bx=bx+geom_violin(color='black',fill='black',scale='width')
    }
    if(version==2){bx=bx+geom_hline(yintercept=0,col='red')}
    bx=bx+labs(y=yl)
    bx=bx+coord_cartesian(xlim=xl,ylim=c(bottom,maxi),expand=exp)
    bx=bx+theme_bw()+theme(legend.position="none",panel.grid=element_blank(),
                           axis.title.x=element_blank())
    if(version==1) {
      bx=bx+geom_polygon(data=data.frame(x=c(0.5,0.5,6.5,6.5),y=c(maxi,0,0,maxi)),
                         aes(x=x,y=y),fill=NA,colour='black')
      bx=bx+scale_y_continuous(breaks=seq(0,maxi,length.out=4),
                               labels=function(x) sprintf("%.2f", x))
      bx=bx+theme(axis.title.y=element_text(hjust=1),
                  panel.background=element_blank(),
                  panel.border=element_blank())
    } else {
      bx=bx+scale_y_continuous(labels=function(x) sprintf("%.2f", x))
      bx=bx+theme(panel.background=element_rect(fill='Gray90'))
    }
    # 
    # Add legend 
    if(v==1){
      bx=bx+annotation_custom(grid.arrange(leg),xmin=4.5,xmax=6.5,ymin=0.4*maxi,ymax=maxi)
    }
    # Add maps
    allPlots[[v]]=bx+annotation_custom(maps,xmin=0.5,xmax=6.5,ymin=bottom,ymax=mini)
  }
  return(allPlots)
}

#' Plot lambda0's, e.g. intercepts, constant stdev etc.
#' @return a list of ggplot objects
plotLambda0 <- function(){
  oneMap <- function(DF,title){
    g=getWorldMap(region='Australia',fill = "gray20")
    g=g+geom_point(data=DF,aes(x=lon,y=lat,color=value),alpha=0.8,size=2)
    g=g+scale_color_distiller('',palette='Spectral')
    g=g+coord_cartesian(xlim=c(134,154),ylim=c(-43.7,-29.5),expand=FALSE)
    g=g+labs(title=title)
    g=g+theme(panel.border=element_rect(fill=NA),
              panel.background=element_rect(fill='white'),
              legend.position=c(0.25,0.1),legend.direction='horizontal')
    return(g)
  }
  gs=vector('list',7)
  k=0
  foo=getCIeffects('Pd')
  k=k+1;gs[[k]]=oneMap(cbind(spaceP[c('lon','lat')],value=foo$Pd_m),title='Pd: intercept for the mean')
  k=k+1;gs[[k]]=oneMap(cbind(spaceP[c('lon','lat')],value=exp(foo$Pd_s)),title='Pd: standard deviation')
  foo=getCIeffects('Qd')
  k=k+1;gs[[k]]=oneMap(cbind(spaceQ[c('lon','lat')],value=foo$Qd_m),title='Qd: intercept for the mean')
  k=k+1;gs[[k]]=oneMap(cbind(spaceQ[c('lon','lat')],value=exp(foo$Qd_s)),title='Qd: standard deviation')
  foo=getCIeffects('Tx')
  k=k+1;gs[[k]]=oneMap(cbind(spaceT[c('lon','lat')],value=exp(foo$Tx_s)),title='Tx: intercept for the scale')
  k=k+1;gs[[k]]=oneMap(cbind(spaceT[c('lon','lat')],value=foo$Tx_x),title='Tx: shape')
  foo=getCIeffects('Tn')
  k=k+1;gs[[k]]=oneMap(cbind(spaceT[c('lon','lat')],value=exp(foo$Tn_m)),title='Tn: intercept for the rate')
  return(gs)
}

#' Plot univariate and bivariate plots for a few sites/variables.
#' @param fewSites dataframe, with colomns var (variable name), site (site index), 
#'    mini (min. value in plots) and maxi (max value in plots)
#' @param mod STooDs model object
#' @param uni numeric matrix, resulting from a call to generateUniPred()
#' @param uniID dataframe, resulting from a call to generateUniPred()
#' @param fill color
#' @return a list with univariate and bivariate ggplot objects
plotFewSites <- function(fewSites,mod,uni,uniID,fill='red'){
  nfew=NROW(fewSites)
  # get bins & atoms for each element in fewSites
  bins=vector(length=length(nfew),mode='list')
  atoms=vector(length=length(nfew),mode='list')
  hasCont=vector(length=length(nfew),mode='list')
  for(i in 1:nfew){
    foo=getBinsAndAtoms(fewSites$var[i])
    bins[[i]]=foo$bins
    atoms[[i]]=foo$atoms
    hasCont[[i]]=foo$hasCont
  }
  # initialize
  gs <- vector(mode='list',length=0.5*nfew*(nfew-1))
  hs <- vector(mode='list',length=nfew)
  xy=matrix(NA,ncol=2,nrow=NCOL(uni)*max(uniID$tstep))
  kg=0;kh=0
  formatter <- function(x) formatC(x,width=5,digits=2,format='f')
  # loop through all bivariate combinations
  for(i in 1:nfew){
    indx=which(uniID$var==fewSites$var[i] & uniID$site==fewSites$site[i])
    ts=uni[indx,]
    xy[,1]=matrix(uni[indx,],ncol=1)
    obs1=getObs(fewSites$var[i],fewSites$site[i],mod)
    for(j in i:nfew){
      if(i==j){
        margin=as.numeric(matrix(uni[indx,]))
        obs=obs1;obs$x=period[obs1$x]
        yl=c(fewSites$mini[i],fewSites$maxi[i])
        yl=yl+diff(yl)*0.075*c(-1,1)
        if(any(is.na(atoms[[i]]))){
          cont=margin
          disc=0
        } else {
          cont=margin[! margin %in% atoms[[i]]]
          if(hasCont[[i]]==F) cont=NULL # discrete TN
          disc=NA*atoms[[i]]
          for(m in 1:length(atoms[[i]])){disc[m]=sum(margin==atoms[[i]][m])/length(margin)}
        }
        g=ggplot()
        w=NULL
        if(!is.null(cont)){
          w=hist(cont,plot=F,breaks=30)
          DF=data.frame(x=w$mids,y=w$density*(1-sum(disc)))
          g=g+geom_area(data=DF,aes(x=x,y=y),fill=fill,alpha=0.5)
        }
        if(!any(is.na(atoms[[i]]))){
          if(is.null(w)){
            width=1
          } else {
            width=diff(w$breaks[1:2])
          }
          for(m in 1:length(atoms[[i]])){
            DF=data.frame(x=atoms[[i]][m]+width*c(-0.5,0.5,0.5,-0.5),
                          y=c(rep(disc[m]/width,2),0,0))
            g=g+geom_polygon(data=DF,aes(x=x,y=y),fill=fill,col='black')
          }
        }
        g=g+coord_flip()+theme_bw()
        g=g+theme(axis.title=element_blank(),axis.ticks=element_blank(),
                  axis.text=element_blank(),panel.grid=element_blank())
        g=g+scale_y_continuous(expand=c(0,0))
        g=g+scale_x_continuous(limits=yl,expand=c(0,0))
        g2=g
        # time series
        g=ribbonsPlot(y=uni[indx,],x=period,fill=fill)
        g=g+geom_point(data=obs,aes(x,y))
        g=g+labs(y=paste(fewSites$var[i],'at site',fewSites$site[i]))
        g=g+scale_x_continuous(limits=range(period),expand=c(0,0))
        g=g+scale_y_continuous(limits=yl,expand=c(0,0),labels=formatter)
        g=g+theme(axis.title.x=element_blank(),axis.ticks.x=element_blank(),
                  axis.text.x=element_blank(),
                  axis.title.y=element_text(size=14),
                  axis.text.y=element_text(size=12))
        g1=g
        kh=kh+1
        hs[[kh]]=gridExtra::grid.arrange(g1,g2,
                                         layout_matrix=matrix(c(rep(1,4),2),nrow=1))
      } else {
        kg=kg+1
        indx=which(uniID$var==fewSites$var[j] & uniID$site==fewSites$site[j])
        xy[,2]=matrix(uni[indx,],ncol=1)
        obs2=getObs(fewSites$var[j],fewSites$site[j],mod)
        # dataframe for plotting
        DF=count2D(xy[,1],xy[,2],xbin=bins[[i]],ybin=bins[[j]])
        DF$count[DF$count==0]=NA
        obs=merge(obs1,obs2,by='x');names(obs)<-c('t','o1','o2')
        # x/y lims
        xl=c(fewSites$mini[i],fewSites$maxi[i])
        yl=c(fewSites$mini[j],fewSites$maxi[j])
        xl=xl+diff(xl)*0.05*c(-1,1)
        yl=yl+diff(yl)*0.05*c(-1,1)
        # Plot
        g=ggplot(DF,aes(x=x,y=y))
        g=g+ geom_raster(aes(fill=count),interpolate=F)
        g=g+scale_fill_gradient(low=lighten(fill),high=fill,na.value='white',trans='identity')
        g=g+geom_point(data=obs,aes(x=o1,y=o2),alpha=0.7)
        g=g+coord_cartesian(xlim=xl,ylim=yl)
        g=g+labs(x=paste(fewSites$var[i],'at site',fewSites$site[i]),
                 y=paste(fewSites$var[j],'at site',fewSites$site[j]))
        g=g+theme_bw()
        g=g+theme(legend.position='none',axis.title=element_text(size=14),
                  axis.text=element_text(size=12))
        gs[[kg]]=g
      }
    }
  }
  return(list(univariate=hs,bivariate=gs))
}

#' Compare conditional or joint frequencies for observations, HCI/SCI univariate predictions.
#' @param mod STooDs model object (either HCI or SCI, both will work).
#' @param uniHCI numeric matrix, resulting from a call to generateUniPred() for the HCI model.
#' @param uniSCI numeric matrix, resulting from a call to generateUniPred() for the SCI model.
#' @param uniID dataframe, resulting from a call to generateUniPred() (for either HCI or SCI, both will work).
#' @param varnames string vector, names of the 2 variables.
#' @param sites integer vector, indices of the 2 sites.
#' @param count function, a function with interface function(z,s1,s2) used to compute either 
#'   joint or conditional frequencies using values in s1 and s2. For instance, 
#'   joint exceedances of value z lead to: function(z,s1,s2){mean(s1>z & s2>z)}.
#' @param x numeric vector, grid of values the count function is applied to.
#' @param xl,yl string, labels of the x/y axis.
#' @return a ggplot object.
conditionalPlot <- function(mod,uniHCI,uniSCI,uniID,varnames,sites,count,x,xl,yl){
  obs1=getObs(varnames[1],sites[1],mod)
  obs2=getObs(varnames[2],sites[2],mod)
  obs=merge(obs1,obs2,by='x');names(obs)<-c('t','o1','o2')
  yobs=sapply(x,count,obs$o1,obs$o2)
  DF=data.frame(x=x,obs=yobs,HCI=NA,SCI=NA)
  for(isHidden in c(T,F)){
    if(isHidden){uni=uniHCI} else {uni=uniSCI}
    indx= uniID$var==varnames[1] & uniID$site==sites[1] & uniID$tstep %in% obs$t
    s1=matrix(uni[indx,])
    indx= uniID$var==varnames[2] & uniID$site==sites[2] & uniID$tstep %in% obs$t
    s2=matrix(uni[indx,])
    ysim=sapply(x,count,s1,s2)
    DF[ifelse(isHidden,'HCI','SCI')]=ysim
  }
  g=ggplot(DF,aes(x=x))+geom_point(aes(y=obs))
  g=g+geom_line(aes(y=HCI),colour='red')
  g=g+geom_line(aes(y=SCI),colour='blue')
  g=g+labs(x=xl,y=yl)+theme_bw()+ylim(0,1)
  g=g+theme(aspect.ratio=1,axis.title=element_text(size=14),
            axis.text=element_text(size=12))
  return(g)
}

#' Fraction Of Over-Threshold Sites plot.
#' @param mod STooDs model object (either HCI or SCI, both will work).
#' @param uniHCI numeric matrix, resulting from a call to generateUniPred() for the HCI model.
#' @param uniSCI numeric matrix, resulting from a call to generateUniPred() for the SCI model.
#' @param uniID dataframe, resulting from a call to generateUniPred() (for either HCI or SCI, both will work).
#' @param prob numeric, probability defining the quantile used as a threshold.
#' @param nsimMax integer, maximum number of simulations. Use a small number (e.g. 10) for a quicklook.
#' @return a ggplot object.
FOOTSplot <- function(mod,uniHCI,uniSCI,uniID,prob=0.5,nsimMax=Inf){
  # obs
  xs=c();ns=getNsite(mod)
  for(v in 1:length(varnames)){
    for(s in 1:ns[v]){
      obs=getObs(varnames[v],s,mod)
      quant=quantile(obs$y,prob)
      foo=rep(NA,length(period));foo[obs$x]=obs$y>quant
      xs=rbind(xs,foo)
    }
  }
  f.obs=apply(xs,2,mean,na.rm=T)
  # HCI/SCI predictions
  for(isHidden in c(T,F)){
    # Fraction of above-median stations for sim
    if(isHidden){uni=uniHCI} else {uni=uniSCI}
    nsim=min(NCOL(uni),nsimMax)
    f.sim=matrix(0,length(period),nsim)
    for(j in 1:nsim){
      message(paste0(j,'/',nsim))
      xs=c()
      for(v in 1:length(varnames)){
        for(s in 1:ns[v]){
          obs=getObs(varnames[v],s,mod)
          indx= which(uniID$var==varnames[v] & uniID$site==s)[obs$x] # where obs are available
          quant=quantile(uni[indx,j],prob)
          foo=rep(NA,length(period));foo[obs$x]=uni[indx,j]>quant
          xs=rbind(xs,foo)
        }
      }
      f.sim[,j]=apply(xs,2,mean,na.rm=T)
    }
    if(isHidden){f.sim.HCI=f.sim} else {f.sim.SCI=f.sim}
  }
  gSCI=ribbonsPlot(f.sim.SCI,x=period,fill='blue')
  g=ribbonsPlot(f.sim.HCI,x=period,fill='red',g=gSCI)
  g=g+geom_point(data=data.frame(x=period,y=f.obs),aes(x,y))
  g=g+geom_hline(yintercept=1-prob,color='green')
  g=g+ylim(0,1)+ylab('Fraction of above-threshold stations')+xlab('Year')
  return(g)
  
}

#' Plot nested uncertainty intervals from many replicated series
#' @param y numeric matrix, nrep*nt, replicated (time) series
#' @param x numeric vector, x-coordinates
#' @param levels numeric vector, probabilities at which quantiles are computed
#' @param fill,col graphic parameters
#' @param g ggplot object, pre-existing ggplot the ribbon is added to. 
#' @return a ggplot object
ribbonsPlot <- function(y,x=1:NROW(y),levels=seq(0.05,0.95,0.05),fill='red',col=NA,g=ggplot()){
  all=apply(y,1,quantile,c(0.5-0.5*levels,0.5+0.5*levels))
  n=length(levels)
  for(i in n:1){
    foo=data.frame(x=x,low=all[i,],high=all[n+i,])
    g=g+geom_ribbon(data=foo,aes(x=x,ymin=low,ymax=high),fill=fill,col=col,alpha=1/n)
  }
  g=g+theme_bw()
  return(g)
}

#' Engine for a 2-D histogram
#' @param x numeric vector, x values
#' @param y numeric vector, y values
#' @param xbin numeric vector, x-intervals
#' @param ybin numeric vector, y-intervals
#' @return a data frame with bins centers + counts
count2D <- function(x,y,xbin=seq(min(x),max(x),length.out=30),ybin=seq(min(y),max(y),length.out=30)){
  nx=length(xbin);ny=length(ybin)
  ix=findInterval(x,xbin,rightmost.closed=TRUE)
  iy=findInterval(y,ybin,rightmost.closed=TRUE)
  con=table(factor(ix,levels=1:nx),factor(iy,levels=1:ny))
  f <- as.data.frame(con);names(f)<-c('ix','iy','freq')
  f$ix=as.integer(f$ix);f$iy=as.integer(f$iy)
  xg=0.5*(xbin[1:(nx-1)]+xbin[2:nx])
  yg=0.5*(ybin[1:(ny-1)]+ybin[2:ny])
  mask= (f$ix>0) & (f$iy>0) & (f$ix<nx) & (f$iy<ny)
  out=data.frame(x=xg[f$ix[mask]],y=yg[f$iy[mask]],count=f$freq[mask])
  return(out)
}

#' Utility for plotting marginal histograms: define the bins for histogram counting, 
#' the atoms with discrete probabilities and flag whether the variable has a continuous part
#' @param var string, variable name
#' @return a list with bins, atoms and continuous flag.
getBinsAndAtoms <- function(var){
  bins=NULL;atoms=NULL;hasCont=T
  if(var %in% c('Qd')){
    bins=seq(-0.025,1.025,0.05)
    atoms=c(0,1)
  }
  if(var %in% c('Pd')){
    bins=seq(-0.01,1.01,0.02)
    atoms=c(0,1)
  }
  if(var %in% c('Tx')){
    bins=seq(0,8,0.5)
    atoms=NA
  }
  if(var %in% c('Tn')){
    bins=seq(-0.25,8.25,0.5)
    atoms=0:8
    hasCont=F
  }
  if(is.null(bins)){warning('getBinsAndAtoms:var not found!')}
  return(list(bins=bins,atoms=atoms,hasCont=hasCont))
}

#' Utility to lighten a color
#' @param color a color
#' @return the lightened color
lighten <- function(color){
  foo=rgb2hsv(col2rgb(color))
  return(hsv(foo[1],0.1,foo[3]))
}

#***************************************************************************----
# Functions to handle the cross-validation experiment  ----

#' Kriging interpolation
#' @param estimate named vector, vector of estimated values containing all required parameters 
#' @param space list, with C and V components containing properties of calibration
#'   and validation sites
#' @param varname string, variable name
#' @return a list with 2 dataframes containg the kriging mean and variance
#'   for all interpolated processes
krig <- function(estimate,space,varname){
  coord=space$C[,c('lon','lat')]
  D=getDistMat(coord)
  pronames=getEnvNames(varname,1)$inF # list of processes to be interpolated
  # initialize
  kmean=as.data.frame(matrix(NA,nrow=NROW(space$V),ncol=length(pronames)))
  names(kmean)=pronames
  kvar=kmean
  for(i in 1:length(pronames)){
    proname=pronames[i]
    # get hyperparameters and process at gauged sites
    hnames=paste0(proname,'_',c('m','s','r')) # names of hyperparameters
    gnames=paste0(proname,'_',1:NROW(space$C)) # names of parameters at gauged sites
    hyperpars=estimate[names(estimate) %in% hnames]
    if(length(hyperpars)==0){ # dealing with constant threshold Tx_m
      kmean[,]=0;kvar[,]=0
    } else {
      m=as.numeric(hyperpars[1])
      s=as.numeric(hyperpars[2])
      r=as.numeric(hyperpars[3])
      gauged=as.numeric(estimate[names(estimate) %in% gnames])
      # Inverse covariane
      V=DtoV(D,s,r)
      Vinv=solve(V)
      # Interpolate at ungauged sites
      for(j in 1:NROW(space$V)){
        pt=space$V[j,]
        lambda=DtoV(getDistVector(pt,coord),s,r)
        foo=lambda%*%Vinv
        kmean[j,i]=m+foo%*%(gauged-m) # kriging mean 
        kvar[j,i]=s^2-foo%*%lambda # kriging variance
      }
    }
  }
  return(list(mean=kmean,var=kvar))
}

#' Generate univariate predictions for a variable
#' @param varname string, variable name
#' @param mod STooDs model object
#' @param mcmc dataframe, MCMC samples from the XV experiment 
#' @param kmean dataframe, kriging mean of effects at prediction sites 
#' @param kvar dataframe, kriging variance of effects at prediction sites
#' @return a list containing a data frame and a matrix. The matrix 'uni' contains all 
#'   generated values, with each row corresponding to a combination site*tstep, and 
#'   each column is a replicate. The data frame uniID identifies each row.
generateUniPred_XV <- function(varname,mod,mcmc,kmean,kvar){
  v=(which(mod$varName==varname)) # index of the variable
  nsite=NROW(kmean)
  nrep=NROW(mcmc)
  taus=getCIs(TRUE)
  nCI=NCOL(taus)
  nt=NROW(taus)
  nr=nsite*nt
  uni=matrix(NA,nr,nrep)
  uniID=data.frame(indx=integer(nr),site=integer(nr),tstep=integer(nr),stringsAsFactors=F)
  #nParentPar=length(getParentPar(varname,mod,taus[1,],kmean[1,]))
  # Loop
  k=0
  message(paste0('*********** VAR: ',varname,' ***********'))
  for(site in 1:nsite){
    message(paste0('site ',site,' / ',nsite))
    for(tstep in 1:nt){
      k=k+1
      uniID[k,]=data.frame(indx=k,site=site,tstep=tstep)
      # look for CIs
      lookup=paste0('tau',1:nCI,'_',tstep)
      CIs=mcmc[,which(names(mcmc) %in% lookup)]
      # reorder and rename CIs
      CIs=CIs[,lookup];names(CIs) <- paste0('tau',1:nCI)
      # generate CI effects from kriging mean and variance
      M=data.frame(matrix(NA,nrow=nrep,ncol=NCOL(kmean)))
      names(M) <- names(kmean)
      for(i in 1:NCOL(M)){
        M[,i]=kmean[site,i]+rnorm(nrep,sd=sqrt(as.numeric(kvar[site,i])))
      }
      param=getParentParFromEnv(cbind(M,CIs),varname,mod)
      # Get univariate predictions
      uni[k,]=getSimulatedValues(mod$parentDist[v],varname,param)
    }
  }
  return(list(uni=uni,uniID=uniID))
}

#' Match predictions from the full-dataset experiment to those of the XV experiment.
#' This includes site renumbering.
#' @param dataset_XV string, name of the XV dataset 
#' @param dataset_full string, name of the full dataset
#' @param varnames string, variable names
#' @param doValidSites logical, match valid or calib sites?
#' @param nCI integer, number of HCIs 
matchPreds <- function(dataset_XV,dataset_full,varnames,doValidSites,nCI=nCIdef){
  # create an environment where stuff can be loaded without polluting the Global environment 
  e=new.env()  
  # Load files for full-dataset experiment
  load(file.path(getwd(),dataset_full,'runs',paste0('HCI',nCI),'uniPred.RData'),envir=e)
  load(file.path(getwd(),dataset_full,'data','dataset.RData'),envir=e)
  spaceP_full=e$spaceP
  spaceQ_full=e$spaceQ
  spaceT_full=e$spaceT
  uni_full=e$uni
  uniID_full=e$uniID
  # load files for XV experiment
  load(file.path(getwd(),dataset_XV,'data','validation.RData'),envir=e)
  # initialize
  renum=data.frame();keep=c()
  uni_out=uni_full
  uniID_out=uniID_full
  # loop on each var
  for(varname in varnames){
    what=substr(varname,1,1)
    # sites in XV experiment 
    foo=get(paste0('space',what,'_XV'),envir=e)
    if(doValidSites){space=foo$V} else {space=foo$C}
    # all sites in full-dataset experiment
    space_full=get(paste0('space',what,'_full'))
    # loop on sites
    for(i in 1:NROW(space)){
      # renumbering
      ix=which(space_full$id==space$id[i]) 
      renum=rbind(renum,data.frame(var=varname,was=ix,becomes=i))
      rows=which(uniID_full$var==varname & uniID_full$site==ix)
      uniID_out$site[rows]=i
      # rows to keep
      keep=c(keep,rows)
    }
  }
  uni_out=uni_out[keep,]
  uniID_out=uniID_out[keep,]
  return(list(renum=renum,uni=uni_out,uniID=uniID_out))
}

#' Compute cross-validation diagnostics (Obs vs. pred, PIT and sharpness)
#' @param varname string, variable name
#' @param obs dataframe, observations with columns var,site,year,tstep and value
#' @param uni numeric matrix, univariate predictions with the reduced calibration dataset, 
#'   resulting from a call to generateUniPred()
#' @param uniID dataframe, corresponding identifier
#' @param uniFull numeric matrix, univariate predictions with the full calibration dataset, 
#'   resulting from a call to generateUniPred()
#' @param uniIDFull dataframe, corresponding identifier
#' @return a list with 3 dataframes named ovp, pit and sharp containing the 3 diagnostics
getXVDiagnostics<- function(varname,obs,uni,uniID,uniFull,uniIDFull){
  # Obs vs. Pred
  ovp=c()
  ovp=rbind(ovp,cbind(ObsVsPred(obs,uniFull,uniIDFull,varname)$df,dataset='full'))
  ovp=rbind(ovp,cbind(ObsVsPred(obs,uni,uniID,varname)$df,dataset='reduced'))
  
  # PIT
  pit=c()
  pit=rbind(pit,cbind(PITvalues(obs,uniFull,uniIDFull)$df,dataset='full'))
  pit=rbind(pit,cbind(PITvalues(obs,uni,uniID)$df,dataset='reduced'))
  pit=pit[pit$var != 'ALL',]
  
  # sharpness
  v=c()
  mask=uniIDFull$var==varname & uniIDFull$tstep %in% unique(obs$tstep)
  v=rbind(v,cbind(varExplained(uniFull[mask,],uniIDFull[mask,])$df,dataset='full'))
  mask=uniID$var==varname & uniID$tstep %in% unique(obs$tstep)
  v=rbind(v,cbind(varExplained(uni[mask,],uniID[mask,])$df,dataset='reduced'))
  v=v[v$var != 'ALL',]
  
  return(list(ovp=ovp,pit=pit,sharp=v))
}

#' Plot the Obs-vs-Pred diagnostic
#' @param DF dataframe, containing the ovp diagnostics from getXVDiagnostics()
#' @param varname string, variable name
#' @param cols color vector, colors given to each calibration dataset
#' @param alpha  numeric, points alpha
#' @return a ggplot.
plotOvp <- function(DF,varname,cols=c('red','black'),alpha=0.8){
  foo=DF;foo$dataset=paste('Calibration dataset:',foo$dataset)
  g=ggplot(foo)+geom_linerange(aes(x=value,ymin=lower,ymax=upper,color=year),alpha=0.5*alpha)
  g=g+geom_point(aes(x=value,y=median,color=year),alpha=alpha)
  g=g+scale_color_distiller(palette='Spectral')
  g=g+geom_abline(slope=1)
  g=g+labs(x=paste('Observed',varname),y=paste('Predicted',varname))
  g=g+theme_bw()+coord_fixed()
  g=g+facet_wrap(vars(dataset),nrow=1)
  return(g)
}

#' Plot the PIT diagnostic
#' @param DF dataframe, containing the PIT diagnostics from getXVDiagnostics()
#' @param cols color vector, colors given to each calibration dataset
#' @return a ggplot.
plotPit <- function(DF,cols=c('red','black')){
  g=ggplot(data=DF,aes(x=freq,y=PIT,color=dataset))
  g=g+geom_line()+scale_color_manual('Calibration dataset',values=cols)
  g=g+labs(x='Frequency',y='PIT value')
  g=g+xlim(0,1)+ylim(0,1)+theme_bw()+theme(aspect.ratio=1)
  g=g+scale_x_continuous(breaks=c(0,0.5,1))+scale_y_continuous(breaks=c(0,0.5,1))
  g=g+facet_wrap(vars(var),nrow=1)+theme(legend.position='none')
  return(g)
}

#' Plot the sharpness diagnostic
#' @param DF dataframe, containing the sharpness diagnostics from getXVDiagnostics()
#' @param cols color vector, colors given to each calibration dataset
#' @return a ggplot.
plotSharp <- function(DF,cols=c('red','black')){
  g=ggplot(data=DF,aes(x=dataset,y=explained,fill=dataset))
  g=g+geom_violin(color='black',scale='width')
  g=g+scale_fill_manual('Calibration dataset',values=cols)
  g=g+scale_color_manual(values=cols)
  g=g+labs(x='Calibration dataset',y='Sharpness ratio')
  g=g+ylim(0,1)+theme_bw()+theme(aspect.ratio=1)
  g=g+facet_wrap(vars(var),nrow=1)+theme(legend.position='none')
  return(g)
}

#' Compute distances between one point and a set of points
#' @param pt1 dataframe, unique (lon,lat) coordinates 
#' @param pts dataframe, several (lon,lat) coordinates 
#' @param R numeric, Earth Radius in km 
#' @param toRad numeric, degree-to-radian conversion factor 
#' @return a vector of distances
getDistVector <- function(pt1,pts,R=6371,toRad=pi/180){
  # Using Salvador's script from this page: https://stackoverflow.com/questions/27928/calculate-distance-between-two-latitude-longitude-points-haversine-formula
  diff=cbind(pt1$lon[1]-pts$lon,pt1$lat-pts$lat)*toRad
  d=0.5-cos(diff[,2])/2+cos(pt1$lat*toRad)*cos(pts$lat*toRad)*(1-cos(diff[,1]))/2
  return(2*R*asin(sqrt(d)))
}

#' Get the distance matrix associated with a set of points
#' @param coord dataframe, several (lon,lat) coordinates
#' @param R numeric, Earth Radius in km 
#' @param toRad numeric, degree-to-radian conversion factor 
#' @return a matrix of distances
getDistMat <- function(coord,R=6371,toRad=pi/180){
  # coord is a data frame with (lon,lat)columns, result is in km.
  n=NROW(coord);M=matrix(0,n,n)
  for(i in 2:n){
    pt1=coord[i,];pts=coord[1:(i-1),]
    d=getDistVector(pt1,pts,R,toRad)
    M[i,1:(i-1)]=d;M[1:(i-1),i]=M[i,1:(i-1)]
  }
  return(M)
}

#' Compute the covariance matrix from the distance matrix
#' @param D numeric matrix, distance matrix
#' @param s numeric, scale parameter
#' @param r numeric, range parameter
#' @return a covariance matrix
DtoV <- function(D,s,r){
  return(s^2*exp(-D/r))
}