### HZAR on Cyanocitta Clines ###
require("hzar")
require("doMC")
registerDoMC()
require("coda")

### Set up seeds for MCMC ###
mainSeed = 
	list(
		A=c(978,544,99,596,528,124),
		B=c(544,99,596,528,124,978),
		C=c(99,596,528,124,978,544),
		D=c(596,528,124,978,544,99),
		E=c(528,124,978,544,99,596),
		F=c(124,978,544,99,596,528),
		G=c(784,496,315,392,481,268),
		H=c(496,315,392,481,268,784),
		I=c(315,392,481,268,784,496),
		J=c(392,481,268,784,496,315),
		K=c(481,268,784,496,315,392),
		L=c(268,784,496,315,392,481),
		M=c(372,285,115,914,377,418),
		N=c(285,115,914,377,418,372),
		O=c(115,914,377,418,372,285))



### Read in data files ###
JayScores<-read.table("../data/Cyanocitta_morpho_PCAscores.txt",header=T)
JayLoc<-read.table("../data/Cyanocitta_morpho_clinal_locality.txt",header=T)

head(JayScores)
head(JayLoc)

## Blank out space in memory to hold morphology data analysis
if(length(apropos("^Cst$",ignore.case=FALSE)) == 0 ||
 !is.list(Cst) ) 
 
Cst <- list()
Cst$morph <- list();
Cst$morph$obs <- list(); #space to hold observed data
Cst$morph$models <- list(); #space to hold models to fit
Cst$morph$fitRs <- list(); #space to hold compiled fit requests
Cst$morph$runs <- list(); #space to hold the output data chains
Cst$morph$analysis <- list(); #space to hold the analysed data


# Add observed data into Cst list
Cst$morph$obs <-hzar.doNormalData1DRaw(hzar.mapSiteDist(JayLoc$Locality,JayLoc$Distance),JayScores$LOCALITY,JayScores$Comp.1)

# Then plot graph of observed data to visualize.
hzar.plot.obsData(Cst$morph$obs);

# Helper function
Cst.loadmorphmodel <- function(scaling,tails,
							 id=paste(scaling,tails,sep=".")){
	Cst$morph$models[[id]] <<-
	 hzar.makeCline1DNormal(Cst$morph$obs, tails)
	  ## As there is no quick option for "fixed" scaling, and the
	  ## sites "ID_Bonner" and "UT_BoulderMtn" have a fair number of samples,
	  ## fix the mean and variance of the left and right sides of
	  ## the cline to values observed at sites "ID_Bonner" and "UT_BoulderMtn".
	 if (all(regexpr("fixed",scaling,ignore.case=TRUE) == 1 )){
		hzar.meta.fix(Cst$morph$models[[id]])$muL <<- TRUE
		hzar.meta.fix(Cst$morph$models[[id]])$muR <<- TRUE
		hzar.meta.fix(Cst$morph$models[[id]])$varL <<- TRUE
		hzar.meta.fix(Cst$morph$models[[id]])$varR <<- TRUE
 }
 
 	 if (all(regexpr("none",scaling,ignore.case=TRUE) == 1 )){
		hzar.meta.fix(Cst$morph$models[[id]])$muL <<- TRUE
		hzar.meta.fix(Cst$morph$models[[id]])$muR <<- TRUE
		hzar.meta.fix(Cst$morph$models[[id]])$varL <<- TRUE
		hzar.meta.fix(Cst$morph$models[[id]])$varR <<- TRUE
		
	  hzar.meta.init(Cst$morph$models[[id]])$muL <<-1
	  hzar.meta.init(Cst$morph$models[[id]])$varL <<-0.001
	  
	  hzar.meta.init(Cst$morph$models[[id]])$muR <<-0
	  hzar.meta.init(Cst$morph$models[[id]])$varR <<-0.001	   
	  
 }else{
	  ## ID_Bonner is the "left" side of the cline, so pull the
	  ## initial values from there.
	  hzar.meta.init(Cst$morph$models[[id]])$muL <<-
	   Cst$morph$obs$frame["ID_Bonner","mu"]
	  hzar.meta.init(Cst$morph$models[[id]])$varL <<-
	   Cst$morph$obs$frame["ID_Bonner","var"]
	  ## UT_BoulderMtn is the "right" side of the cline, so pull the
	  ## initial values from there.
	  hzar.meta.init(Cst$morph$models[[id]])$muR <<-
	   Cst$morph$obs$frame["UT_BoulderMtn","mu"]
	  hzar.meta.init(Cst$morph$models[[id]])$varR <<-
	   Cst$morph$obs$frame["UT_BoulderMtn","var"]
	 }
 }


### Now we name the models with the scaling and tail informations
Cst.loadmorphmodel("none","none","modelI"); 
## Pmax Pmin 0 and 1; no exp tail 
Cst.loadmorphmodel("fixed" ,"none","modelII"); 
## Pmax Pmin fixed to obs mean; no exp tail 
Cst.loadmorphmodel("free" ,"none","modelIII"); 
## Pmax Pmin free to vary; no exp tail 

Cst.loadmorphmodel("none","right","modelIV"); 
## Pmax Pmin 0 and 1; right exp tail
Cst.loadmorphmodel("fixed" ,"right","modelV"); 
## Pmax Pmin fixed to obs mean; right exp tail
Cst.loadmorphmodel("free" ,"right","modelVI"); 
## Pmax Pmin free to vary; right exp tail

Cst.loadmorphmodel("none","left","modelVII"); 
Cst.loadmorphmodel("fixed" ,"left","modelVIII");
Cst.loadmorphmodel("free" ,"left","modelIX");

Cst.loadmorphmodel("none","mirror","modelX");
Cst.loadmorphmodel("fixed" ,"mirror","modelXI"); 
Cst.loadmorphmodel("free" ,"mirror","modelXII");

Cst.loadmorphmodel("none","both","modelXIII"); 
## Pmax Pmin 0 and 1; 2 tails w/ independent params
Cst.loadmorphmodel("fixed" ,"both","modelXIV"); 
## Pmax Pmin fixed to obs mean; 2 tails w/ independent params
Cst.loadmorphmodel("free" ,"both","modelXV"); 
## Pmax Pmin free to vary; 2 tails w/ independent params

## Check the default settings
print(Cst$morph$models)
length(Cst$morph$models)

## Modify all models to focus on the region where the observed data were collected Observations were between 0 and 1200 km.
Cst$morph$models <- sapply(Cst$morph$models,hzar.model.addBoxReq,-50 , 1350, simplify=FALSE) ## !!! Range may change depending on how the locality data is modified!!

## Also, due to the large number of free variables, it is prudent to reduce the tune setting of modelXV from 1.5 to 1.2 
hzar.meta.tune(Cst$morph$models$modelXV)<-1.2

## Check updated settings 
Cst$morph$models

## Using hzar.first.fitRequest.gC for fitting gaussian clines
Cst$morph$fitRs$init <- sapply(Cst$morph$models,hzar.first.fitRequest.gC,obsData=Cst$morph$obs,verbose=FALSE,simplify=FALSE)
 
### Set length of chain and burnin ###
burnin<-5e5 #5e5
chainLength<-1e6 #1e6
 
## Update settings for the fitter is desired
class(Cst$morph$fitRs$init)
for(i in 1:length(Cst$morph$fitRs$init)){
	Cst$morph$fitRs$init[[i]]$mcmcParam$chainLength<-chainLength
	Cst$morph$fitRs$init[[i]]$mcmcParam$burnin<-burnin
	Cst$morph$fitRs$init[[i]]$mcmcParam$seed[[1]] <- mainSeed[[i]]
}
names(Cst$morph$fitRs$init)

## Check fit request settings
print(Cst$morph$fitRs$init)

## Start running models for an initial chain
Cst$morph$runs$init <- list()

for(i in 1:length(Cst$morph$fitRs$init)){
	Cst$morph$runs$init[[i]]<-hzar.doFit(Cst$morph$fitRs$init[[i]])
}
names(Cst$morph$runs$init)<-names(Cst$morph$fitRs$init)

# Then combine a new set of fit requests using the initial chain 
Cst$morph$fitRs$chains <- lapply(Cst$morph$runs$init,hzar.next.fitRequest)

## Replicate each fit request 3 times, keeping the original seeds while switching to a new seed channel (<<<<-----???)
Cst$morph$fitRs$chains <-hzar.multiFitRequest(Cst$morph$fitRs$chains,each=3,baseSeed=NULL)

param_names<-lapply(Cst$morph$fitRs$chains,function(x) names(x$modelParam$init))

## To be thorough, randomize the initial value for each fit.
for(i in 1:length(Cst$morph$fitRs$chains)){
	Cst$morph$fitRs$chains[[i]]$modelParam$init["center"] <- runif(1,-50,1350)
	Cst$morph$fitRs$chains[[i]]$modelParam$init["width"] <- runif(1,0,1400)
	Cst$morph$fitRs$chains[[i]]$modelParam$init["varH"] <- 10^runif(1,-1,1)
}

### Provide initial starting values for other parameters
for(i in 1:length(grep("muL",param_names))){
	Cst$morph$fitRs$chains[[grep("muL",param_names)[i]]]$modelParam$init["muL"]<-runif(1,0,50)
}

for(i in 1:length(grep("muR",param_names))){
	Cst$morph$fitRs$chains[[grep("muR",param_names)[i]]]$modelParam$init["muR"]<-runif(1,0,50)
}

for(i in 1:length(grep("varL",param_names))){
	Cst$morph$fitRs$chains[[grep("varL",param_names)[i]]]$modelParam$init["varL"]<-runif(1,-1,1)
}

for(i in 1:length(grep("varR",param_names))){
	Cst$morph$fitRs$chains[[grep("varR",param_names)[i]]]$modelParam$init["varR"]<-runif(1,-1,1)
}

for(i in 1:length(grep("deltaL",param_names))){
	Cst$morph$fitRs$chains[[grep("deltaL",param_names)[i]]]$modelParam$init["deltaL"]<-runif(1,10,60)
}

for(i in 1:length(grep("tauL",param_names))){
	Cst$morph$fitRs$chains[[grep("tauL",param_names)[i]]]$modelParam$init["tauL"]<-runif(1,0,1)
}

for(i in 1:length(grep("deltaR",param_names))){
	Cst$morph$fitRs$chains[[grep("deltaR",param_names)[i]]]$modelParam$init["deltaR"]<-runif(1,10,60)
}

for(i in 1:length(grep("tauR",param_names))){
	Cst$morph$fitRs$chains[[grep("tauR",param_names)[i]]]$modelParam$init["tauR"]<-runif(1,0,1)
}

for(i in 1:length(grep("deltaM",param_names))){
	Cst$morph$fitRs$chains[[grep("deltaM",param_names)[i]]]$modelParam$init["deltaM"]<-runif(1,10,60)
}

for(i in 1:length(grep("tauM",param_names))){
	Cst$morph$fitRs$chains[[grep("tauM",param_names)[i]]]$modelParam$init["tauM"]<-runif(1,0,1)
}

### Run a chain of 3 runs for every fit request
Cst$morph$runs$chains <- hzar.doChain.multi(Cst$morph$fitRs$chains,doPar=TRUE,inOrder=FALSE,count=3)

### Look at convergence metrics ###
for(i in 1:(45/3)){
	print(summary(do.call(mcmc.list,lapply(Cst$morph$runs$chains[((i-1)*3+1):((i-1)*3+3)],function(x) hzar.mcmc.bindLL(x[[3]])))))
}

for(i in 1:(45/3)){
  plot((do.call(mcmc.list,lapply(Cst$morph$runs$chains[((i-1)*3+1):((i-1)*3+3)],function(x) hzar.mcmc.bindLL(x[[3]])))))
}

### All runs are convergent!!! Move on to data aggregation ###
Cst$morph$analysis$initDGs <- list()
for(i in 1:15){
	Cst$morph$analysis$initDGs[[i]]<-hzar.dataGroup.add(Cst$morph$runs$init[[i]])
}

names(Cst$morph$analysis$initDGs)<-names(Cst$morph$fitRs$init)

### Check for 15 non-indentical models ###
for(i in 1:length(Cst$morph$analysis$initDGs)){
	print(names(Cst$morph$analysis$initDGs)[i])
	print(sapply(Cst$morph$analysis$initDGs,function(x) hzar.sameModel(Cst$morph$analysis$initDGs[[i]],x)))
}

### Create a hzar.obsDataGroup from the hzar.dataGroup we just created.
Cst$morph$analysis$oDG <-hzar.make.obsDataGroup(Cst$morph$analysis$initDGs)
Cst$morph$analysis$oDG <-hzar.copyModelLabels(Cst$morph$analysis$initDGs, Cst$morph$analysis$oDG)
?hzar.make.obsDataGroup



## Convert all runs to hzar.dataGroup objects, adding them to the hzar.obsDataGroup object.
length(Cst$morph$analysis$initDGs)
names(Cst$morph$analysis$oDG[[1]])
length(Cst$morph$analysis$oDG[[1]])

Cst$morph$analysis$oDG <-hzar.make.obsDataGroup(lapply(Cst$morph$runs$chains,hzar.dataGroup.add),Cst$morph$analysis$oDG)

### Save output so we do not have to rerun the analysis ###
save(Cst,file="../output/Cst_HZAR_morpho_output.Rdata")
load("../output/Cst_HZAR_morpho_output.Rdata")
Cst 
 
print(summary(Cst$morph$analysis$oDG$data.groups))
 
hzar.plot.cline(Cst$morph$analysis$oDG)

## Print out the model with the minimum AICc score
print(Cst$morph$analysis$AICcTable <- hzar.AICc.hzar.obsDataGroup(Cst$morph$analysis$oDG))
print(Cst$morph$analysis$model.name <- rownames(Cst$morph$analysis$AICcTable)[[ which.min(Cst$morph$analysis$AICcTable$AICc )]])
Cst$morph$analysis$model.selected <- Cst$morph$analysis$oDG$data.groups[[Cst$morph$analysis$model.name]]

## Show variation in parameters in selected model
print(hzar.getLLCutParam(Cst$morph$analysis$model.selected, names(Cst$morph$analysis$model.selected$data.param)))
## Print maximum likelihood cline params for selected model
print(hzar.get.ML.cline(Cst$morph$analysis$model.selected))

## Plot ML cline plus 95% credible cline region for selected model
hzar.plot.fzCline(Cst$morph$analysis$model.selected)

