### HZAR on Cyanocitta Clines ###
require("hzar")
require("doMC")
registerDoMC()
require("coda")

### Set up seeds for MCMC ###
mainSeed = 
	list(
		A=c(978,544,99,596,528,124),
		B=c(544,99,596,528,124,978),
		C=c(99,596,528,124,978,544),
		D=c(596,528,124,978,544,99),
		E=c(528,124,978,544,99,596),
		F=c(124,978,544,99,596,528),
		G=c(784,496,315,392,481,268),
		H=c(496,315,392,481,268,784),
		I=c(315,392,481,268,784,496),
		J=c(392,481,268,784,496,315),
		K=c(481,268,784,496,315,392),
		L=c(268,784,496,315,392,481),
		M=c(372,285,115,914,377,418),
		N=c(285,115,914,377,418,372),
		O=c(115,914,377,418,372,285))


### Read in data files ###
JayQ<-read.table("../data/Cst_msat_Contact_K2.txt",header=T) # Data from K=2 STRUCTURE run for all populations
JayLoc<-read.table("../data/Cst_msat_clinal_locality.txt",header=T)

JayQ
JayLoc

## Blank out space in memory to hold microsat data analysis
if(length(apropos("^Cst$",ignore.case=FALSE)) == 0 ||
 !is.list(Cst) ) 
 
Cst <- list()
Cst$msat <- list();
Cst$msat$obs <- list(); #space to hold observed data
Cst$msat$models <- list(); #space to hold models to fit
Cst$msat$fitRs <- list(); #space to hold compiled fit requests
Cst$msat$runs <- list(); #space to hold the output data chains
Cst$msat$analysis <- list(); #space to hold the analysed data


# Add observed data into Cst list
Cst$msat$obs <-hzar.doNormalData1DRaw(hzar.mapSiteDist(JayLoc$Locality,JayLoc$Distance),JayQ$Locality,JayQ$Q_score)

# Then plot graph of observed data to visualize.
hzar.plot.obsData(Cst$msat$obs);

# Helper function
Cst.loadmsatmodel <- function(scaling,tails,
							 id=paste(scaling,tails,sep=".")){
	Cst$msat$models[[id]] <<-
	 hzar.makeCline1DNormal(Cst$msat$obs, tails)
	  ## As there is no quick option for "fixed" scaling, and the
	  ## sites "ID_Bonner" and "UT_BoulderMtn" have a fair number of samples,
	  ## fix the mean and variance of the left and right sides of
	  ## the cline to values observed at sites "ID_Bonner" and "UT_BoulderMtn".
	 if (all(regexpr("fixed",scaling,ignore.case=TRUE) == 1 )){
		hzar.meta.fix(Cst$msat$models[[id]])$muL <<- TRUE
		hzar.meta.fix(Cst$msat$models[[id]])$muR <<- TRUE
		hzar.meta.fix(Cst$msat$models[[id]])$varL <<- TRUE
		hzar.meta.fix(Cst$msat$models[[id]])$varR <<- TRUE
 }
 
 	 if (all(regexpr("none",scaling,ignore.case=TRUE) == 1 )){
		hzar.meta.fix(Cst$msat$models[[id]])$muL <<- TRUE
		hzar.meta.fix(Cst$msat$models[[id]])$muR <<- TRUE
		hzar.meta.fix(Cst$msat$models[[id]])$varL <<- TRUE
		hzar.meta.fix(Cst$msat$models[[id]])$varR <<- TRUE
		
	  hzar.meta.init(Cst$msat$models[[id]])$muL <<-1
	  hzar.meta.init(Cst$msat$models[[id]])$varL <<-0.001 #This does not seem to work when var is set to 0
	  hzar.meta.init(Cst$msat$models[[id]])$muR <<-0
	  hzar.meta.init(Cst$msat$models[[id]])$varR <<-0.001 #This does not seem to work when var is set to 0
	  
 }else{
	  ## ID_Bonner is the "left" side of the cline, so pull the
	  ## initial values from there.
	  hzar.meta.init(Cst$msat$models[[id]])$muL <<-
	   Cst$msat$obs$frame["ID_Bonner","mu"]
	  hzar.meta.init(Cst$msat$models[[id]])$varL <<-
	   Cst$msat$obs$frame["ID_Bonner","var"]
	  ## UT_BoulderMtn is the "right" side of the cline, so pull the
	  ## initial values from there.
	  hzar.meta.init(Cst$msat$models[[id]])$muR <<-
	   Cst$msat$obs$frame["UT_BoulderMtn","mu"]
	  hzar.meta.init(Cst$msat$models[[id]])$varR <<-
	   Cst$msat$obs$frame["UT_BoulderMtn","var"]
	 }
 }


### Now we name the models with the scaling and tail informations
Cst.loadmsatmodel("none","none","modelI"); 
## Pmax Pmin 0 and 1; no exp tail 
Cst.loadmsatmodel("fixed" ,"none","modelII"); 
## Pmax Pmin fixed to obs mean; no exp tail 
Cst.loadmsatmodel("free" ,"none","modelIII"); 
## Pmax Pmin free to vary; no exp tail 

Cst.loadmsatmodel("none","right","modelIV"); 
## Pmax Pmin 0 and 1; right exp tail
Cst.loadmsatmodel("fixed" ,"right","modelV"); 
## Pmax Pmin fixed to obs mean; right exp tail
Cst.loadmsatmodel("free" ,"right","modelVI"); 
## Pmax Pmin free to vary; right exp tail

Cst.loadmsatmodel("none","left","modelVII"); 
Cst.loadmsatmodel("fixed" ,"left","modelVIII");
Cst.loadmsatmodel("free" ,"left","modelIX");

Cst.loadmsatmodel("none","mirror","modelX");
Cst.loadmsatmodel("fixed" ,"mirror","modelXI"); 
Cst.loadmsatmodel("free" ,"mirror","modelXII");

Cst.loadmsatmodel("none","both","modelXIII"); 
## Pmax Pmin 0 and 1; 2 tails w/ independent params
Cst.loadmsatmodel("fixed" ,"both","modelXIV"); 
## Pmax Pmin fixed to obs mean; 2 tails w/ independent params
Cst.loadmsatmodel("free" ,"both","modelXV"); 
## Pmax Pmin free to vary; 2 tails w/ independent params

## Check the default settings
print(Cst$msat$models)
length(Cst$msat$models)

## Modify all models to focus on the region where the observed data were collected Observations were between 0 and 1200 km.
Cst$msat$models <- sapply(Cst$msat$models,hzar.model.addBoxReq,-30 , 1250, simplify=FALSE) 

## Also, due to the large number of free variables, reduce the tune setting of modelXV from 1.5 to 1.2
hzar.meta.tune(Cst$msat$models$modelXV)<-1.2

## Check updated settings 
Cst$msat$models

## Using hzar.first.fitRequest.gC for fitting gaussian clines
Cst$msat$fitRs$init <- sapply(Cst$msat$models,hzar.first.fitRequest.gC,obsData=Cst$msat$obs,verbose=FALSE,simplify=FALSE)
 
### Set length of chain and burnin ###
burnin<-5e5 
chainLength<-1e6 
 
## Update settings for the fitter is desired
class(Cst$msat$fitRs$init)
for(i in 1:length(Cst$msat$fitRs$init)){
	Cst$msat$fitRs$init[[i]]$mcmcParam$chainLength<-chainLength
	Cst$msat$fitRs$init[[i]]$mcmcParam$burnin<-burnin
	Cst$msat$fitRs$init[[i]]$mcmcParam$seed[[1]] <- mainSeed[[i]]
}
names(Cst$msat$fitRs$init)

## Check fit request settings
print(Cst$msat$fitRs$init)

## Start running models for an initial chain
Cst$msat$runs$init <- list()

for(i in 1:length(Cst$msat$fitRs$init)){
	Cst$msat$runs$init[[i]]<-hzar.doFit(Cst$msat$fitRs$init[[i]])
}
names(Cst$msat$runs$init)<-names(Cst$msat$fitRs$init)

# Then combine a new set of fit requests using the initial chain 
Cst$msat$fitRs$chains <- lapply(Cst$msat$runs$init,hzar.next.fitRequest)

## Replicate each fit request 3 times, keeping the original seeds while switching to a new seed channel 
Cst$msat$fitRs$chains <-hzar.multiFitRequest(Cst$msat$fitRs$chains,each=3,baseSeed=NULL)

param_names<-lapply(Cst$msat$fitRs$chains,function(x) names(x$modelParam$init))

## To be thorough, randomize the initial value for each fit.
for(i in 1:length(Cst$msat$fitRs$chains)){
	Cst$msat$fitRs$chains[[i]]$modelParam$init["center"] <- runif(1,-50,1350)
	Cst$msat$fitRs$chains[[i]]$modelParam$init["width"] <- runif(1,0,1400)
	Cst$msat$fitRs$chains[[i]]$modelParam$init["varH"] <- 10^runif(1,-1,1)
}

### Provide initial starting values for other parameters
for(i in 1:length(grep("muL",param_names))){
	Cst$msat$fitRs$chains[[grep("muL",param_names)[i]]]$modelParam$init["muL"]<-runif(1,0,50)
}

for(i in 1:length(grep("muR",param_names))){
	Cst$msat$fitRs$chains[[grep("muR",param_names)[i]]]$modelParam$init["muR"]<-runif(1,0,50)
}

for(i in 1:length(grep("varL",param_names))){
	Cst$msat$fitRs$chains[[grep("varL",param_names)[i]]]$modelParam$init["varL"]<-runif(1,-1,1)
}

for(i in 1:length(grep("varR",param_names))){
	Cst$msat$fitRs$chains[[grep("varR",param_names)[i]]]$modelParam$init["varR"]<-runif(1,-1,1)
}

for(i in 1:length(grep("deltaL",param_names))){
	Cst$msat$fitRs$chains[[grep("deltaL",param_names)[i]]]$modelParam$init["deltaL"]<-runif(1,10,60)
}

for(i in 1:length(grep("tauL",param_names))){
	Cst$msat$fitRs$chains[[grep("tauL",param_names)[i]]]$modelParam$init["tauL"]<-runif(1,0,1)
}

for(i in 1:length(grep("deltaR",param_names))){
	Cst$msat$fitRs$chains[[grep("deltaR",param_names)[i]]]$modelParam$init["deltaR"]<-runif(1,10,60)
}

for(i in 1:length(grep("tauR",param_names))){
	Cst$msat$fitRs$chains[[grep("tauR",param_names)[i]]]$modelParam$init["tauR"]<-runif(1,0,1)
}

for(i in 1:length(grep("deltaM",param_names))){
	Cst$msat$fitRs$chains[[grep("deltaM",param_names)[i]]]$modelParam$init["deltaM"]<-runif(1,10,60)
}

for(i in 1:length(grep("tauM",param_names))){
	Cst$msat$fitRs$chains[[grep("tauM",param_names)[i]]]$modelParam$init["tauM"]<-runif(1,0,1)
}

### Run a chain of 3 runs for every fit request
Cst$msat$runs$chains <- hzar.doChain.multi(Cst$msat$fitRs$chains,doPar=TRUE,inOrder=FALSE,count=3)

### Look at convergence metrics ###
for(i in 1:(45/3)){
	print(summary(do.call(mcmc.list,lapply(Cst$msat$runs$chains[((i-1)*3+1):((i-1)*3+3)],function(x) hzar.mcmc.bindLL(x[[3]])))))
}

### All runs are convergent!!! Move on to data aggregation ###
Cst$msat$analysis$initDGs <- list()
for(i in 1:15){
	Cst$msat$analysis$initDGs[[i]]<-hzar.dataGroup.add(Cst$msat$runs$init[[i]])
}

names(Cst$msat$analysis$initDGs)<-names(Cst$msat$fitRs$init)

### Check for 15 non-indential models ###
for(i in 1:length(Cst$msat$analysis$initDGs)){
	print(names(Cst$msat$analysis$initDGs)[i])
	print(sapply(Cst$msat$analysis$initDGs,function(x) hzar.sameModel(Cst$msat$analysis$initDGs[[i]],x)))
}

### Create a hzar.obsDataGroup from the hzar.dataGroup we just created.
Cst$msat$analysis$oDG <-hzar.make.obsDataGroup(Cst$msat$analysis$initDGs)

Cst$msat$analysis$oDG <-hzar.copyModelLabels(Cst$msat$analysis$initDGs,Cst$msat$analysis$oDG)

## Convert all runs to hzar.dataGroup objects, adding them to the hzar.obsDataGroup object.
length(Cst$msat$analysis$initDGs)
names(Cst$msat$analysis$oDG[[1]])
length(Cst$msat$analysis$oDG[[1]])

Cst$msat$analysis$oDG <-hzar.make.obsDataGroup(lapply(Cst$msat$runs$chains,hzar.dataGroup.add),Cst$msat$analysis$oDG)

### Calculate and store AICc scores ###
Cst$msat$analysis$AICcTable<-hzar.AICc.hzar.obsDataGroup(Cst$msat$analysis$oDG)

### Save output so we do not have to rerun the analysis ###
save(Cst,file="../output/Cst_HZAR_msat_output.Rdata")
load("../output/Cst_HZAR_msat_output.Rdata")
Cst 

## Print out the model with the minimum AICc score
Cst$msat$analysis$model.name<-rownames(Cst$msat$analysis$AICcTable)[[ which.min(Cst$msat$analysis$AICcTable$AICc )]] #modelIII is preferred via AICc scores
Cst$msat$analysis$model.selected<-Cst$msat$analysis$oDG$data.groups[[Cst$msat $analysis$model.name]]

### Look at variation in parameters ###
hzar.getLLCutParam(Cst$msat$analysis$model.selected,names(Cst$msat$analysis$model.selected$data.param))

### Print out cline parameters ###
Cst_ML_cline<-hzar.get.ML.cline(Cst$msat$analysis$model.selected)

sink("../output/Cline_parameters.txt")
Cst_ML_cline
sink()

### Create figure of chosen model with confidence intervals ###
hzar.plot.cline(Cst$msat$analysis$model.selected)

pdf("../output/HZAR_msat.pdf")
hzar.plot.fzCline(Cst$msat$analysis$model.selected,ylab="Average Q Score")
dev.off()