### Cline Analysis on Cyanocitta mtDNA ###
library("hzar")
library("doMC")
registerDoMC()
library("coda")

### Set up seeds for MCMC ###
mainSeed = 
	list(
		A=c(978,544,99,596,528,124),
		B=c(544,99,596,528,124,978),
		C=c(99,596,528,124,978,544),
		D=c(596,528,124,978,544,99),
		E=c(528,124,978,544,99,596),
		F=c(124,978,544,99,596,528),
		G=c(784,496,315,392,481,268),
		H=c(496,315,392,481,268,784),
		I=c(315,392,481,268,784,496),
		J=c(392,481,268,784,496,315),
		K=c(481,268,784,496,315,392),
		L=c(268,784,496,315,392,481),
		M=c(372,285,115,914,377,418),
		N=c(285,115,914,377,418,372),
		O=c(115,914,377,418,372,285))

		
### Read in data files ###
JayStreaks <-read.table("../data/Cyanocitta_streaks_input.txt",header=TRUE) # Proportion of blue vs white frontal streaks

JayStreaks


## Blank out space in memory to hold mtDNA data analysis
if(length(apropos("^Cst$",ignore.case=FALSE)) == 0 ||
 !is.list(Cst) ) 
 
Cst <- list()
Cst$streaks <- list();
Cst$streaks$obs <- list(); #space to hold observed data
Cst$streaks$models <- list(); #space to hold models to fit
Cst$streaks$fitRs <- list(); #space to hold compiled fit requests
Cst$streaks$runs <- list(); #space to hold the output data chains
Cst$streaks$analysis <- list(); #space to hold the analysed data

## Add observed data into Cst list
Cst$streaks$obs <-hzar.doMolecularData1DPops(JayStreaks$Distance,JayStreaks$blue,JayStreaks$n)

## Plot graph to quickly visualize observed data
hzar.plot.obsData(Cst$streaks$obs)

## Creating helper function ##
Cst.loadstreaksmodel <- function(scaling,tails,
							 id=paste(scaling,tails,sep="."))
	Cst$streaks$models[[id]] <<-
	 hzar.makeCline1DFreq(Cst$streaks$obs, scaling, tails)


### Now we name the models with the scaling and tail informations
Cst.loadstreaksmodel("none","none","modelI"); ## Pmax Pmin 0 and 1; no exp tail 
Cst.loadstreaksmodel("fixed" ,"none","modelII"); ## Pmax Pmin fixed to obs mean; no exp tail 
Cst.loadstreaksmodel("free" ,"none","modelIII"); ## Pmax Pmin free to vary; no exp tail 

Cst.loadstreaksmodel("none","right","modelIV"); ## Pmax Pmin 0 and 1; right exp tail
Cst.loadstreaksmodel("fixed" ,"right","modelV"); ## Pmax Pmin fixed to obs mean; right exp tail
Cst.loadstreaksmodel("free" ,"right","modelVI"); ## Pmax Pmin free to vary; right exp tail

Cst.loadstreaksmodel("none","left","modelVII"); 
Cst.loadstreaksmodel("fixed" ,"left","modelVIII");
Cst.loadstreaksmodel("free" ,"left","modelIX");

Cst.loadstreaksmodel("none","mirror","modelX");
Cst.loadstreaksmodel("fixed" ,"mirror","modelXI"); 
Cst.loadstreaksmodel("free" ,"mirror","modelXII");

Cst.loadstreaksmodel("none","both","modelXIII"); ## Pmax Pmin 0 and 1; 2 tails w/ independent params
Cst.loadstreaksmodel("fixed" ,"both","modelXIV"); ## Pmax Pmin fixed to obs mean; 2 tails w/ independent params
Cst.loadstreaksmodel("free" ,"both","modelXV"); ## Pmax Pmin free to vary; 2 tails w/ independent params


## Check the default settings
print(Cst$streaks$models)
length(Cst$streaks$models)

## Modify models to focus on region where observed data were collected. ##
Cst$streaks$models <- sapply(Cst$streaks$models,hzar.model.addBoxReq,-50,1350, simplify=FALSE)

## Also, due to the large number of free variables, it is prudent to reduce the tune setting of modelXV from 1.5 to 1.2
# hzar.meta.tune(Cst$streaks$models$modelXV)<-1.2

## Check updated settings 
# Cst$streaks$models

## Compile each of the models to prepare for fitting
Cst$streaks$fitRs$init <- sapply(Cst$streaks$models,hzar.first.fitRequest.old.ML,obsData=Cst$streaks$obs,verbose=FALSE,simplify=FALSE)

### Set length of chain and burnin ###
burnin<-5e5 #5e5
chainLength<-1e6 #1e6


## Update settings for the fitter is desired
class(Cst$streaks$fitRs$init)
for(i in 1:length(Cst$streaks$fitRs$init)){
	Cst$streaks$fitRs$init[[i]]$mcmcParam$chainLength<-chainLength
	Cst$streaks$fitRs$init[[i]]$mcmcParam$burnin<-burnin
	Cst$streaks$fitRs$init[[i]]$mcmcParam$seed[[1]] <- mainSeed[[i]]
}
names(Cst$streaks$fitRs$init)

## Check fit request settings
print(Cst$streaks$fitRs$init)

## Start running models for an initial chain
Cst$streaks$runs$init <- list()

for(i in 1:length(Cst$streaks$fitRs$init)){
	Cst$streaks$runs$init[[i]]<-hzar.doFit(Cst$streaks$fitRs$init[[i]])
}
names(Cst$streaks$runs$init)<-names(Cst$streaks$fitRs$init)

## Plotting the trace
for(i in 1:length(Cst$streaks$runs$init)){
	plot(hzar.mcmc.bindLL(Cst$streaks$runs$init[[i]]))
}

names(Cst$streaks$runs$init)<-names(Cst$streaks$fitRs$init)

## Then combine a new set of fit requests using the initial chain
Cst$streaks$fitRs$chains <- lapply(Cst$streaks$runs$init,hzar.next.fitRequest)

## Replicate each fit request 3 times, keeping the original seeds while switching to a new seed channel
Cst$streaks$fitRs$chains <-hzar.multiFitRequest(Cst$streaks$fitRs$chains,each=3,baseSeed=NULL)

param_names<-lapply(Cst$streaks$fitRs$chains,function(x) names(x$modelParam$init))


## To be thorough, randomize the initial value for each fit.
for(i in 1:length(Cst$streaks$fitRs$chains)){
	Cst$streaks$fitRs$chains[[i]]$modelParam$init["center"] <- runif(1,-50,1350)
	Cst$streaks$fitRs$chains[[i]]$modelParam$init["width"] <- runif(1,0,1400)
}

## Provide initial starting values for other parameters
for(i in 1:length(grep("pMin",param_names))){
	Cst$streaks$fitRs$chains[[grep("pMin",param_names)[i]]]$modelParam$init["pMin"]<-runif(1,0,1)
}
for(i in 1:length(grep("pMax",param_names))){
	Cst$streaks$fitRs$chains[[grep("pMax",param_names)[i]]]$modelParam$init["pMax"]<-runif(1,0,1)
}
for(i in 1:length(grep("deltaL",param_names))){
	Cst$streaks$fitRs$chains[[grep("deltaL",param_names)[i]]]$modelParam$init["deltaL"]<-runif(1,10,60)
}
for(i in 1:length(grep("tauL",param_names))){
	Cst$streaks$fitRs$chains[[grep("tauL",param_names)[i]]]$modelParam$init["tauL"]<-runif(1,0,1)
}
for(i in 1:length(grep("deltaR",param_names))){
	Cst$streaks$fitRs$chains[[grep("deltaR",param_names)[i]]]$modelParam$init["deltaR"]<-runif(1,10,60)
}
for(i in 1:length(grep("tauR",param_names))){
	Cst$streaks$fitRs$chains[[grep("tauR",param_names)[i]]]$modelParam$init["tauR"]<-runif(1,0,1)
}
for(i in 1:length(grep("deltaM",param_names))){
	Cst$streaks$fitRs$chains[[grep("deltaM",param_names)[i]]]$modelParam$init["deltaM"]<-runif(1,10,60)
}
for(i in 1:length(grep("tauM",param_names))){
	Cst$streaks$fitRs$chains[[grep("tauM",param_names)[i]]]$modelParam$init["tauM"]<-runif(1,0,1)
}


### Run a chain of 3 runs for every fit request
Cst$streaks$runs$chains <- hzar.doChain.multi(Cst$streaks$fitRs$chains,doPar=TRUE,inOrder=FALSE,count=3)

### Look at convergence metrics ###
for(i in 1:(45/3)){
	print(summary(do.call(mcmc.list,lapply(Cst$streaks$runs$chains[((i-1)*3+1):((i-1)*3+3)],function(x) hzar.mcmc.bindLL(x[[3]])))))
}


plot(do.call(mcmc.list, lapply(Cst$streaks$runs$chains[14],function(x) hzar.mcmc.bindLL(x[[3]]) )) )


### All runs are convergent! Move on to data aggregation ###

## Create model data group (hzar.dataGroup object) for each model from initial runs
Cst$streaks$analysis$initDGs <- list(
nullModel = hzar.dataGroup.null(Cst$streaks$obs))

for(i in 1:15){
	Cst$streaks$analysis$initDGs[[i]]<-hzar.dataGroup.add(Cst$streaks$runs$init[[i]])
}

names(Cst$streaks$analysis$initDGs)<-names(Cst$streaks$fitRs$init)


### Check for 15 non-indentical models ###
for(i in 1:length(Cst$streaks$analysis$initDGs)){
	print(names(Cst$streaks$analysis$initDGs)[i])
	print(sapply(Cst$streaks$analysis$initDGs,function(x) hzar.sameModel(Cst$streaks$analysis$initDGs[[i]],x)))
}

### Create a hzar.obsDataGroup from the hzar.dataGroup we just created.
Cst$streaks$analysis$oDG <-hzar.make.obsDataGroup(Cst$streaks$analysis$initDGs)
?hzar.make.obsDataGroup


Cst$streaks$analysis$oDG <-hzar.copyModelLabels(Cst$streaks$analysis$initDGs,Cst$streaks$analysis$oDG)


## Convert all runs to hzar.dataGroup objects, adding them to the hzar.obsDataGroup object.
length(Cst$streaks$analysis$initDGs)
names(Cst$streaks$analysis$oDG[[1]])
length(Cst$streaks$analysis$oDG[[1]])

Cst$streaks$analysis$oDG <-hzar.make.obsDataGroup(lapply(Cst$streaks$runs$chains,hzar.dataGroup.add),Cst$streaks$analysis$oDG)
 
## Check the number of hzar.dataGroup objects in the hzar.obsDataGroup object
print(summary(Cst$streaks$analysis$oDG$data.groups))
 
## Compare the fitted clines graphically
hzar.plot.cline(Cst$streaks$analysis$oDG)

## Print out the model with the minimum AICc score
#str(Cst$streaks$analysis)
#print(Cst$streaks$analysis$model.name<-rownames(Cst$streaks$analysis$AICcTable)[[which.min(Cst$streaks$analysis$AICcTable$AICc)]])
print(Cst$streaks$analysis$AICcTable <- hzar.AICc.hzar.obsDataGroup(Cst$streaks$analysis$oDG))

## Print out model with minimum AICc score ##
print(Cst$streaks$analysis$model.name <-
        rownames(Cst$streaks$analysis$AICcTable
                 )[[which.min(Cst$streaks$analysis$AICcTable$AICc)]])

## Extract hzar.dataGroup object for selected model
Cst$streaks$analysis$model.selected<-
Cst$streaks$analysis$oDG$data.groups[[Cst$streaks$analysis$model.name]]

## Looking at variation in parameters for the selected model
print(hzar.getLLCutParam(Cst$streaks$analysis$model.selected,
names(Cst$streaks$analysis$model.selected$data.param)))

## Print and plot ML cline for selected model
print(hzar.get.ML.cline(Cst$streaks$analysis$model.selected));
hzar.plot.cline(Cst$streaks$analysis$model.selected);
hzar.plot.fzCline(Cst$streaks$analysis$model.selected)

hzar.plot.fzCline(Cst$streaks$analysis$model.selected, xlab="Distance (km)", ylab="Proportion of blue-streaked individuals")


### Save output so we do not have to rerun the analysis ###
save(Cst,file="../output/Cst_HZAR_streaks_output.Rdata")
load("../output/Cst_HZAR_streaks_output.Rdata")
Cst 
