# Dyer et al. Molecular Ecology 2018
# R code to run Hzar clines. 
# this code is based on https://github.com/rystanley/Collaborative_R_Stuff/blob/master/Mallory_HZARclines_outlier.R

## Load the package
library(hzar);


## A typical chain length
chainLength=1e5;                       

## Make each model run off a separate seed
mainSeed=
   list(A=c(596,528,124,978,544,99),
        B=c(528,124,978,544,99,596),
        C=c(124,978,544,99,596,528),
	    D=c(544,99,596,528,124,978),
	    E=c(978,544,99,596,528,124),
	    F=c(99,596,528,124,978,544),
	    G=c(168,123,843,761,378,111),
	    H=c(333,444,555,378,222,333),
	    I=c(378,222,333,444,555,666),
	    J=c(902,168,123,843,761,378),
	    K=c(100,200,300,400,500,600),
	    L=c(101,202,303,404,505,606),
	    M=c(110,220,330,440,550,660),
	    N=c(111,222,321,410,520,620),
	    O=c(105,205,305,405,506,605))


###  Treat each line as a population.  Lines from the same pop will have the same location. 

## Read in the data
subqMating <- read.table("subq_mating_data_cline.txt",header=TRUE)

## Print a summary of the trait data
print(subqMating)

## Set aside space in memory to hold morphological analysis.  List is called "subq"
if(length(apropos("^subq$",ignore.case=FALSE)) == 0 ||
   !is.list(subq) ) subq <- list()
subq$Mating <- list();
## Space to hold the observed data
subq$Mating$obs <- list();
## Space to hold the models to fit
subq$Mating$models <- list();
## Space to hold the compiled fit requests
subq$Mating$fitRs <- list();
## Space to hold the output data chains
subq$Mating$runs <- list();
## Space to hold the analysed data
subq$Mating$analysis <- list();


###### PICK ONE of these depending on which mating cline you are testing
###### Make sure only one is un-#'d
###### Just do one at a time and change which data are used.  

# option 1: use subq allo mating data

#subq$Mating$obs <-
#   hzar.doMolecularData1DPops(subqMating$distance_km,
#                              subqMating$allo.mean,
#                              subqMating$allo.N);

# option 2: use recens mating data

#subq$Mating$obs <-
#   hzar.doMolecularData1DPops(subqMating$distance_km,
#                              subqMating$rec.mean,
#                              subqMating$rec.N);



####  END OPTIONS

#look at the plot the frequency vs distance
 hzar.plot.obsData(subq$Mating$obs);


## Make a helper function
subq.loadMatingmodel <- function(scaling,tails,
                              id=paste(scaling,tails,sep="."))
subq$Mating$models[[id]] <<- hzar.makeCline1DFreq(subq$Mating$obs, scaling, tails)

subq.loadMatingmodel("fixed","none","model1");
subq.loadMatingmodel("fixed","both","model2");
subq.loadMatingmodel("fixed","mirror","model3");
subq.loadMatingmodel("free","none","model4");
subq.loadMatingmodel("free","both","model5");
subq.loadMatingmodel("free", "mirror","model6");
subq.loadMatingmodel("fixed", "left","model7");
subq.loadMatingmodel("fixed", "right","model8");
subq.loadMatingmodel("free", "left","model9");
subq.loadMatingmodel("free", "right","model10");
subq.loadMatingmodel("none", "none","model11");
subq.loadMatingmodel("none", "both","model12");
subq.loadMatingmodel("none", "right","model13");
subq.loadMatingmodel("none", "left","model14");
subq.loadMatingmodel("none", "mirror","model15");

print(subq$Mating$models)


## Modify all models to focus on the region where the observed
## data were collected.
#### --> changed from example file to match my data
## Observations were between 0 and 1100 km.
# Decided to not use this - do not need
#subq$all$models <- sapply(subq$all$models,
#                          hzar.model.addBoxReq,
#                          -30 , 1150,
#                          simplify=FALSE)

## Check the updated settings
#print(subq$Mating$models)


## Compile each of the models to prepare for fitting
subq$Mating$fitRs$init <- sapply(subq$Mating$models,
                         hzar.first.fitRequest.old.ML,
                         obsData=subq$Mating$obs,
                         verbose=FALSE,
                         simplify=FALSE)
                         
## Update the settings for the fitter if desired.
subq$Mating$fitRs$init$model1$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model1$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4


subq$Mating$fitRs$init$model2$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model2$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4


subq$Mating$fitRs$init$model3$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model3$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$Mating$fitRs$init$model4$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model4$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

  
subq$Mating$fitRs$init$model5$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model5$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

  
subq$Mating$fitRs$init$model6$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model6$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$Mating$fitRs$init$model7$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model7$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$Mating$fitRs$init$model8$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model8$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$Mating$fitRs$init$model9$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model9$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$Mating$fitRs$init$model10$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model10$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$Mating$fitRs$init$model11$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model11$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$Mating$fitRs$init$model12$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model12$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$Mating$fitRs$init$model13$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model13$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$Mating$fitRs$init$model14$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model14$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$Mating$fitRs$init$model15$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$Mating$fitRs$init$model15$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4


## Check fit request settings
print(subq$Mating$fitRs$init)


#replicate each fit request 3 times, keeping the original seeds 
#while switching to a new seed channel
#18 total fit requests - 6 models, 3 times each
subq$Mating$fitRs$chains <- hzar.multiFitRequest(subq$Mating$fitRs$init,
                                                     each=3,
                                                    baseSeed=NULL)


##have 36 fit requests - models 6, each with 3 chain, 
# THIS STEP TAKES A LONG TIME
#running the chain 3 times - 36 total runs? - This step will take a while
subq$Mating$runs$doSeq <- lapply(subq$Mating$fitRs$chains,
                                     hzar.chain.doSeq, 
									 count = 3)

## Did model1 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[1:3],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Plot the trace if you want to:
# plot(hzar.mcmc.bindLL(subq$Mating$runs$init$model1))

## Did model2 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[4:6],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model3 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[7:9],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model4 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[10:12],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model5 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[13:15],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model6 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[16:18],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model7 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[19:21],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model8 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[22:24],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model9 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[25:27],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model10 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[28:30],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model11 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[31:33],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )
## Did model12 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[34:36],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model13 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[37:39],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model14 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[40:42],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model15 converge?
summary(do.call(mcmc.list,
                lapply(subq$Mating$runs$doSeq[43:45],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )



## Start aggregation of data for analysis

## Create a model data group for the null model (expected allele
## frequency independent of distance along cline) to include in the analysis
subq$Mating$analysis$initDGs <- list(nullModel = hzar.dataGroup.null(subq$Mating$obs))

## Create a model data group (hzar.dataGroup object) for each model from the initial runs
subq$Mating$analysis$initDGs$model1 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model1)
subq$Mating$analysis$initDGs$model2 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model2)
subq$Mating$analysis$initDGs$model3 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model3)
subq$Mating$analysis$initDGs$model4 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model4)
subq$Mating$analysis$initDGs$model5 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model5)
subq$Mating$analysis$initDGs$model6 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model6)
subq$Mating$analysis$initDGs$model7 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model7)
  subq$Mating$analysis$initDGs$model8 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model8)
  subq$Mating$analysis$initDGs$model9 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model9)
  subq$Mating$analysis$initDGs$model10 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model10)
  subq$Mating$analysis$initDGs$model11 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model11)
  subq$Mating$analysis$initDGs$model12 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model12)
  subq$Mating$analysis$initDGs$model13 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model13)
  subq$Mating$analysis$initDGs$model14 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model14)
  subq$Mating$analysis$initDGs$model15 <-
  hzar.dataGroup.add(subq$Mating$runs$doSeq$model15)
  
## Create a hzar.obsDataGroup object from the four hzar.dataGroup just created, copying the naming scheme 
subq$Mating$analysis$oDG <-
  hzar.make.obsDataGroup(subq$Mating$analysis$initDGs)
subq$Mating$analysis$oDG <-
    hzar.copyModelLabels(subq$Mating$analysis$initDGs,
                         subq$Mating$analysis$oDG)

## Convert all the runs to hzar.dataGroup objects, adding them to
## the hzar.obsDataGroup object.
subq$Mating$analysis$oDG <-
  hzar.make.obsDataGroup(lapply(subq$Mating$runs$doSeq,
                                hzar.dataGroup.add),
                         subq$Mating$analysis$oDG);

## Check to make sure that there are only 15 hzar.dataGroup
## objects in the hzar.obsDataGroup object.
print(summary(subq$Mating$analysis$oDG$data.groups))

## Compare the 15 cline models to the null model graphically
hzar.plot.cline(subq$Mating$analysis$oDG);

print(subq$Mating$analysis$AICcTable <-
      hzar.AICc.hzar.obsDataGroup(subq$Mating$analysis$oDG));

## Print out the model with the minimum AICc score
print(subq$Mating$analysis$model.name <-
  rownames(subq$Mating$analysis$AICcTable
           )[[ which.min(subq$Mating$analysis$AICcTable$AICc )]])

## Extract the hzar.dataGroup object for the selected model
subq$Mating$analysis$model.selected <-
  subq$Mating$analysis$oDG$data.groups[[subq$Mating$analysis$model.name]]


## Look at the variation in parameters for the selected model
print(hzar.getLLCutParam(subq$Mating$analysis$model.selected,
                         names(subq$Mating$analysis$model.selected$data.param)));

## Print the maximum likelihood cline for the selected model
print(hzar.get.ML.cline(subq$Mating$analysis$model.selected))

## Plot the maximum likelihood cline for the selected model
hzar.plot.cline(subq$Mating$analysis$model.selected);

## Plot the 95% credible cline region for the selected model
hzar.plot.fzCline(subq$Mating$analysis$model.selected);

## End Molecular Analysis

dev.off()

