# Dyer et al. Molecular Ecology 2018
# R code to run Hzar clines. 
# this code is based on https://github.com/rystanley/Collaborative_R_Stuff/blob/master/Mallory_HZARclines_outlier.R


#change working directory!


library(hzar)

chainLength=1e5;     
#make each model run off a separate seed
mainSeed=
   list(A=c(596,528,124,978,544,99),
        B=c(528,124,978,544,99,596),
        C=c(124,978,544,99,596,528),
	    D=c(544,99,596,528,124,978),
	    E=c(978,544,99,596,528,124),
	    F=c(99,596,528,124,978,544),
	    G=c(168,123,843,761,378,111),
	    H=c(333,444,555,378,222,333),
	    I=c(378,222,333,444,555,666),
	    J=c(902,168,123,843,761,378),
	    K=c(100,200,300,400,500,600),
	    L=c(101,202,303,404,505,606),
	    M=c(110,220,330,440,550,660),
	    N=c(111,222,321,410,520,620),
	    O=c(105,205,305,405,506,605))

#read in data
#file should be tab-delimited
subqMolecular <- read.table("subq_genetic_data_cline.txt",header=TRUE)

#make sure data look good
subqMolecular

#set aside space in memory to hold analyses. list is called "subq"
if(length(apropos("^subq$",ignore.case=FALSE)) == 0 ||
    !is.list(subq) ) subq <- list()
subq$all <- list();
subq$all$obs <- list();
subq$all$models <- list();
subq$all$fitRs <- list();
subq$all$runs <- list();
subq$all$analysis <- list();

###### PICK ONE of these depending on which cline you are inferring
###### Make sure only one is un-#'d
###### Just do one at a time and change which data are used.  

# option 1: use all loci from subq data
subq$all$obs <-
   hzar.doMolecularData1DPops(subqMolecular$distance_km,
                              subqMolecular$all.1,
                              subqMolecular$all.N);
  
# option 2: make cline from fraction subq in each population
#subq$all$obs <-
#   hzar.doMolecularData1DPops(subqMolecular$distance_km,
#                              subqMolecular$FracRec.1,
#                              subqMolecular$FracRec.N);
 
  
# option 3: use all loci from subq/recens combined data i.e. admixture
#subq$all$obs <-
#   hzar.doMolecularData1DPops(subqMolecular$distance_km,
#                              subqMolecular$subq.1,
#                             subqMolecular$subq.N);

 
###### END options.
 
#look at the plot the frequency vs distance
 hzar.plot.obsData(subq$all$obs);


##MODELS

## Make a helper function
subq.loadallmodel <- function(scaling,tails,
                              id=paste(scaling,tails,sep="."))
subq$all$models[[id]] <<- hzar.makeCline1DFreq(subq$all$obs, scaling, tails)

subq.loadallmodel("fixed","none","model1");
subq.loadallmodel("fixed","both","model2");
subq.loadallmodel("fixed","mirror","model3");
subq.loadallmodel("free","none","model4");
subq.loadallmodel("free","both","model5");
subq.loadallmodel("free", "mirror","model6");
subq.loadallmodel("fixed", "left","model7");
subq.loadallmodel("fixed", "right","model8");
subq.loadallmodel("free", "left","model9");
subq.loadallmodel("free", "right","model10");
subq.loadallmodel("none", "none","model11");
subq.loadallmodel("none", "both","model12");
subq.loadallmodel("none", "right","model13");
subq.loadallmodel("none", "left","model14");
subq.loadallmodel("none", "mirror","model15");

#####This would be another way to identify models
#outlier$"11_6A"$models$model1 <- hzar.makeCline1DFreq(data=outlier$"11_6A"$obs, 
#                                               scaling="fixed", tails="none")
#outlier$"11_6A"$models$model2 <- hzar.makeCline1DFreq(data=outlier$"11_6A"$obs, 
#                                                      scaling="fixed", tails="both")
#outlier$"11_6A"$models$model3 <- hzar.makeCline1DFreq(data=outlier$"11_6A"$obs, 
#                                                      scaling="free", tails="none")
#outlier$"11_6A"$models$model4 <- hzar.makeCline1DFreq(data=outlier$"11_6A"$obs, 
#                                                      scaling="free", tails="both")


## Check the default settings
print(subq$all$models)


## Check the updated settings
#print(subq$all$models)


## Compile each of the models to prepare for fitting
subq$all$fitRs$init <- sapply(subq$all$models,
                         hzar.first.fitRequest.old.ML,
                         obsData=subq$all$obs,
                         verbose=FALSE,
                         simplify=FALSE)
                         
## Update the settings for the fitter if desired.
subq$all$fitRs$init$model1$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model1$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4


subq$all$fitRs$init$model2$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model2$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4


subq$all$fitRs$init$model3$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model3$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$all$fitRs$init$model4$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model4$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

  
subq$all$fitRs$init$model5$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model5$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

  
subq$all$fitRs$init$model6$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model6$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$all$fitRs$init$model7$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model7$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4


subq$all$fitRs$init$model8$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model8$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4


subq$all$fitRs$init$model9$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model9$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4


subq$all$fitRs$init$model10$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model10$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$all$fitRs$init$model11$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model11$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$all$fitRs$init$model12$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model12$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$all$fitRs$init$model13$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model13$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$all$fitRs$init$model14$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model14$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4

subq$all$fitRs$init$model15$mcmcParam$chainLength <-
  chainLength;                          #1e5
subq$all$fitRs$init$model15$mcmcParam$burnin <-
  chainLength %/% 10;                   #1e4


## Check fit request settings
print(subq$all$fitRs$init)


#replicate each fit request 3 times, keeping the original seeds 
#while switching to a new seed channel
#30 total fit requests - 10 models, 3 times each
subq$all$fitRs$chains <- hzar.multiFitRequest(subq$all$fitRs$init,
                                                     each=3,
                                                    baseSeed=NULL)

## THIS IS THE SLOW STEP
##have 30 fit requests - models 15, each with 3 chain, 
#running the chain 3 times - 90 total runs? - This step will take a while
subq$all$runs$doSeq <- lapply(subq$all$fitRs$chains,
                                     hzar.chain.doSeq, 
									 count = 3)

## Did model1 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[1:3],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## If you want to plot the trace: 
# plot(hzar.mcmc.bindLL(subq$all$runs$init$model1))

## Did model2 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[4:6],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model3 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[7:9],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model4 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[10:12],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model5 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[13:15],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model6 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[16:18],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model7 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[19:21],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )


## Did model8 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[22:24],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model9 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[25:27],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model10 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[28:30],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model11 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[31:33],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model12 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[34:36],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model13 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[37:39],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model14 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[40:42],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )

## Did model15 converge?
summary(do.call(mcmc.list,
                lapply(subq$all$runs$doSeq[43:45],
                       function(x) hzar.mcmc.bindLL(x[[3]]) )) )



## Start aggregation of data for analysis

## Create a model data group for the null model (expected allele
## frequency independent of distance along cline) to include in
## analysis.
subq$all$analysis$initDGs <- list(nullModel = hzar.dataGroup.null(subq$all$obs))

## Create a model data group (hzar.dataGroup object) for each model from the initial runs
subq$all$analysis$initDGs$model1 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model1)
subq$all$analysis$initDGs$model2 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model2)
subq$all$analysis$initDGs$model3 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model3)
subq$all$analysis$initDGs$model4 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model4)
subq$all$analysis$initDGs$model5 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model5)
subq$all$analysis$initDGs$model6 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model6)
subq$all$analysis$initDGs$model7 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model7)
subq$all$analysis$initDGs$model8 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model8)
subq$all$analysis$initDGs$model9 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model9)
subq$all$analysis$initDGs$model10 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model10)
subq$all$analysis$initDGs$model11 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model11)
subq$all$analysis$initDGs$model12 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model12)
subq$all$analysis$initDGs$model13 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model13)
subq$all$analysis$initDGs$model14 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model14)
subq$all$analysis$initDGs$model15 <-
  hzar.dataGroup.add(subq$all$runs$doSeq$model15)



## Create a hzar.obsDataGroup object from the four hzar.dataGroup just created, copying the naming scheme 
subq$all$analysis$oDG <-
  hzar.make.obsDataGroup(subq$all$analysis$initDGs)
subq$all$analysis$oDG <-
    hzar.copyModelLabels(subq$all$analysis$initDGs,
                         subq$all$analysis$oDG)

## Convert all the runs to hzar.dataGroup objects, adding them to
## the hzar.obsDataGroup object.
subq$all$analysis$oDG <-
  hzar.make.obsDataGroup(lapply(subq$all$runs$doSeq,
                                hzar.dataGroup.add),
                         subq$all$analysis$oDG);

## Check to make sure that there are only ELEVEN hzar.dataGroup
## objects in the hzar.obsDataGroup object.
print(summary(subq$all$analysis$oDG$data.groups))

## Compare the 10 cline models to the null model graphically
hzar.plot.cline(subq$all$analysis$oDG);

print(subq$all$analysis$AICcTable <-
      hzar.AICc.hzar.obsDataGroup(subq$all$analysis$oDG));

## Print out the model with the minimum AICc score
print(subq$all$analysis$model.name <-
  rownames(subq$all$analysis$AICcTable
           )[[ which.min(subq$all$analysis$AICcTable$AICc )]])

## Extract the hzar.dataGroup object for the selected model
subq$all$analysis$model.selected <-
  subq$all$analysis$oDG$data.groups[[subq$all$analysis$model.name]]


## Look at the variation in parameters for the selected model
print(hzar.getLLCutParam(subq$all$analysis$model.selected,
                         names(subq$all$analysis$model.selected$data.param)));

## Print the maximum likelihood cline for the selected model
print(hzar.get.ML.cline(subq$all$analysis$model.selected))

## Plot the maximum likelihood cline for the selected model
hzar.plot.cline(subq$all$analysis$model.selected);

## Plot the 95% credible cline region for the selected model
hzar.plot.fzCline(subq$all$analysis$model.selected);

## End Molecular Analysis

dev.off()

