### Script for analysis of simulated tree from:
###     Phylogenetic inference of reciprocal effects between geographic range 
###       evolution and diversification
###     Emma E. Goldberg, Lesley T. Lancaster, and Richard H. Ree
###     Systematic Biology

library(diversitreeGSE)
# This script uses diversitreeGSE_0.4-5, which consists of the GeoSSE
# functionality added into version 0.4-5 of Rich FitzJohn's R package
# diversitree.  It is available as supplementary material associated with the
# above paper.
#
# As of January 2011, the version of diversitree on CRAN contains GeoSSE
# natively.  If using this newer version, all references below to "gse2" should
# be replaced with "geosse".  Some other small changes may be required, too.

#--------------------------------------------------
# Function for reading in trees produced by the SimTreeSDD simulator, which is
# available as supplementary material associated with the above paper.
#--------------------------------------------------
read.ttn <- function(treefile, nodes=TRUE, check=TRUE)
{
    treestr <- readLines(treefile, n=1)
    tree <- read.tree(text=treestr)

    all.states <- read.table(treefile, skip=1)

    if (nodes)
    {
        ntips <- (length(all.states[,1]) + 1) / 2
        states <- all.states[,2]
        names(states) <- all.states[,1]
        tree$node.label <- names(states[ntips+1:length(states)])
    } else
    {
        ntips <- length(all.states[,1])
        states <- all.states[1:ntips,2]
        names(states) <- all.states[1:ntips,1]
    }

    if (check)
    {
        ntips2 <- length(tree$tip.label)
        if (ntips != ntips2)
        {
            warning(paste("number of states (", ntips, 
                          ") is not equal to       number of tips (", 
                          ntips2, ")", sep=""), immediate.=T)
        }
        print(paste("read tree with", ntips2, "tips"))

        i <- which(!(names(states[1:ntips]) %in% tree$tip.label))
        if (length(i) != 0)
        {
            warning("some tip and state labels do not match", immediate.=T)
            print(names(states)[i])
            j <- which(!(tree$tip.label %in% names(states)))
            print(tree$tip.label[j])
        }
    }

    return(list(tree = tree, states = states))
}

# Here are the names of the files containing the simulated trees to be fit.
# The example below is for the source-sink scenario (batch 8 in Table 1).
treefiles <- list.files("batch8", pattern="^tree.*\\.ttn$", full.names=T)

#--------------------------------------------------
# Maximum likelihood
#--------------------------------------------------

# Adjust these to reflect the model being fit.
# The example here is for no between-region speciation (sAB = 0).
par.start <- rep(1.5, 6)
names(par.start) <- c("sA", "sB", "xA", "xB", "dA", "dB")

# the file in which results will be recorded
outfile <- "batch8_nosAB_equal-s.dat"
cat(c("tree", "ntips", "nAB", "nA", "nB", "lnL"), names(par.start),
      "converge\n", sep="\t", file=outfile, append=F)

for (treefile in treefiles)
{
    treeinfo <- read.ttn(treefile, nodes=FALSE, check=FALSE)
    treeinfo$states <- treeinfo$states[1:Ntip(treeinfo$tree)]
    lnL.7par <- make.gse2(treeinfo$tree, treeinfo$states)

    # Adjust this to reflect the model being fit:
    lnL <- constrain(lnL.7par, sAB ~ 0)

    ans <- find.mle(lnL, par.start, method="subplex")
    all.pars <- ans$par

    cat(treefile, length(treeinfo$states), sep="\t", file=outfile, append=T)
    cat("", sum(treeinfo$states == 0, na.rm=T), 
              sum(treeinfo$states == 1, na.rm=T), 
              sum(treeinfo$states == 2, na.rm=T), 
        sep="\t", file=outfile, append=T)
    cat("", ans$lnLik, all.pars, ans$convergence, "\n", sep="\t",
        file=outfile, append=T)
}


#--------------------------------------------------
# Markov chain Monte Carlo
#--------------------------------------------------
# Further MCMC explanation is in chaparral-analysis.R.

# Adjust these to reflect the model being fit.
# The example here is for the full, 7 parameter model.
par.start <- rep(1.5, 7)
names(par.start) <- c("sA", "sB", "sAB", "xA", "xB", "dA", "dB")

r <- 0.5
upper <- rep(5, 7)
w <- rep(1, 7)
nsteps <- 5000

dir.create("batch8-mcmc")

for (treefile in treefiles)
{
    treeinfo <- read.ttn(treefile, nodes=FALSE, check=FALSE)
    treeinfo$states <- treeinfo$states[1:Ntip(treeinfo$tree)]

    # adjust this to reflect the model being fit:
    lnL <- make.gse2(treeinfo$tree, treeinfo$states)

    mcmc.ans <- mcmc(lnL, par.start, nsteps=nsteps, w=w, lower=rep(0,7), 
            upper=upper, prior=make.prior.exponential(r), fail.value=-Inf)

    # adjust output file here:
    temp <- substr(treefile, start=7, stop=(nchar(treefile)-3))
    outfile <- paste("batch8-mcmc/", temp, "dat", sep="")

    cat("i", names(par.start), "p\n", file=outfile, append=F) 
    write.table(mcmc.ans, file=outfile, quote=F, row.names=F, append=T, 
                col.names=F)
}
