setwd("C:/Users/tjgal/Desktop/vern") #install.packages("rexpokit") #install.packages("cladoRcpp") require(rexpokit) require#install.packages("rexpokit") #install.packages("cladoRcpp") require(rexpokit) require(cladoRcpp) library(devtools) devtools::install_github(repo="nmatzke/BioGeoBEARS") require(BioGeoBEARS) options(max.print=1000000) library(optimx) library(FD) library(snow) library(parallel) library(phangorn) calc_loglike_sp = compiler::cmpfun(calc_loglike_sp_prebyte) # crucial to fix bug in uppass calculations calc_independent_likelihoods_on_each_branch = compiler::cmpfun(calc_independent_likelihoods_on_each_branch_prebyte) # slight speedup hopefully require(ape) require(geiger) require(phytools) library(optimx) #Set Working Directory Distributions <- read.csv(file="vern_Distributions_ready.csv",row.names = 1, head=TRUE,sep=",") checkdist <- read.csv(file="vern_Distributions_ready.csv",row.names = 1, head=TRUE,sep=",") ##Import tree and check Vernonieae_Tree <- read.tree("Final_tree.tre") Vernonieae_Tree$edge.length is.ultrametric(Vernonieae_Tree) is.binary.tree(Vernonieae_Tree) is.rooted(Vernonieae_Tree) Vernonieae_Tree <- ladderize(Vernonieae_Tree,right = FALSE) plot(Vernonieae_Tree,type="fan",cex = 0.2) nodelabels(frame="none",cex = 0.4,col = "red") Vernonieae_Tree <- drop.tip(Vernonieae_Tree,c("Polydora_angustifolia","Vernonia_brachycalyx")) ##Extract the clade of interest Vernonieae_Tree_Extract <- extract.clade(phy = Vernonieae_Tree, node = mrca(Vernonieae_Tree)["Distephanus_ambongensis", "Vernonanthura_chamaedrys"]) ##Check that tree matches dataframe remove unneeded data from dataset check_These <- name.check(Vernonieae_Tree_Extract, checkdist, data.names=NULL) check_These$tree_not_data check_These$data_not_tree vern_Distributions_ready <- Distributions[!row.names(Distributions) %in% check_These$data_not_tree, ] vern_Distributions_ready <- Distributions name.check(Vernonieae_Tree_Extract, vern_Distributions_ready, data.names=NULL) plot(Vernonieae_Tree_Extract,type="fan",cex = 0.2) nodelabels(frame="none",cex = 0.4,col = "red") write.table(vern_Distributions_ready,"vern_Distributions_ready_check_ME.txt") ## NODENUMBER <- 361 tip_taxa <- Descendants(Vernonieae_Tree_Extract, NODENUMBER, type ="tips") tip_taxa <- unlist(tip_taxa, recursive = TRUE, use.names = TRUE) sort(Vernonieae_Tree_Extract$tip.label[tip_taxa],decreasing = FALSE) tip_taxa <- c("1:361") tip_taxa2 <- c("1","2","3") tip_taxa3 <- toString(tip_taxa) #Vernonieae_Tree_Extract <- drop.tip(phy = Vernonieae_Tree_Extract,c("Cullumia_patula","Polydora_angustifolia",Gazania_ciliaris","Gazania_sp","Gorteria_corymbosa","Hirpicium_armerioides")) ##Tree surgery - until we have calibrated tree #max(Vernonieae_Tree_Extract$edge.length) #Vernonieae_Tree_Extract$edge.length <- Vernonieae_Tree_Extract$edge.length*400 #Biogeobears requires files not objects as input. Save files from tree and data objects write.csv(vern_Distributions_ready,"vern_Distributions_ready.csv") toprow <- NULL toprow <- rbind.data.frame(c(nrow(vern_Distributions_ready),ncol(vern_Distributions_ready),"(A B C D E F G H I J)")) toprow ## Remember to reformat the input data file in phyllip format - manually for now until I finish the coding Regions_10 <- c("Asia", "Tropical_Africa", "No_Ea_Africa", "Australia", "Hawaii", "Andean", "Other_S_America", "N_America", "Meso_America", "Caribbean") plot(Vernonieae_Tree_Extract) write.tree(Vernonieae_Tree_Extract, "Vernonieae_Tree_Extract") #Another check to make sure number of samples are the same str(vern_Distributions_ready) Vernonieae_Tree_Extract plot(Vernonieae_Tree_Extract) Vernonieae_Tree_Extract$edge tree <- read.tree ("Vernonieae_Tree_Extract") #tree <- drop.tip(tree, "Vernonia_acaulis") write.tree(tree, "Vernonieae_Tree_Extract") #Prepare Biogeobears run library(phylotools) trfn = ("Vernonieae_Tree_Extract") geogfn = ("vern_Distributions_ready3.txt") moref(geogfn) tipranges = getranges_from_LagrangePHYLIP(lgdata_fn=geogfn) tipranges max(rowSums(dfnums_to_numeric(tipranges@df))) max_range_size = 4 numstates_from_numareas(numareas=9, maxareas=max_range_size, include_null_range=TRUE) ##Set up analysis analyses ####### BioGeoBEARS_run_object <- NULL BioGeoBEARS_run_object = define_BioGeoBEARS_run() BioGeoBEARS_run_object$trfn = trfn BioGeoBEARS_run_object$geogfn = geogfn BioGeoBEARS_run_object$max_range_size = max_range_size BioGeoBEARS_run_object$min_branchlength = 0.000001 BioGeoBEARS_run_object$include_null_range = TRUE # # set to FALSE for e.g. DEC* model, DEC*+J, etc. BioGeoBEARS_run_object$timesfn = "timeperiods.txt" # Uncomment this for a time stratified analysis and modify this file with the time periods. Oldest time period mus t be older than root. #BioGeoBEARS_run_object$dispersal_multipliers_fn = "manual_dispersal_multipliers.txt" #BioGeoBEARS_run_object$areas_allowed_fn = "areas_allowed.txt" #BioGeoBEARS_run_object$areas_adjacency_fn = "areas_adjacency.txt" BioGeoBEARS_run_object$distsfn = "distances_matrix_scaled.txt" BioGeoBEARS_run_object$on_NaN_error = -1e50 BioGeoBEARS_run_object$speedup = FALSE # shorcuts to speed ML search; use FALSE if worried (e.g. >3 params) BioGeoBEARS_run_object$use_optimx = "GenSA" # if FALSE, use optim() instead of optimx() BioGeoBEARS_run_object$num_cores_to_use = 6 BioGeoBEARS_run_object$force_sparse = FALSE BioGeoBEARS_run_object = readfiles_BioGeoBEARS_run(BioGeoBEARS_run_object) BioGeoBEARS_run_object$return_condlikes_table = TRUE BioGeoBEARS_run_object$calc_TTL_loglike_from_condlikes_table = TRUE BioGeoBEARS_run_object$calc_ancprobs = TRUE #Fix Root BioGeoBEARS_run_object$fixnode = c(369) BioGeoBEARS_run_object$fixlikes = c(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0) ####### Remove non-adjacent areas # Let's remove some non-adjacent ranges # STRATUM 1 (youngest) nonadjacent1=nonadjacent2=nonadjacent3=nonadjacent4=c("ABC", "ABCD", "ABCE", "ABCF", "ABCG", "ABCH", "ABCI", "ABD", "ABDE", "ABDF", "ABDG", "ABDH", "ABDI", "ABE", "ABEF", "ABEG", "ABEH", "ABEI", "ABF", "ABFG", "ABFH", "ABFI", "ABG","ABGH","ABGI","ABH","ABHI","ABI", "ACD", "ACDE", "ACDF","ACDG", "ACDH", "ACDI", "ACEF", "ACEG", "ACEH", "ACEI", "ACFG", "ACFH", "ACFI", "ACGH", "ACGI", "ACHI", "AD", "ADEF", "ADEG", "ADEH", "ADEI", "ADFG", "ADFH", "ADFI", "ADG", "ADGH","ADGI", "ADH", "ADHI", "ADI", "AE", "AEF", "AEFG", "AEFH", "AEFI", "AEG", "AEGH", "AEGI", "AEH", "AEHI", "AEI", "AF", "AFG", "AFGH", "AFGI", "AFH", "AFHI", "AFI", "AG", "AGH", "AGHI", "AGI", "AH", "AHI", "AI", "BC", "BCD", "BCDE", "BCDF", "BCDG", "BCDH", "BCDI", "BCE", "BCEF", "BCEG", "BCEH", "BCEI", "BCFG", "BCFH", "BCFI", "BCH","BCHI","BD", "BDEF", "BDEG", "BDEH", "BDEI", "BDF", "BDFG", "BDFH", "BDFI", "BDGH", "BDGI", "BDH", "BDHI", "BDI", "BE", "BEFG", "BEG", "BEGH", "BEH", "BFG", "BFH", "BFI", "BG", "BGH","BGI", "BH", "BHI", "CD", "CDE", "CDEF", "CDEG", "CDEH", "CDEI", "CDF", "CDFG","CDFH","CDFI","CDFH", "CDFI", "CDG", "CDGH", "CDGI", "CDH", "CDHI", "CDI", "CE", "CEF", "CEFG", "CEFH", "CEFI", "CEG", "CEGH", "CEGI", "CEH", "CEHI", "CEI", "CF", "CFG", "CFGH", "CFGI", "CFH", "CFHI", "CFI", "CG", "CGH", "CGHI", "CGI", "CH", "CHI", "CI", "DE", "DEF", "DEFG", "DEFH", "DEFI", "DEG", "DEGH", "DEGI", "DEH", "DEHI", "DEI", "DF", "DFG", "DFGH", "DFGI", "DFH", "DFHI", "DFI", "DG","DGH", "DGHI", "DGI", "DH", "DHI", "DI", "EFG","EFGH","EFGI","EG", "FG", "FGH","FGI","FH","FHI") #Cant exclude this one "EFH", sort(unique(nonadjacent1)) keepTF1 = ranges_list %in% nonadjacent1 == FALSE ranges_list_NEW1 = ranges_list[keepTF1] length(ranges_list_NEW1) # now 148 # STRATUM 2 keepTF2 = ranges_list %in% nonadjacent2 == FALSE ranges_list_NEW2 = ranges_list[keepTF2] length(ranges_list_NEW2) # STRATUM 3 keepTF3 = ranges_list %in% nonadjacent3 == FALSE ranges_list_NEW3 = ranges_list[keepTF3] length(ranges_list_NEW3) # STRATUM 4 (oldest) keepTF4 = ranges_list %in% nonadjacent4 == FALSE ranges_list_NEW4 = ranges_list[keepTF4] length(ranges_list_NEW4) # Make the stratum-specific states list states_list_0based_NEW1 = states_list_0based[keepTF1] states_list_0based_NEW2 = states_list_0based[keepTF2] states_list_0based_NEW3 = states_list_0based[keepTF3] states_list_0based_NEW4 = states_list_0based[keepTF4] # INPUT the NEW states list into the BioGeoBEARS_run_object, STRATIFIED BioGeoBEARS_run_object$lists_of_states_lists_0based[[1]] = states_list_0based_NEW1 BioGeoBEARS_run_object$lists_of_states_lists_0based[[2]] = states_list_0based_NEW2 BioGeoBEARS_run_object$lists_of_states_lists_0based[[3]] = states_list_0based_NEW3 BioGeoBEARS_run_object$lists_of_states_lists_0based[[4]] = states_list_0based_NEW4 ####################################################### # END MANUAL MODIFICATION OF THE STATES LIST ####################################################### BioGeoBEARS_run_object$rescale_params = TRUE BioGeoBEARS_run_object = section_the_tree(inputs=BioGeoBEARS_run_object, make_master_table=TRUE, plot_pieces=FALSE) check_BioGeoBEARS_run(BioGeoBEARS_run_object) # For a slow analysis, run once, then set runslow=FALSE to just load the saved result. ####DEC#### runslow = TRUE resfn = "DEC_All_areas_allowed.Rdata" if (runslow) { res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resDEC = res } else { # Loads to "res" load(resfn) resDEC = res } ##### DEC +J ### # Set up DEC+J model # Get the ML parameter values from the 2-parameter nested model # (this will ensure that the 3-parameter model always does at least as good) dstart = resDEC$outputs@params_table["d","est"] estart = resDEC$outputs@params_table["e","est"] jstart = 0.0001 # Input starting values for d, e BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","init"] = dstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","est"] = dstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","init"] = estart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","est"] = estart # Add j as a free parameter BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = jstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = jstart check_BioGeoBEARS_run(BioGeoBEARS_run_object) resfn = "DEC+J_unconstrained_v2.Rdata" runslow = TRUE if (runslow) { #sourceall("/Dropbox/_njm/__packages/BioGeoBEARS_setup/") res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resDECj = res } else { # Loads to "res" load(resfn) resDECj = res } ##Set up analysis analyses ####### ##### DEC +x ### # Fix J BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = 0.0 # Add X speciation BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","type"] = "free" #BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","init"] = xstart #BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","est"] = xstart check_BioGeoBEARS_run(BioGeoBEARS_run_object) resfn = "DEC+X.Rdata" runslow = TRUE if (runslow) { #sourceall("/Dropbox/_njm/__packages/BioGeoBEARS_setup/") res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resDECx = res } else { # Loads to "res" load(resfn) resDECx = res } ##### DEC +j +x ### jstart = resDECxjadj$outputs@params_table["j","est"] xstart = resDECxjadj$outputs@params_table["x","est"] dstart = resDECxjadj$outputs@params_table["d","est"] estart = resDECxjadj$outputs@params_table["e","est"] # Input starting values for d, e BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","init"] = dstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","est"] = dstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","init"] = estart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","est"] = estart # Add jump dispersal/founder-event speciation BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = jstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = jstart # Add X speciation BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","init"] = xstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","est"] = xstart BioGeoBEARS_run_object$rescale_params = TRUE BioGeoBEARS_run_object = section_the_tree(inputs=BioGeoBEARS_run_object, make_master_table=TRUE, plot_pieces=FALSE) check_BioGeoBEARS_run(BioGeoBEARS_run_object) resfn = "DEC_JX_adjacency_Freed.Rdata" runslow = TRUE if (runslow) { #sourceall("/Dropbox/_njm/__packages/BioGeoBEARS_setup/") res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) DECJXadjacency_Root = res } else { # Loads to "res" load(resfn) DECJXadjacency_Root = res } DECJXadjacency_b = DECJXadjacency_Root ####################################################### # Run DIVALIKE ####################################################### jstart = resDECxjfreenoadjacent$outputs@params_table["j","est"] xstart = resDECxjfreenoadjacent$outputs@params_table["x","est"] dstart = resDECxjfreenoadjacent$outputs@params_table["d","est"] estart = resDECxjfreenoadjacent$outputs@params_table["e","est"] # Set up DIVALIKE model ##Set up analysis analyses ####### BioGeoBEARS_run_object <- NULL BioGeoBEARS_run_object = define_BioGeoBEARS_run() BioGeoBEARS_run_object$trfn = trfn BioGeoBEARS_run_object$geogfn = geogfn BioGeoBEARS_run_object$max_range_size = max_range_size BioGeoBEARS_run_object$min_branchlength = 0.000001 BioGeoBEARS_run_object$include_null_range = TRUE # # set to FALSE for e.g. DEC* model, DEC*+J, etc. BioGeoBEARS_run_object$timesfn = "timeperiods.txt" # Uncomment this for a time stratified analysis and modify this file with the time periods. Oldest time period mus t be older than root. #BioGeoBEARS_run_object$dispersal_multipliers_fn = "manual_dispersal_multipliers.txt" #BioGeoBEARS_run_object$areas_allowed_fn = "areas_allowed.txt" #BioGeoBEARS_run_object$areas_adjacency_fn = "areas_adjacency.txt" BioGeoBEARS_run_object$distsfn = "distances_matrix_scaled.txt" BioGeoBEARS_run_object$on_NaN_error = -1e50 BioGeoBEARS_run_object$speedup = FALSE # shorcuts to speed ML search; use FALSE if worried (e.g. >3 params) BioGeoBEARS_run_object$use_optimx = "GenSA" # if FALSE, use optim() instead of optimx() BioGeoBEARS_run_object$num_cores_to_use = 6 BioGeoBEARS_run_object$force_sparse = FALSE BioGeoBEARS_run_object = readfiles_BioGeoBEARS_run(BioGeoBEARS_run_object) BioGeoBEARS_run_object$return_condlikes_table = TRUE BioGeoBEARS_run_object$calc_TTL_loglike_from_condlikes_table = TRUE BioGeoBEARS_run_object$calc_ancprobs = TRUE # Remove subset-sympatry BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","est"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["ysv","type"] = "2-j" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["ys","type"] = "ysv*1/2" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["y","type"] = "ysv*1/2" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","type"] = "ysv*1/2" # Allow classic, widespread vicariance; all events equiprobable BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01v","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01v","init"] = 0.5 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01v","est"] = 0.5 # No jump dispersal/founder-event speciation # BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "free" # BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = 0.01 # BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = 0.01 # Make the stratum-specific states list states_list_0based_NEW1 = states_list_0based[keepTF1] states_list_0based_NEW2 = states_list_0based[keepTF2] states_list_0based_NEW3 = states_list_0based[keepTF3] states_list_0based_NEW4 = states_list_0based[keepTF4] # INPUT the NEW states list into the BioGeoBEARS_run_object, STRATIFIED BioGeoBEARS_run_object$lists_of_states_lists_0based[[1]] = states_list_0based_NEW1 BioGeoBEARS_run_object$lists_of_states_lists_0based[[2]] = states_list_0based_NEW2 BioGeoBEARS_run_object$lists_of_states_lists_0based[[3]] = states_list_0based_NEW3 BioGeoBEARS_run_object$lists_of_states_lists_0based[[4]] = states_list_0based_NEW4 ####################################################### # END MANUAL MODIFICATION OF THE STATES LIST ####################################################### BioGeoBEARS_run_object$rescale_params = TRUE BioGeoBEARS_run_object = section_the_tree(inputs=BioGeoBEARS_run_object, make_master_table=TRUE, plot_pieces=FALSE) check_BioGeoBEARS_run(BioGeoBEARS_run_object) runslow = TRUE resfn = "DIVALIKE_M0_unconstrained_v1.Rdata" if (runslow) { res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resDIVALIKE = res } else { # Loads to "res" load(resfn) resDIVALIKE = res } # Set up DIVALIKE+J model # Get the ML parameter values from the 2-parameter nested model # (this will ensure that the 3-parameter model always does at least as good) dstart = resDIVALIKE$outputs@params_table["d","est"] estart = resDIVALIKE$outputs@params_table["e","est"] jstart = 0.0001 # Input starting values for d, e BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","init"] = dstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","est"] = dstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","init"] = estart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","est"] = estart # Remove subset-sympatry BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","est"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["ysv","type"] = "2-j" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["ys","type"] = "ysv*1/2" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["y","type"] = "ysv*1/2" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","type"] = "ysv*1/2" # Allow classic, widespread vicariance; all events equiprobable BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01v","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01v","init"] = 0.5 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01v","est"] = 0.5 # Add jump dispersal/founder-event speciation BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = jstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = jstart # Under DIVALIKE+J, the max of "j" should be 2, not 3 (as is default in DEC+J) BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","min"] = 0.00001 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","max"] = 1.99999 check_BioGeoBEARS_run(BioGeoBEARS_run_object) resfn = "DIVALIKE+J_M0_unconstrained_v1.Rdata" runslow = TRUE if (runslow) { #sourceall("/Dropbox/_njm/__packages/BioGeoBEARS_setup/") res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resDIVALIKEj = res } else { # Loads to "res" load(resfn) resDIVALIKEj = res } ###### DIVALIKE + X dstart = resDIVALIKE$outputs@params_table["d","est"] estart = resDIVALIKE$outputs@params_table["e","est"] # Input starting values for d, e BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","init"] = dstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","est"] = dstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","init"] = estart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","est"] = estart # Remove subset-sympatry BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","est"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["ysv","type"] = "2-j" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["ys","type"] = "ysv*1/2" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["y","type"] = "ysv*1/2" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","type"] = "ysv*1/2" # Allow classic, widespread vicariance; all events equiprobable BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01v","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01v","init"] = 0.5 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01v","est"] = 0.5 # Add jump dispersal/founder-event speciation BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = 0.0 # Under DIVALIKE+J, the max of "j" should be 2, not 3 (as is default in DEC+J) # BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","min"] = 0.00001 # BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","max"] = 1.99999 # Add X speciation BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","init"] = xstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","est"] = xstart check_BioGeoBEARS_run(BioGeoBEARS_run_object) resfn = "DIVALIKE+x_M0_unconstrained_v1.Rdata" runslow = TRUE if (runslow) { #sourceall("/Dropbox/_njm/__packages/BioGeoBEARS_setup/") res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resDIVALIKEx = res } else { # Loads to "res" load(resfn) resDIVALIKEx = res } ####### DIVAlike +J+x jstart = resDIVALIKEj$outputs@params_table["j","est"] xstart = resDIVALIKEx$outputs@params_table["x","est"] dstart = resDIVALIKEx$outputs@params_table["d","est"] estart = resDIVALIKEx$outputs@params_table["e","est"] # Add jump dispersal/founder-event speciation BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = jstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = jstart #Under DIVALIKE+J, the max of "j" should be 2, not 3 (as is default in DEC+J) BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","min"] = 0.00001 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","max"] = 1.99999 # Add X speciation BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","init"] = xstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","est"] = xstart check_BioGeoBEARS_run(BioGeoBEARS_run_object) resfn = "DIVALIKE+x+j_M0_unconstrained_v1.Rdata" runslow = TRUE if (runslow) { #sourceall("/Dropbox/_njm/__packages/BioGeoBEARS_setup/") res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resDIVALIKExj = res } else { # Loads to "res" load(resfn) resDIVALIKExj = res } ####################################################### # Run BAYAREALIKE ####################################################### ##Set up analysis analyses ####### BioGeoBEARS_run_object <- NULL BioGeoBEARS_run_object = define_BioGeoBEARS_run() BioGeoBEARS_run_object$trfn = trfn BioGeoBEARS_run_object$geogfn = geogfn BioGeoBEARS_run_object$max_range_size = max_range_size BioGeoBEARS_run_object$min_branchlength = 0.000001 BioGeoBEARS_run_object$include_null_range = TRUE # # set to FALSE for e.g. DEC* model, DEC*+J, etc. BioGeoBEARS_run_object$timesfn = "timeperiods.txt" # Uncomment this for a time stratified analysis and modify this file with the time periods. Oldest time period mus t be older than root. #BioGeoBEARS_run_object$dispersal_multipliers_fn = "manual_dispersal_multipliers.txt" #BioGeoBEARS_run_object$areas_allowed_fn = "areas_allowed.txt" #BioGeoBEARS_run_object$areas_adjacency_fn = "areas_adjacency.txt" BioGeoBEARS_run_object$distsfn = "distances_matrix_scaled.txt" BioGeoBEARS_run_object$on_NaN_error = -1e50 BioGeoBEARS_run_object$speedup = FALSE # shorcuts to speed ML search; use FALSE if worried (e.g. >3 params) BioGeoBEARS_run_object$use_optimx = "GenSA" # if FALSE, use optim() instead of optimx() BioGeoBEARS_run_object$num_cores_to_use = 6 BioGeoBEARS_run_object$force_sparse = FALSE BioGeoBEARS_run_object = readfiles_BioGeoBEARS_run(BioGeoBEARS_run_object) BioGeoBEARS_run_object$return_condlikes_table = TRUE BioGeoBEARS_run_object$calc_TTL_loglike_from_condlikes_table = TRUE BioGeoBEARS_run_object$calc_ancprobs = TRUE # Set up BAYAREALIKE model # No subset sympatry BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","est"] = 0.0 # No vicariance BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","est"] = 0.0 # No jump dispersal/founder-event speciation # BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "free" # BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = 0.01 # BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = 0.01 # Adjust linkage between parameters BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["ysv","type"] = "1-j" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["ys","type"] = "ysv*1/1" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["y","type"] = "1-j" # Only sympatric/range-copying (y) events allowed, and with # exact copying (both descendants always the same size as the ancestor) BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01y","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01y","init"] = 0.9999 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01y","est"] = 0.9999 # Check the inputs BioGeoBEARS_run_object = section_the_tree(inputs=BioGeoBEARS_run_object, make_master_table=TRUE, plot_pieces=FALSE) check_BioGeoBEARS_run(BioGeoBEARS_run_object) runslow = TRUE resfn = "BAYAREALIKE_M0_unconstrained_v1.Rdata" if (runslow) { res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resBAYAREALIKE = res } else { # Loads to "res" load(resfn) resBAYAREALIKE = res } # Set up BAYAREALIKE+J model # Get the ML parameter values from the 2-parameter nested model # (this will ensure that the 3-parameter model always does at least as good) dstart = resBAYAREALIKE$outputs@params_table["d","est"] estart = resBAYAREALIKE$outputs@params_table["e","est"] jstart = 0.0001 # Input starting values for d, e BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","init"] = dstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","est"] = dstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","init"] = estart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","est"] = estart # No subset sympatry BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","est"] = 0.0 # No vicariance BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","est"] = 0.0 # *DO* allow jump dispersal/founder-event speciation (set the starting value close to 0) BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = jstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = jstart # Under BAYAREALIKE+J, the max of "j" should be 1, not 3 (as is default in DEC+J) or 2 (as in DIVALIKE+J) BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","max"] = 0.99999 # Adjust linkage between parameters BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["ysv","type"] = "1-j" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["ys","type"] = "ysv*1/1" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["y","type"] = "1-j" # Only sympatric/range-copying (y) events allowed, and with # exact copying (both descendants always the same size as the ancestor) BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01y","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01y","init"] = 0.9999 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["mx01y","est"] = 0.9999 # NOTE (NJM, 2014-04): BAYAREALIKE+J seems to crash on some computers, usually Windows # machines. I can't replicate this on my Mac machines, but it is almost certainly # just some precision under-run issue, when optim/optimx tries some parameter value # just below zero. The "min" and "max" options on each parameter are supposed to # prevent this, but apparently optim/optimx sometimes go slightly beyond # these limits. Anyway, if you get a crash, try raising "min" and lowering "max" # slightly for each parameter: BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","min"] = 0.0000001 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","max"] = 4.9999999 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","min"] = 0.0000001 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","max"] = 4.9999999 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","min"] = 0.00001 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","max"] = 0.99999 check_BioGeoBEARS_run(BioGeoBEARS_run_object) resfn = "BAYAREALIKE+J_M0_unconstrained_v1.Rdata" runslow = TRUE if (runslow) { res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resBAYAREALIKEj = res } else { # Loads to "res" load(resfn) resBAYAREALIKEj = res } ######## BAYESAREALike + X # Add jump dispersal/founder-event speciation BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = 0.0 # Add X speciation BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","init"] = xstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","est"] = xstart BioGeoBEARS_run_object$rescale_params = FALSE check_BioGeoBEARS_run(BioGeoBEARS_run_object) resfn = "Bayesarealike+x_M0_unconstrained_v1.Rdata" runslow = TRUE if (runslow) { #sourceall("/Dropbox/_njm/__packages/BioGeoBEARS_setup/") res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) Bayesarealikex = res } else { # Loads to "res" load(resfn) Bayesarealikex = res } ####### BayesAREA +J+x jstart = resBAYAREALIKEj$outputs@params_table["j","est"] xstart = Bayesarealikex$outputs@params_table["x","est"] dstart = Bayesarealikex$outputs@params_table["d","est"] estart = Bayesarealikex$outputs@params_table["e","est"] # Add jump dispersal/founder-event speciation BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = jstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = jstart #Under DIVALIKE+J, the max of "j" should be 2, not 3 (as is default in DEC+J) BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","min"] = 0.00001 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","max"] = 0.99999 # Add X speciation BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","init"] = xstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","est"] = xstart check_BioGeoBEARS_run(BioGeoBEARS_run_object) resfn = "Bayesarealike+x+j_M0_unconstrained_v1.Rdata" runslow = TRUE if (runslow) { #sourceall("/Dropbox/_njm/__packages/BioGeoBEARS_setup/") res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resBAYESAREAIKExj = res } else { # Loads to "res" load(resfn) resBAYESAREAIKExj = res } ####################################################### # Plot ancestral states - DEC ####################################################### tr = read.tree(trfn) analysis_titletxt ="BioGeoBEARS DEC Vernonieae_unconstrained" # Setup results_object = resDECjx scriptdir = np(system.file("extdata/a_scripts", package="BioGeoBEARS")) analysis_titletxt ="BioGeoBEARS DEC on Vernonieae_1 M0_unconstrained" # Setup # States res2 = plot_BioGeoBEARS_results(results_object, analysis_titletxt, addl_params=list("x","j"), plotwhat="text", label.offset=0.45, tipcex=0.7, statecex=0.7, splitcex=0.6, titlecex=0.8, plotsplits=TRUE, cornercoords_loc=scriptdir, include_null_range=TRUE, tr=tr, tipranges=tipranges) # Pie chart plot_BioGeoBEARS_results(results_object, analysis_titletxt, addl_params=list("x","j"), plotwhat="pie", label.offset=0.45, tipcex=0.7, statecex=0.7, splitcex=0.6, titlecex=0.8, plotsplits=FALSE, cornercoords_loc=scriptdir, include_null_range=TRUE, tr=tr, tipranges=tipranges,legend_ncol = 6,plotlegend = TRUE) ####################################################### # Plot ancestral states - DECJ ####################################################### analysis_titletxt ="BioGeoBEARS DEC+J on Vernonieae_1 M0_unconstrained" # Setup results_object = resDECj scriptdir = np(system.file("extdata/a_scripts", package="BioGeoBEARS")) # States res1 = plot_BioGeoBEARS_results(results_object, analysis_titletxt, addl_params=list("j"), plotwhat="text", label.offset=0.45, tipcex=0.7, statecex=0.7, splitcex=0.6, titlecex=0.8, plotsplits=TRUE, cornercoords_loc=scriptdir, include_null_range=TRUE, tr=tr, tipranges=tipranges) # Pie chart plot_BioGeoBEARS_results(resDECj, analysis_titletxt, addl_params=list("j"), plotwhat="pie", label.offset=0.45, tipcex=0.7, statecex=0.7, splitcex=0.6, titlecex=0.8, plotsplits=FALSE, cornercoords_loc=scriptdir, include_null_range=TRUE, tr=tr, tipranges=tipranges) dev.off() # Turn off PDF cmdstr = paste("open ", pdffn, sep="") system(cmdstr) # Plot it ##### PLOT RESULTS ON A FAN TREE ########################## res <- DECJXadjacency_b ##BEST MODEL tr = read.tree(trfn) trtable = prt(tr, printflag=FALSE) head(trtable) tail(trtable) plot(tr) axisPhylo() nodelabels() tiplabels(1:length(tr$tip.label)) areas = getareas_from_tipranges_object(tipranges) # This is the list of states/ranges, where each state/range # is a list of areas, counting from 0 states_list_0based = rcpp_areas_list_to_states_list(areas=areas, maxareas=max_range_size, include_null_range=TRUE) # Make the list of ranges ranges_list = NULL for (i in 1:length(states_list_0based)) { if ( (length(states_list_0based[[i]]) == 1) && (is.na(states_list_0based[[i]])) ) { tmprange = "_" } else { tmprange = paste(areas[states_list_0based[[i]]+1], collapse="") } ranges_list = c(ranges_list, tmprange) } # Look at the ranges list ranges_list ### range_probabilities range_probabilities = as.data.frame(res$ML_marginal_prob_each_state_at_branch_top_AT_node) row.names(range_probabilities) = trtable$node names(range_probabilities) = ranges_list head(range_probabilities) # Write the table to a tab-delimited text file (for Excel etc.) write.table(range_probabilities, file="New_range_probabilities-4-15-2020.csv", quote=FALSE, sep=",") range_probabilities[which(colSums(range_probabilities)>0)] range_probabilities[range_probabilities < 0.15] <- 0 ###### SETS STATES WITH PROBABILITY LESS THAN THE VALUE THAT YOU SET HERE TO 0 ### range_probabilities$OTHER <- 1-rowSums(range_probabilities) range_probabilities_2 <- range_probabilities[,colSums(range_probabilities) > 0] min(rowSums(range_probabilities_2)) max(rowSums(range_probabilities_2)) which(colSums(range_probabilities)>0) write.table(range_probabilities_2,"range_probabilities_4_20b.csv",sep = ",") areanames = names(tipranges@df) #range_probabilities_2 <- range_probabilities #### Custom graphics #head(range_probabilities_2) #color = grDevices::colors()[grep('gr(a|e)y', grDevices::colors(), invert = T)] #nodelabels(frame="none",cex = 0.9,col = "red") #BASETREE1 <- read.tree("Base_tree.nwk") #tr_false_root <- bind.tree(BASETREE1, tr, # where = 1, position = 0, interactive = FALSE) #range_probabilities_2[,ncol(range_probabilities_2)] <- round(range_probabilities_2[,ncol(range_probabilities_2)], digits = 5) colors_list_for_states = get_colors_for_states_list_0based(areanames=areanames, states_list_0based=states_list_0based, max_range_size=max_range_size, plot_null_range=TRUE) colors_list_for_states[[1]] <- "#6A2AEA" colors_list_for_states[[23]] <- "#6A806A" colors_list_for_states[[ncol(range_probabilities_2)]] <- "#FFFFFF" #round(range_probabilities_2[,ncol(range_probabilities_2)], digits = 5) colSums(range_probabilities_2) plotTree(tr, cex = 0.15, type="fan", part=0.98,fsize=0.1, label.offset=3) nodelabels(pie=as.matrix(range_probabilities_2[369:735,]), piecol=colors_list_for_states[],cex=0.4) tiplabels(pie=as.matrix(range_probabilities_2[1:368,]/rowSums(range_probabilities_2[1:368,])), piecol=colors_list_for_states[],cex=0.2) #plotTree(tr_false_root, cex = 0.1, type="fan", part=0.98,fsize=0.2, label.offset=3) #nodelabels(pie=as.matrix(range_probabilities_2[360:nrow(range_probabilities_2),]), # piecol=colors_list_for_states[],cex=0.4) plot.new() #colSums(range_probabilities_2[368:733,]) add.simmap.legend(leg=names(range_probabilities_2), colors=colors_list_for_states[1:ncol(range_probabilities_2)], prompt=TRUE, vertical=TRUE,shape="circle",fsize=0.5) colors_list_for_states[1:ncol(range_probabilities_2)] unique(colors_list_for_states[1:ncol(range_probabilities_2)]) # Look at the file moref("range_probabilities.txt") ######################################################################### ######################################################################### ######################################################################### ######################################################################### # # CALCULATE SUMMARY STATISTICS TO COMPARE # DEC, DEC+J, DIVALIKE, DIVALIKE+J, BAYAREALIKE, BAYAREALIKE+J # ######################################################################### ######################################################################### ######################################################################### ######################################################################### ######################################################################### ######################################################################### # REQUIRED READING: # # Practical advice / notes / basic principles on statistical model # comparison in general, and in BioGeoBEARS: # http://phylo.wikidot.com/advice-on-statistical-model-comparison-in-biogeobears ###################################################################### #str(resDECj$ML_marginal_prob_each_state_at_branch_top_AT_node) ######################################################################### ######################################################################### # Set up empty tables to hold the statistical results restable = NULL teststable = NULL ####################################################### # Statistics -- DEC vs. DEC+J ####################################################### # We have to extract the log-likelihood differently, depending on the # version of optim/optimx load("DEC_All_areas_allowed.Rdata", envir = parent.frame(), verbose = TRUE) resDEC = res load("DEC+J_unconstrained_v2.Rdata", envir = parent.frame(), verbose = TRUE) resDECj = res load("DEC+X_scaled_v2.Rdata", envir = parent.frame(), verbose = TRUE) resDECx = res load("DEC+JX_2_v1.Rdata", envir = parent.frame(), verbose = TRUE) resDECxj = res load("DEC+JX+adjacency.Rdata", envir = parent.frame(), verbose = TRUE) resDECxjADJ = res load("DEC_JX_adjacency_ROOT.Rdata", envir = parent.frame(), verbose = TRUE) DEC_JX_adjacency_ROOT = res resDEc summary(resDECxj) get_LnL_from_BioGeoBEARS_results_object(resDECxj) load("DEC+JX_scaled_v1.Rdata", envir = parent.frame(), verbose = TRUE) resDECxjs = res load("DEC+JX_free_noadjacent2.Rdata", envir = parent.frame(), verbose = TRUE) resDECxjna = res load("DEC+JX_Fixedroot_v1.Rdata", envir = parent.frame(), verbose = TRUE) resDECxj_fixedroot = res ###### load("DIVALIKE_M0_unconstrained_v1.Rdata", envir = parent.frame(), verbose = TRUE) resDIVALIKE = res load("DIVALIKE+J_M0_unconstrained_v1.Rdata", envir = parent.frame(), verbose = TRUE) resDIVALIKEj = res load("DIVALIKE+x_M0_unconstrained_v1.Rdata", envir = parent.frame(), verbose = TRUE) resDIVALIKEx = res load("DIVALIKE+x+j_M0_unconstrained_v1.Rdata", envir = parent.frame(), verbose = TRUE) resDIVALIKExj = res load("BAYAREALIKE_M0_unconstrained_v1.Rdata", envir = parent.frame(), verbose = TRUE) resBAYAREALIKE = res load("BAYAREALIKE+J_M0_unconstrained_v1.Rdata", envir = parent.frame(), verbose = TRUE) resBAYAREALIKEj = res load("Bayesarealike+x_M0_unconstrained_v1.Rdata", envir = parent.frame(), verbose = TRUE) resBAYAREALIKEx = res load("Bayesarealike+x+j_M0_unconstrained_v1.Rdata", envir = parent.frame(), verbose = TRUE) resBAYAREALIKExj = res LnL_0 = get_LnL_from_BioGeoBEARS_results_object(resDEC) LnL_1 = get_LnL_from_BioGeoBEARS_results_object(resDECj) LnL_2 = get_LnL_from_BioGeoBEARS_results_object(resDECx) LnL_3 = get_LnL_from_BioGeoBEARS_results_object(resDECxj) LnL_3b = get_LnL_from_BioGeoBEARS_results_object(resDECxjADJ) LnL_F = get_LnL_from_BioGeoBEARS_results_object(DEC_JX_adjacency_ROOT) resDECxjscaled rbind(LnL_0,LnL_1,LnL_2,LnL_3,LnL_F) numparams0 = 2 numparams1 = 3 numparams2 = 3 numparams3 = 4 numparams3b = 4 numparamsF = 4 ##### aic = c(975.9081, 964.4231, 863.8352, 860.9044, 810.2476, 818.0869, 1038.602, 1002.241, 916.8763, 888.2279, 1130.041, 1004.033, 1124.386, 905.7738) delAIC <- aic - min(aic) relLik <- exp(-0.5 * delAIC) aicweight <- relLik/sum(relLik) aic.table <- data.frame(AICc = aic, delAIC = delAIC, relLik = relLik, weight = aicweight) #Here the table is printed using round() to limit the number of digits shown. round(aic.table, digits = 3) stats1_0 = AICstats_2models(LnL_1, LnL_0, numparams1, numparams0) stats2_0 = AICstats_2models(LnL_2, LnL_0, numparams2, numparams0) stats3_0 = AICstats_2models(LnL_3, LnL_0, numparams3, numparams0) stats3b_0 = AICstats_2models(LnL_3b, LnL_0, numparams3b, numparams0) statsF_0 = AICstats_2models(LnL_F, LnL_3B, numparams3b, numparams3) stats3_1 = AICstats_2models(LnL_3, LnL_1, numparams3, numparams0) stats3_2 = AICstats_2models(LnL_3, LnL_2, numparams3, numparams0) statsDEC <- rbind(stats1_0, stats2_0, stats3_0,stats3_1,stats3_2) # DEC, null model for Likelihood Ratio Test (LRT) res0 = extract_params_from_BioGeoBEARS_results_object(results_object=resDEC, returnwhat="table", addl_params=c("j"), paramsstr_digits=4) # DEC+J, alternative model for Likelihood Ratio Test (LRT) res1 = extract_params_from_BioGeoBEARS_results_object(results_object=resDECj, returnwhat="table", addl_params=c("j"), paramsstr_digits=4) # res2 = extract_params_from_BioGeoBEARS_results_object(results_object=resDECx, returnwhat="table", addl_params=c("x"), paramsstr_digits=4) # res3 = extract_params_from_BioGeoBEARS_results_object(results_object=resDECxj, returnwhat="table", addl_params=c("j","x"), paramsstr_digits=4) # res3B = extract_params_from_BioGeoBEARS_results_object(results_object=resDECxjADJ, returnwhat="table", addl_params=c("j","x"), paramsstr_digits=4) # resf = extract_params_from_BioGeoBEARS_results_object(results_object=DEC_JX_adjacency_ROOT, returnwhat="table", addl_params=c("j","x"), paramsstr_digits=4) # The null hypothesis for a Likelihood Ratio Test (LRT) is that two models # confer the same likelihood on the data. See: Brian O'Meara's webpage: # http://www.brianomeara.info/tutorials/aic # ...for an intro to LRT, AIC, and AICc restable <- rbind(res0, res1,res2,res3) tmp_tests = conditional_format_table(stats) restable = rbind(restable, res2, res1) teststable = rbind(teststable, tmp_tests) ####################################################### # Statistics -- DIVALIKE vs. DIVALIKE+J ####################################################### # We have to extract the log-likelihood differently, depending on the # version of optim/optimx LnL_0 = get_LnL_from_BioGeoBEARS_results_object(resDIVALIKE) LnL_1 = get_LnL_from_BioGeoBEARS_results_object(resDIVALIKEj) LnL_2 = get_LnL_from_BioGeoBEARS_results_object(resDIVALIKEx) LnL_3 = get_LnL_from_BioGeoBEARS_results_object(resDIVALIKExj) rbind(LnL_0,LnL_1,LnL_2,LnL_3) numparams0 = 2 numparams1 = 3 numparams2 = 3 numparams3 = 4 stats1_0 = AICstats_2models(LnL_1, LnL_0, numparams1, numparams0) stats2_0 = AICstats_2models(LnL_2, LnL_0, numparams2, numparams0) stats3_0 = AICstats_2models(LnL_3, LnL_0, numparams3, numparams0) stats3_1 = AICstats_2models(LnL_3, LnL_1, numparams3, numparams0) stats3_2 = AICstats_2models(LnL_3, LnL_2, numparams3, numparams0) statsDIVA <- rbind(stats1_0, stats2_0, stats3_0,stats3_1,stats3_2) # DIVALIKE, null model for Likelihood Ratio Test (LRT) res0 = extract_params_from_BioGeoBEARS_results_object(results_object=resDIVALIKE, returnwhat="table", addl_params=c("j"), paramsstr_digits=4) # DIVALIKE+J, alternative model for Likelihood Ratio Test (LRT) res1 = extract_params_from_BioGeoBEARS_results_object(results_object=resDIVALIKEj, returnwhat="table", addl_params=c("j"), paramsstr_digits=4) # DIVALIKE+x, alternative model for Likelihood Ratio Test (LRT) res2 = extract_params_from_BioGeoBEARS_results_object(results_object=resDIVALIKEx, returnwhat="table", addl_params=c("j"), paramsstr_digits=4) # DIVALIKE+Jx, alternative model for Likelihood Ratio Test (LRT) res3 = extract_params_from_BioGeoBEARS_results_object(results_object=resDIVALIKExj, returnwhat="table", addl_params=c("j"), paramsstr_digits=4) rbind(res0, res1,res2,res3) conditional_format_table(stats) tmp_tests = conditional_format_table(stats) restable = rbind(restable, res2, res1) teststable = rbind(teststable, tmp_tests) ####################################################### # Statistics -- BAYAREALIKE vs. BAYAREALIKE+J ####################################################### # We have to extract the log-likelihood differently, depending on the # version of optim/optimx LnL_0 = get_LnL_from_BioGeoBEARS_results_object(resBAYAREALIKE) LnL_1 = get_LnL_from_BioGeoBEARS_results_object(resBAYAREALIKEj) LnL_2 = get_LnL_from_BioGeoBEARS_results_object(resBAYAREALIKEx) LnL_3 = get_LnL_from_BioGeoBEARS_results_object(resBAYAREALIKExj) rbind(LnL_0,LnL_1,LnL_2,LnL_3) numparams0 = 2 numparams1 = 3 numparams2 = 3 numparams3 = 4 stats1_0 = AICstats_2models(LnL_1, LnL_0, numparams1, numparams0) stats2_0 = AICstats_2models(LnL_2, LnL_0, numparams2, numparams0) stats3_0 = AICstats_2models(LnL_3, LnL_0, numparams3, numparams0) stats3_1 = AICstats_2models(LnL_3, LnL_1, numparams3, numparams0) stats3_2 = AICstats_2models(LnL_3, LnL_2, numparams3, numparams0) statsBAYES <- rbind(stats1_0, stats2_0, stats3_0,stats3_1,stats3_2) # BAYAREALIKE, null model for Likelihood Ratio Test (LRT) res0 = extract_params_from_BioGeoBEARS_results_object(results_object=resBAYAREALIKE, returnwhat="table", addl_params=c("j"), paramsstr_digits=4) # BAYAREALIKE+J, alternative model for Likelihood Ratio Test (LRT) res1 = extract_params_from_BioGeoBEARS_results_object(results_object=resBAYAREALIKEj, returnwhat="table", addl_params=c("j"), paramsstr_digits=4) # BAYAREALIKE, null model for Likelihood Ratio Test (LRT) res2 = extract_params_from_BioGeoBEARS_results_object(results_object=resBAYAREALIKEx, returnwhat="table", addl_params=c("j"), paramsstr_digits=4) # BAYAREALIKE+J, alternative model for Likelihood Ratio Test (LRT) res3 = extract_params_from_BioGeoBEARS_results_object(results_object=resBAYAREALIKExj, returnwhat="table", addl_params=c("j"), paramsstr_digits=4) rbind(res2, res1) conditional_format_table(stats) tmp_tests = conditional_format_table(stats) restable = rbind(restable, res2, res1) teststable = rbind(teststable, tmp_tests) ######################################################################### # ASSEMBLE RESULTS TABLES: DEC, DEC+J, DIVALIKE, DIVALIKE+J, BAYAREALIKE, BAYAREALIKE+J ######################################################################### teststable$alt = c("DEC+J", "DIVALIKE+J", "BAYAREALIKE+J") teststable$null = c("DEC", "DIVALIKE", "BAYAREALIKE") row.names(restable) = c("DEC", "DEC+J", "DIVALIKE", "DIVALIKE+J", "BAYAREALIKE", "BAYAREALIKE+J") restable = put_jcol_after_ecol(restable) restable # Look at the results!! restable teststable ####################################################### # Save the results tables for later -- check for e.g. # convergence issues ####################################################### # Loads to "restable" save(restable, file="restable_v1.Rdata") load(file="restable_v1.Rdata") # Loads to "teststable" save(teststable, file="teststable_v1.Rdata") load(file="teststable_v1.Rdata") # Also save to text files write.table(restable, file="restable.txt", quote=FALSE, sep="\t") write.table(unlist_df(teststable), file="teststable.txt", quote=FALSE, sep="\t") ####################################################### # Model weights of all six models ####################################################### restable2 = restable # With AICs: AICtable = calc_AIC_column(LnL_vals=restable$LnL, nparam_vals=restable$numparams) restable = cbind(restable, AICtable) restable_AIC_rellike = AkaikeWeights_on_summary_table(restable=restable, colname_to_use="AIC") restable_AIC_rellike = put_jcol_after_ecol(restable_AIC_rellike) restable_AIC_rellike # With AICcs -- factors in sample size samplesize = length(tr$tip.label) AICtable = calc_AICc_column(LnL_vals=restable$LnL, nparam_vals=restable$numparams, samplesize=samplesize) restable2 = cbind(restable2, AICtable) restable_AICc_rellike = AkaikeWeights_on_summary_table(restable=restable2, colname_to_use="AICc") restable_AICc_rellike = put_jcol_after_ecol(restable_AICc_rellike) restable_AICc_rellike # Also save to text files write.table(restable_AIC_rellike, file="restable_AIC_rellike.txt", quote=FALSE, sep="\t") write.table(restable_AICc_rellike, file="restable_AICc_rellike.txt", quote=FALSE, sep="\t") # Save with nice conditional formatting write.table(conditional_format_table(restable_AIC_rellike), file="restable_AIC_rellike_formatted.txt", quote=FALSE, sep="\t") write.table(conditional_format_table(restable_AICc_rellike), file="restable_AICc_rellike_formatted.txt", quote=FALSE, sep="\t") ########################################################## ########################################################## ###### Stochastic mapping #### from http://phylo.wikidot.com/biogeographical-stochastic-mapping-example-script #This needs to be integrated with my actual scripts above model_name = "DEC_JX_adjacency_ROOT" res = DEC_JX_adjacency_ROOT pdffn = paste0("Vernonieae_", model_name, "_v1.pdf") pdf(pdffn, width=6, height=6) analysis_titletxt = paste0(model_name, " on Vernonieae") # Setup results_object = res scriptdir = np(system.file("extdata/a_scripts", package="BioGeoBEARS")) # States res2 = plot_BioGeoBEARS_results(results_object, analysis_titletxt, addl_params=list("j"), plotwhat="text", label.offset=0.45, tipcex=0.7, statecex=0.7, splitcex=0.6, titlecex=0.8, plotsplits=TRUE, cornercoords_loc=scriptdir, include_null_range=TRUE, tr=tr, tipranges=tipranges) # Pie chart #plot_BioGeoBEARS_results(results_object, analysis_titletxt, addl_params=list("j"), plotwhat="pie", label.offset=0.45, tipcex=0.7, statecex=0.7, splitcex=0.6, titlecex=0.8, plotsplits=TRUE, cornercoords_loc=scriptdir, include_null_range=TRUE, tr=tr, tipranges=tipranges) dev.off() # Turn off PDF cmdstr = paste("open ", pdffn, sep="") system(cmdstr) # Plot it ####################################################### # Stochastic mapping on DEC M3b stratified with islands coming up ####################################################### clado_events_tables = NULL ana_events_tables = NULL lnum = 0 ####################################################### # Get the inputs for Biogeographical Stochastic Mapping # Note: this can be slow for large state spaces and trees, since # the independent likelihoods for each branch are being pre-calculated # E.g., for 10 areas, this requires calculation of a 1024x1024 matrix # for each branch. On a tree with ~800 tips and thus ~1600 branches, this was about 1.6 gigs # for storage of "BSM_inputs_file.Rdata". # Update: 2015-09-23 -- now, if you used multicore functionality for the ML analysis, # the same settings will be used for get_inputs_for_stochastic_mapping(). ####################################################### BSM_inputs_fn = "BSM_inputs_file.Rdata" runInputsSlow = TRUE if (runInputsSlow) { stochastic_mapping_inputs_list = get_inputs_for_stochastic_mapping(res=res) save(stochastic_mapping_inputs_list, file=BSM_inputs_fn) } else { # Loads to "stochastic_mapping_inputs_list" load(BSM_inputs_fn) } # END if (runInputsSlow) # Check inputs (doesn't work the same on unconstr) names(stochastic_mapping_inputs_list) stochastic_mapping_inputs_list$phy2 stochastic_mapping_inputs_list$COO_weights_columnar stochastic_mapping_inputs_list$unconstr set.seed(seed=as.numeric(Sys.time())) runBSMslow = TRUE if (runBSMslow == TRUE) { # Saves to: RES_clado_events_tables.Rdata # Saves to: RES_ana_events_tables.Rdata BSM_output = runBSM(res, stochastic_mapping_inputs_list=stochastic_mapping_inputs_list, maxnum_maps_to_try=100, nummaps_goal=1000, maxtries_per_branch=40000, save_after_every_try=TRUE, savedir=getwd(), seedval=12345, wait_before_save=0.01) RES_clado_events_tables = BSM_output$RES_clado_events_tables RES_ana_events_tables = BSM_output$RES_ana_events_tables } else { # Load previously saved... # Loads to: RES_clado_events_tables load(file="RES_clado_events_tables.Rdata") # Loads to: RES_ana_events_tables load(file="RES_ana_events_tables.Rdata") BSM_output = NULL BSM_output$RES_clado_events_tables = RES_clado_events_tables BSM_output$RES_ana_events_tables = RES_ana_events_tables } # END if (runBSMslow == TRUE) ######################## # Extract BSM output clado_events_tables = BSM_output$RES_clado_events_tables ana_events_tables = BSM_output$RES_ana_events_tables head(clado_events_tables[[1]]) head(ana_events_tables[[1]]) length(clado_events_tables) length(ana_events_tables) include_null_range = TRUE areanames = names(tipranges@df) areas = areanames max_range_size = 4 # Note: If you did something to change the states_list from the default given the number of areas, you would # have to manually make that change here as well! (e.g., areas_allowed matrix, or manual reduction of the states_list) states_list_0based = rcpp_areas_list_to_states_list(areas=areas, maxareas=max_range_size, include_null_range=include_null_range) colors_list_for_states = get_colors_for_states_list_0based(areanames=areanames, states_list_0based=states_list_0based, max_range_size=max_range_size, plot_null_range=TRUE) ############################################ # Setup for painting a single stochastic map ############################################ scriptdir = np(system.file("extdata/a_scripts", package="BioGeoBEARS")) stratified = TRUE clado_events_table = clado_events_tables[[1]] ana_events_table = ana_events_tables[[1]] cols_to_get = names(clado_events_table[,-ncol(clado_events_table)]) colnums = match(cols_to_get, names(ana_events_table)) ana_events_table_cols_to_add = ana_events_table[,colnums] anagenetic_events_txt_below_node = rep("none", nrow(ana_events_table_cols_to_add)) ana_events_table_cols_to_add = cbind(ana_events_table_cols_to_add, anagenetic_events_txt_below_node) rows_to_get_TF = ana_events_table_cols_to_add$node <= length(tr$tip.label) master_table_cladogenetic_events = rbind(ana_events_table_cols_to_add[rows_to_get_TF,], clado_events_table) ############################################ # Open a PDF ############################################ pdffn = paste0(model_name, "_single_stochastic_map_n1.pdf") pdf(file=pdffn, width=6, height=6) # Convert the BSM into a modified res object master_table_cladogenetic_events = clado_events_tables[[1]] resmod = stochastic_map_states_into_res(res=res, master_table_cladogenetic_events=master_table_cladogenetic_events, stratified=stratified) plot_BioGeoBEARS_results(results_object=resmod, analysis_titletxt="Stochastic map", addl_params=list("j"), label.offset=0.5, plotwhat="text", cornercoords_loc=scriptdir, root.edge=TRUE, colors_list_for_states=colors_list_for_states, skiptree=FALSE, show.tip.label=TRUE) # Paint on the branch states paint_stochastic_map_branches(res=resmod, master_table_cladogenetic_events=master_table_cladogenetic_events, colors_list_for_states=colors_list_for_states, lwd=5, lty=par("lty"), root.edge=TRUE, stratified=stratified) plot_BioGeoBEARS_results(results_object=resmod, analysis_titletxt="Stochastic map", addl_params=list("j"), plotwhat="text", cornercoords_loc=scriptdir, root.edge=TRUE, colors_list_for_states=colors_list_for_states, skiptree=TRUE, show.tip.label=TRUE) ############################################ # Close PDF ############################################ dev.off() cmdstr = paste("open ", pdffn, sep="") system(cmdstr) ####################################################### # Plot all 50 stochastic maps to PDF ####################################################### # Setup include_null_range = include_null_range areanames = areanames areas = areanames max_range_size = max_range_size states_list_0based = rcpp_areas_list_to_states_list(areas=areas, maxareas=max_range_size, include_null_range=include_null_range) colors_list_for_states = get_colors_for_states_list_0based(areanames=areanames, states_list_0based=states_list_0based, max_range_size=max_range_size, plot_null_range=TRUE) scriptdir = np(system.file("extdata/a_scripts", package="BioGeoBEARS")) stratified = stratified # Loop through the maps and plot to PDF pdffn = paste0(model_name, "_", length(clado_events_tables), "BSMs_v1.pdf") pdf(file=pdffn, width=6, height=6) nummaps_goal = 50 for (i in 1:nummaps_goal) { clado_events_table = clado_events_tables[[i]] analysis_titletxt = paste0(model_name, " - Stochastic Map #", i, "/", nummaps_goal) plot_BSM(results_object=res, clado_events_table=clado_events_table, stratified=stratified, analysis_titletxt=analysis_titletxt, addl_params=list("j"), label.offset=0.5, plotwhat="text", cornercoords_loc=scriptdir, root.edge=TRUE, colors_list_for_states=colors_list_for_states, show.tip.label=TRUE, include_null_range=include_null_range) } # END for (i in 1:nummaps_goal) dev.off() cmdstr = paste("open ", pdffn, sep="") system(cmdstr) ####################################################### # Summarize stochastic map tables ####################################################### length(clado_events_tables) length(ana_events_tables) head(clado_events_tables[[1]][,-20]) tail(clado_events_tables[[1]][,-20]) head(ana_events_tables[[1]]) tail(ana_events_tables[[1]]) areanames = names(tipranges@df) actual_names = areanames actual_names # Get the dmat and times (if any) dmat_times = get_dmat_times_from_res(res=res, numstates=NULL) dmat_times # Extract BSM output clado_events_tables = BSM_output$RES_clado_events_tables ana_events_tables = BSM_output$RES_ana_events_tables # Simulate the source areas BSMs_w_sourceAreas = simulate_source_areas_ana_clado(res, clado_events_tables, ana_events_tables, areanames) clado_events_tables = BSMs_w_sourceAreas$clado_events_tables ana_events_tables = BSMs_w_sourceAreas$ana_events_tables # Count all anagenetic and cladogenetic events counts_list = count_ana_clado_events(clado_events_tables, ana_events_tables, areanames, actual_names) summary_counts_BSMs = counts_list$summary_counts_BSMs print(conditional_format_table(summary_counts_BSMs)) # Histogram of event counts hist_event_counts(counts_list, pdffn=paste0(model_name, "_histograms_of_event_counts.pdf")) ####################################################### # Print counts to files ####################################################### #setwd("C:/Users/tjgal/Desktop/THE_DESKTOP/Vernonieae/") tmpnames = names(counts_list) cat("\n\nWriting tables* of counts to tab-delimited text files:\n(* = Tables have dimension=2 (rows and columns). Cubes (dimension 3) and lists (dimension 1) will not be printed to text files.) \n\n") for (i in 1:length(tmpnames)) { cmdtxt = paste0("item = counts_list$", tmpnames[i]) eval(parse(text=cmdtxt)) # Skip cubes if (length(dim(item)) != 2) { next() } outfn = paste0(tmpnames[i], ".txt") if (length(item) == 0) { cat(outfn, " -- NOT written, *NO* events recorded of this type", sep="") cat("\n") } else { cat(outfn) cat("\n") write.table(conditional_format_table(item), file=outfn, quote=FALSE, sep="\t", col.names=TRUE, row.names=TRUE) } # END if (length(item) == 0) } # END for (i in 1:length(tmpnames)) cat("...done.\n") ####################################################### # Check that ML ancestral state/range probabilities and # the mean of the BSMs approximately line up ####################################################### library(MultinomialCI) # For 95% CIs on BSM counts check_ML_vs_BSM(res, clado_events_tables, model_name, tr=NULL, plot_each_node=FALSE, linreg_plot=TRUE, MultinomialCI=TRUE)