All the custom code and custom functions called in this document can be downloaded and installed in R as the standalone working package beer (Beak Elaboration and Exploration in R). Although beer is intentionally designed to be portable and shareable, we advice workers to use it with moderation and tailor it’s consumption to their specific research needs. The following code snippets are used to illustrate the implementation of the functions. See §§§ for running the actual analyses.
The method described in the next three markdown is essentially a mini chains with pseudo-metropolis coupling markov chain generalised linear mixed model method to determine clades and tips elaboration and exploration (mcmcpmcmcmcglmmmctee) but we will refrain using this acronym and method name.
## Installing the dispRity branch
devtools::install_github("TGuillerme/dispRity@MCMCglmm")
## Loading packages
library(dispRity) #v>=1.7
TODO:
Trees were manipulated in R (R Core Team 2021) using the package ape (Paradis and Schliep 2019).
## Reading the different trait spaces
shapespace <- readRDS("../Data/Raw/Beak_data/2020_08_07_MMB_MORPHO_SHAPESPACE_FULL.rds")
shapespace <- shapespace$PCscores
formspace <- readRDS("../Data/Raw/Beak_data/2020_08_07_MMB_MORPHO_FORMSPACE_FULL.rds")
formspace <- formspace$PCscores
## Reading the consensus tree
consensus_tree <- read.nexus("../Data/Raw/Tree_data/9993taxa_1_tree_Global_MCC_CA.tre")
## Reading the distribution of trees
trees_list <- read.tree("../Data/Raw/Tree_data/AllBirdsHackett1.tre")
## Reading the clades list
clades_list <- as.data.frame(read.csv("../Data/Raw/BirdClades.csv", row.names = 1, stringsAsFactors = TRUE))
## Remove the absent species
clades_list <- clades_list[rownames(clades_list) %in% rownames(shapespace), ]
## Sorting the list by rownames from the shapespace
clades_list <- clades_list[match(rownames(shapespace), rownames(clades_list)), -c(6, 7)]
## Renaming the levels
colnames(clades_list) <- c("superorder", "order", "suborder", "superfamily", "family")
## Cleaning the levels (removing any level with less than 15 elements)
update.level <- function(level, min = 15) {
to_remove <- which(table(level) < min)
remove_table <- data.frame("clade" = names(to_remove), "species" = c(table(level)[to_remove]))
level[which(level %in% c(names(to_remove)))] <- ""
return(list(level = as.factor(as.character(level)), removed = remove_table))
}
## Updated list
update_clade_list <- lapply(as.list(clades_list), update.level, min = 15)
clade_list <- data.frame(lapply(update_clade_list, `[[`, 1))
removed_table <- do.call(rbind, lapply(update_clade_list, function(x) {rownames(x$removed) <- NULL; return(x$removed)}))
## Saving the clade list
save(clade_list, file = "../Data/Processed/clade_list.rda")
| clade | species | |
|---|---|---|
| superorder | Eurypygimorphae | 5 |
| order.1 | Apterygiformes | 4 |
| order.2 | Cariamiformes | 2 |
| order.3 | Casuariiformes | 4 |
| order.4 | Coliiformes | 6 |
| order.5 | Eurypygiformes | 2 |
| order.6 | Gaviiformes | 5 |
| order.7 | Leptosomiformes | 1 |
| order.8 | Mesitornithiformes | 3 |
| order.9 | Opisthocomiformes | 1 |
| order.10 | Phaethontiformes | 3 |
| order.11 | Phoenicopteriformes | 6 |
| order.12 | Rheiformes | 2 |
| order.13 | Struthioniformes | 1 |
| suborder.1 | Acanthisitti | 2 |
| suborder.2 | Pelecani | 9 |
| suborder.3 | Upupi | 9 |
| superfamily.1 | Bombycilloidea | 10 |
| superfamily.2 | Muscicapoidea | 1 |
| superfamily.3 | Stenostiridae | 1 |
| family.1 | Acanthisittidae | 2 |
| family.2 | Aegithinidae | 4 |
| family.3 | Aegothelidae | 3 |
| family.4 | Anhimidae | 3 |
| family.5 | Anhingidae | 4 |
| family.6 | Anseranatidae | 1 |
| family.7 | Apterygidae | 4 |
| family.8 | Aramidae | 1 |
| family.9 | Artamidae | 11 |
| family.10 | Atrichornithidae | 2 |
| family.11 | Balaenicipitidae | 1 |
| family.12 | Bombycillidae | 8 |
| family.13 | Brachypteraciidae | 5 |
| family.14 | Bucorvidae | 2 |
| family.15 | Burhinidae | 9 |
| family.16 | Callaeatidae | 2 |
| family.17 | Cariamidae | 2 |
| family.18 | Casuariidae | 3 |
| family.19 | Cathartidae | 7 |
| family.20 | Certhiidae | 9 |
| family.21 | Chaetopidae | 2 |
| family.22 | Chionidae | 3 |
| family.23 | Cinclosomatidae | 8 |
| family.24 | Climacteridae | 6 |
| family.25 | Cnemophilidae | 3 |
| family.26 | Coliidae | 6 |
| family.27 | Conopophagidae | 12 |
| family.28 | Coraciidae | 12 |
| family.29 | Corcoracidae | 2 |
| family.30 | Cracticidae | 11 |
| family.31 | Dasyornithidae | 3 |
| family.32 | Dromadidae | 1 |
| family.33 | Dromaiidae | 1 |
| family.34 | Dulidae | 1 |
| family.35 | Eulacestomidae | 1 |
| family.36 | Eupetidae | 1 |
| family.37 | Eurypygidae | 1 |
| family.38 | Falcunculidae | 1 |
| family.39 | Formicariidae | 10 |
| family.40 | Fregatidae | 5 |
| family.41 | Gaviidae | 5 |
| family.42 | Haematopodidae | 11 |
| family.43 | Heliornithidae | 3 |
| family.44 | Hemiprocnidae | 3 |
| family.45 | Hylocitreidae | 1 |
| family.46 | Ibidorhynchidae | 1 |
| family.47 | Ifritidae | 1 |
| family.48 | Jacanidae | 8 |
| family.49 | Leptosomidae | 1 |
| family.50 | Machaerirhynchidae | 2 |
| family.51 | Melampittidae | 2 |
| family.52 | Melanocharitidae | 9 |
| family.53 | Menuridae | 2 |
| family.54 | Mesitornithidae | 3 |
| family.55 | Mohouidae | 3 |
| family.56 | Motmotidae | 10 |
| family.57 | Neosittidae | 2 |
| family.58 | Notiomystidae | 1 |
| family.59 | Numididae | 6 |
| family.60 | Nyctibiidae | 4 |
| family.61 | Opisthocomidae | 1 |
| family.62 | Oreoicidae | 3 |
| family.63 | Orthonychidae | 3 |
| family.64 | Paramythidae | 2 |
| family.65 | Pardalotidae | 4 |
| family.66 | Pedionomidae | 1 |
| family.67 | Pelecanidae | 7 |
| family.68 | Phaethontidae | 3 |
| family.69 | Philepittidae | 3 |
| family.70 | Phoenicopteridae | 6 |
| family.71 | Phoeniculidae | 8 |
| family.72 | Picathartidae | 2 |
| family.73 | Pityriaseidae | 1 |
| family.74 | Podargidae | 11 |
| family.75 | Polioptilidae | 10 |
| family.76 | Pomatostomidae | 4 |
| family.77 | Psophidae | 3 |
| family.78 | Psophiidae | 3 |
| family.79 | Recurvirostridae | 9 |
| family.80 | Reguliidae | 4 |
| family.81 | Rhagologidae | 1 |
| family.82 | Rheidae | 2 |
| family.83 | Rhipiduridae | 10 |
| family.84 | Rhynochetidae | 1 |
| family.85 | Rostratulidae | 3 |
| family.86 | Sapayoaidae | 1 |
| family.87 | Scopidae | 1 |
| family.88 | Steatornithidae | 1 |
| family.89 | Stercorariidae | 8 |
| family.90 | Strigopoidea | 3 |
| family.91 | Struthionidae | 1 |
| family.92 | Sulidae | 10 |
| family.93 | Thinocoridae | 4 |
| family.94 | Thraupidae | 1 |
| family.95 | Tichodromidae | 1 |
| family.96 | Timaliidae | 7 |
| family.97 | Todidae | 5 |
| family.98 | Tytonidae | 6 |
| family.99 | Upupidae | 1 |
| family.100 | Urocynchramidae | 1 |
| family.101 | Zosteropidae | 1 |
How many dimensions do we need to represent at least 95% of the variance in each group
## Temporary change the empty levels to noclade
clades <- clades_list[, c(1)]
levels(clades)[1] <- "noclade"
## Make a dispRity object and get axes data
shape_axes <- select.axes(custom.subsets(shapespace, group = clades))
form_axes <- select.axes(custom.subsets(formspace, group = clades))
| PC1.var | PC1.sum | PC2.var | PC2.sum | PC3.var | PC3.sum | PC4.var | PC4.sum | PC5.var | PC5.sum | PC6.var | PC6.sum | PC7.var | PC7.sum | PC8.var | PC8.sum | PC9.var | PC9.sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| noclade | 0.504 | 0.504 | 0.339 | 0.843 | 0.078 | 0.921 | 0.025 | 0.945 | 0.015 | 0.961 | 0.008 | 0.969 | 0.010 | 0.979 | 0.006 | 0.984 | 0.003 | 0.987 |
| Aequornithes | 0.384 | 0.384 | 0.375 | 0.759 | 0.075 | 0.834 | 0.030 | 0.864 | 0.020 | 0.884 | 0.011 | 0.895 | 0.047 | 0.942 | 0.017 | 0.959 | 0.014 | 0.972 |
| Columbimorphae | 0.293 | 0.293 | 0.456 | 0.750 | 0.093 | 0.843 | 0.040 | 0.883 | 0.016 | 0.899 | 0.040 | 0.938 | 0.019 | 0.957 | 0.005 | 0.962 | 0.006 | 0.968 |
| Eurypygimorphae | 0.440 | 0.440 | 0.146 | 0.586 | 0.266 | 0.852 | 0.005 | 0.857 | 0.038 | 0.896 | 0.039 | 0.934 | 0.026 | 0.960 | 0.003 | 0.964 | 0.003 | 0.966 |
| Galloanserae | 0.253 | 0.253 | 0.401 | 0.653 | 0.141 | 0.795 | 0.118 | 0.912 | 0.032 | 0.944 | 0.011 | 0.955 | 0.009 | 0.964 | 0.010 | 0.974 | 0.004 | 0.978 |
| Mirandornithes | 0.124 | 0.124 | 0.337 | 0.461 | 0.040 | 0.501 | 0.313 | 0.814 | 0.089 | 0.902 | 0.006 | 0.908 | 0.057 | 0.965 | 0.017 | 0.982 | 0.004 | 0.986 |
| Otidimorphae | 0.401 | 0.401 | 0.396 | 0.797 | 0.090 | 0.887 | 0.051 | 0.938 | 0.013 | 0.951 | 0.021 | 0.972 | 0.007 | 0.979 | 0.004 | 0.983 | 0.003 | 0.986 |
| Paleognathae | 0.363 | 0.363 | 0.402 | 0.766 | 0.133 | 0.899 | 0.027 | 0.926 | 0.018 | 0.944 | 0.028 | 0.972 | 0.008 | 0.980 | 0.002 | 0.982 | 0.003 | 0.985 |
| Strisores | 0.712 | 0.712 | 0.226 | 0.938 | 0.024 | 0.961 | 0.024 | 0.985 | 0.003 | 0.988 | 0.004 | 0.992 | 0.002 | 0.994 | 0.002 | 0.996 | 0.001 | 0.997 |
| Telluraves | 0.511 | 0.511 | 0.341 | 0.851 | 0.073 | 0.924 | 0.021 | 0.945 | 0.014 | 0.959 | 0.014 | 0.974 | 0.008 | 0.982 | 0.004 | 0.986 | 0.003 | 0.989 |
| whole_space | 0.554 | 0.554 | 0.297 | 0.851 | 0.066 | 0.917 | 0.028 | 0.945 | 0.016 | 0.962 | 0.011 | 0.973 | 0.009 | 0.982 | 0.004 | 0.987 | 0.003 | 0.989 |
| PC1.var | PC1.sum | PC2.var | PC2.sum | PC3.var | PC3.sum | PC4.var | PC4.sum | PC5.var | PC5.sum | PC6.var | PC6.sum | PC7.var | PC7.sum | PC8.var | PC8.sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| noclade | 0.915 | 0.915 | 0.038 | 0.953 | 0.032 | 0.986 | 0.007 | 0.992 | 0.003 | 0.995 | 0.001 | 0.996 | 0.001 | 0.997 | 0.001 | 0.998 |
| Aequornithes | 0.951 | 0.951 | 0.012 | 0.963 | 0.020 | 0.983 | 0.008 | 0.990 | 0.002 | 0.992 | 0.001 | 0.993 | 0.001 | 0.994 | 0.003 | 0.997 |
| Columbimorphae | 0.792 | 0.792 | 0.061 | 0.853 | 0.099 | 0.952 | 0.015 | 0.967 | 0.009 | 0.977 | 0.003 | 0.979 | 0.008 | 0.988 | 0.004 | 0.991 |
| Eurypygimorphae | 0.424 | 0.424 | 0.272 | 0.696 | 0.097 | 0.793 | 0.129 | 0.923 | 0.006 | 0.929 | 0.013 | 0.942 | 0.021 | 0.963 | 0.014 | 0.978 |
| Galloanserae | 0.863 | 0.863 | 0.026 | 0.889 | 0.058 | 0.948 | 0.028 | 0.975 | 0.013 | 0.989 | 0.003 | 0.992 | 0.002 | 0.993 | 0.001 | 0.995 |
| Mirandornithes | 0.911 | 0.911 | 0.009 | 0.920 | 0.028 | 0.948 | 0.003 | 0.951 | 0.034 | 0.985 | 0.005 | 0.991 | 0.001 | 0.991 | 0.005 | 0.997 |
| Otidimorphae | 0.856 | 0.856 | 0.056 | 0.912 | 0.059 | 0.971 | 0.012 | 0.983 | 0.008 | 0.991 | 0.001 | 0.993 | 0.003 | 0.996 | 0.001 | 0.997 |
| Paleognathae | 0.928 | 0.928 | 0.022 | 0.951 | 0.032 | 0.982 | 0.011 | 0.993 | 0.002 | 0.995 | 0.001 | 0.996 | 0.002 | 0.998 | 0.001 | 0.999 |
| Strisores | 0.649 | 0.649 | 0.238 | 0.887 | 0.086 | 0.973 | 0.012 | 0.985 | 0.010 | 0.995 | 0.001 | 0.996 | 0.001 | 0.997 | 0.001 | 0.998 |
| Telluraves | 0.869 | 0.869 | 0.069 | 0.938 | 0.044 | 0.983 | 0.007 | 0.990 | 0.003 | 0.993 | 0.002 | 0.995 | 0.002 | 0.997 | 0.001 | 0.998 |
| whole_space | 0.892 | 0.892 | 0.059 | 0.951 | 0.033 | 0.985 | 0.006 | 0.991 | 0.003 | 0.994 | 0.002 | 0.996 | 0.001 | 0.997 | 0.001 | 0.998 |
Fig X1: variance per axes per superorder and order in the shapespace
Fig X1: variance per axes per superorder and order in the formspace
By analysing the distribution of the variance within each group, we will need 8 dimensions for the shape space and 7 for the form space.
Here we are going to group the spaces, trees and levels for subsequent levels (nested). There are 5 levels: Super order (1), Order (2), Sub-order (3), Super families (4) and families (5). The script above creates the model for the two first levels, the subsequent models create the mini-chains for the following overlapping pairs of models (order + sub-order; sub-order + super families; super families + families).
source("../Functions/prep.data.R")
## Getting the data ready for all birds
shapespace_allbirds_lvl_superorder_order <- prep.data(
level = 0, lvl.inc = c(1,2),
clades = clades_list,
space = shapespace,
dim = shape_axes$dimensions,
consensus = consensus_tree,
trees = trees_list,
verbose = TRUE)
## Separating the trees:.Done.
## Preparing the levels:.Done.
## Preparing the spaces:.Done.
save(shapespace_allbirds_lvl_superorder_order, file = "../Data/Processed/shapespace_allbirds_lvl_superorder_order.rda")
## Getting the data ready for each bird super order
shapespace_superorder_lvl_order_suborder <- prep.data(
level = 1, lvl.inc = c(2,3),
clades = clades_list,
space = shapespace,
dim = shape_axes$dimensions,
consensus = consensus_tree,
trees = trees_list,
verbose = TRUE)
## Separating the trees:........Done.
## Preparing the levels:........Done.
## Preparing the spaces:........Done.
save(shapespace_superorder_lvl_order_suborder, file = "../Data/Processed/shapespace_superorder_lvl_order_suborder.rda")
## Getting the data ready for each bird order
shapespace_order_lvl_suborder_superfam <- prep.data(
level = 2, lvl.inc = c(3,4),
clades = clades_list,
space = shapespace,
dim = shape_axes$dimensions,
consensus = consensus_tree,
trees = trees_list,
verbose = TRUE)
## Separating the trees:...........................Done.
## Preparing the levels:...........................Done.
## Preparing the spaces:...........................Done.
save(shapespace_order_lvl_suborder_superfam, file = "../Data/Processed/shapespace_order_lvl_suborder_superfam.rda")
## Getting the data ready for each bird sub order
shapespace_suborder_lvl_superfam_fam <- prep.data(
level = 3, lvl.inc = c(4,5),
clades = clades_list,
space = shapespace,
dim = shape_axes$dimensions,
consensus = consensus_tree,
trees = trees_list,
verbose = TRUE)
## Separating the trees:............Done.
## Preparing the levels:............Done.
## Preparing the spaces:............Done.
save(shapespace_suborder_lvl_superfam_fam, file = "../Data/Processed/shapespace_suborder_lvl_superfam_fam.rda")
## Getting the data ready for each bird super family
shapespace_superfam_lvl_fam <- prep.data(
level = 4, lvl.inc = 5,
clades = clades_list,
space = shapespace,
dim = shape_axes$dimensions,
consensus = consensus_tree,
trees = trees_list,
verbose = TRUE)
## Separating the trees:....Done.
## Preparing the levels:....Done.
## Preparing the spaces:....Done.
save(shapespace_superfam_lvl_fam, file = "../Data/Processed/shapespace_superfam_lvl_fam.rda")
## Get a list of all selected levels for the record (and number of elements per levels)
super_orders <- shapespace_allbirds_lvl_superorder_order[[1]]$levels[[1]]
orders <- shapespace_allbirds_lvl_superorder_order[[1]]$levels[[2]]
sub_orders <- lapply(shapespace_order_lvl_suborder_superfam, function(x) x$levels[[1]])
names(sub_orders) <- NULL
sub_orders <- unlist(sub_orders, recursive = FALSE)
super_families <- lapply(shapespace_order_lvl_suborder_superfam, function(x) x$levels[[2]])
names(super_families) <- NULL
super_families <- unlist(super_families, recursive = FALSE)
families <- lapply(shapespace_superfam_lvl_fam, function(x) x$levels[[1]])
names(families) <- NULL
families <- unlist(families, recursive = FALSE)
## Get the list of levels
levels_list <- list("super_orders" = unlist(lapply(super_orders, length)),
"orders" = unlist(lapply(orders, length)),
"sub_orders" = unlist(lapply(sub_orders, length)),
"super_families" = unlist(lapply(super_families, length)),
"families" = unlist(lapply(families, length)))
save(levels_list, file = "../Data/Processed/levels_list.rda")
Paradis, E., and K. Schliep. 2019. “Ape 5.0: An Environment for Modern Phylogenetics and Evolutionary Analyses in R.” Bioinformatics 35: 526–28.
R Core Team. 2021. R: A Language and Environment for Statistical Computing. Vienna, Austria: R Foundation for Statistical Computing. https://www.R-project.org/.