All the custom code and custom functions called in this document can be downloaded and installed in R as the standalone working package beer (Beak Elaboration and Exploration in R). Although beer is intentionally designed to be portable and shareable, we advice workers to use it with moderation and tailor it’s consumption to their specific research needs. The following code snippets are used to illustrate the implementation of the functions. See §§§ for running the actual analyses.

The method described in the next three markdown is essentially a mini chains with pseudo-metropolis coupling markov chain generalised linear mixed model method to determine clades and tips elaboration and exploration (mcmcpmcmcmcglmmmctee) but we will refrain using this acronym and method name.

## Installing the dispRity branch
devtools::install_github("TGuillerme/dispRity@MCMCglmm")
## Loading packages
library(dispRity) #v>=1.7

Data preparation

TODO:

Trees were manipulated in R (R Core Team 2021) using the package ape (Paradis and Schliep 2019).

## Reading the different trait spaces
shapespace <- readRDS("../Data/Raw/Beak_data/2020_08_07_MMB_MORPHO_SHAPESPACE_FULL.rds")
shapespace <- shapespace$PCscores
formspace <- readRDS("../Data/Raw/Beak_data/2020_08_07_MMB_MORPHO_FORMSPACE_FULL.rds")
formspace <- formspace$PCscores

## Reading the consensus tree
consensus_tree <- read.nexus("../Data/Raw/Tree_data/9993taxa_1_tree_Global_MCC_CA.tre")

## Reading the distribution of trees
trees_list <- read.tree("../Data/Raw/Tree_data/AllBirdsHackett1.tre")
## Reading the clades list
clades_list <- as.data.frame(read.csv("../Data/Raw/BirdClades.csv", row.names = 1, stringsAsFactors = TRUE))
## Remove the absent species
clades_list <- clades_list[rownames(clades_list) %in% rownames(shapespace), ]
## Sorting the list by rownames from the shapespace
clades_list <- clades_list[match(rownames(shapespace), rownames(clades_list)), -c(6, 7)]
## Renaming the levels
colnames(clades_list) <- c("superorder", "order", "suborder", "superfamily", "family")

## Cleaning the levels (removing any level with less than 15 elements)
update.level <- function(level, min = 15) {
    to_remove <- which(table(level) < min)
    remove_table <- data.frame("clade" = names(to_remove), "species" =  c(table(level)[to_remove]))
    level[which(level %in% c(names(to_remove)))] <-  ""
    return(list(level = as.factor(as.character(level)), removed = remove_table))
}

## Updated list
update_clade_list <- lapply(as.list(clades_list), update.level, min = 15)
clade_list <- data.frame(lapply(update_clade_list, `[[`, 1))
removed_table <- do.call(rbind, lapply(update_clade_list, function(x) {rownames(x$removed) <- NULL; return(x$removed)}))

## Saving the clade list
save(clade_list, file = "../Data/Processed/clade_list.rda")
List of removed clades by level and number of species in that clade in the dataset.
clade species
superorder Eurypygimorphae 5
order.1 Apterygiformes 4
order.2 Cariamiformes 2
order.3 Casuariiformes 4
order.4 Coliiformes 6
order.5 Eurypygiformes 2
order.6 Gaviiformes 5
order.7 Leptosomiformes 1
order.8 Mesitornithiformes 3
order.9 Opisthocomiformes 1
order.10 Phaethontiformes 3
order.11 Phoenicopteriformes 6
order.12 Rheiformes 2
order.13 Struthioniformes 1
suborder.1 Acanthisitti 2
suborder.2 Pelecani 9
suborder.3 Upupi 9
superfamily.1 Bombycilloidea 10
superfamily.2 Muscicapoidea 1
superfamily.3 Stenostiridae 1
family.1 Acanthisittidae 2
family.2 Aegithinidae 4
family.3 Aegothelidae 3
family.4 Anhimidae 3
family.5 Anhingidae 4
family.6 Anseranatidae 1
family.7 Apterygidae 4
family.8 Aramidae 1
family.9 Artamidae 11
family.10 Atrichornithidae 2
family.11 Balaenicipitidae 1
family.12 Bombycillidae 8
family.13 Brachypteraciidae 5
family.14 Bucorvidae 2
family.15 Burhinidae 9
family.16 Callaeatidae 2
family.17 Cariamidae 2
family.18 Casuariidae 3
family.19 Cathartidae 7
family.20 Certhiidae 9
family.21 Chaetopidae 2
family.22 Chionidae 3
family.23 Cinclosomatidae 8
family.24 Climacteridae 6
family.25 Cnemophilidae 3
family.26 Coliidae 6
family.27 Conopophagidae 12
family.28 Coraciidae 12
family.29 Corcoracidae 2
family.30 Cracticidae 11
family.31 Dasyornithidae 3
family.32 Dromadidae 1
family.33 Dromaiidae 1
family.34 Dulidae 1
family.35 Eulacestomidae 1
family.36 Eupetidae 1
family.37 Eurypygidae 1
family.38 Falcunculidae 1
family.39 Formicariidae 10
family.40 Fregatidae 5
family.41 Gaviidae 5
family.42 Haematopodidae 11
family.43 Heliornithidae 3
family.44 Hemiprocnidae 3
family.45 Hylocitreidae 1
family.46 Ibidorhynchidae 1
family.47 Ifritidae 1
family.48 Jacanidae 8
family.49 Leptosomidae 1
family.50 Machaerirhynchidae 2
family.51 Melampittidae 2
family.52 Melanocharitidae 9
family.53 Menuridae 2
family.54 Mesitornithidae 3
family.55 Mohouidae 3
family.56 Motmotidae 10
family.57 Neosittidae 2
family.58 Notiomystidae 1
family.59 Numididae 6
family.60 Nyctibiidae 4
family.61 Opisthocomidae 1
family.62 Oreoicidae 3
family.63 Orthonychidae 3
family.64 Paramythidae 2
family.65 Pardalotidae 4
family.66 Pedionomidae 1
family.67 Pelecanidae 7
family.68 Phaethontidae 3
family.69 Philepittidae 3
family.70 Phoenicopteridae 6
family.71 Phoeniculidae 8
family.72 Picathartidae 2
family.73 Pityriaseidae 1
family.74 Podargidae 11
family.75 Polioptilidae 10
family.76 Pomatostomidae 4
family.77 Psophidae 3
family.78 Psophiidae 3
family.79 Recurvirostridae 9
family.80 Reguliidae 4
family.81 Rhagologidae 1
family.82 Rheidae 2
family.83 Rhipiduridae 10
family.84 Rhynochetidae 1
family.85 Rostratulidae 3
family.86 Sapayoaidae 1
family.87 Scopidae 1
family.88 Steatornithidae 1
family.89 Stercorariidae 8
family.90 Strigopoidea 3
family.91 Struthionidae 1
family.92 Sulidae 10
family.93 Thinocoridae 4
family.94 Thraupidae 1
family.95 Tichodromidae 1
family.96 Timaliidae 7
family.97 Todidae 5
family.98 Tytonidae 6
family.99 Upupidae 1
family.100 Urocynchramidae 1
family.101 Zosteropidae 1

Selecting the number of axes

How many dimensions do we need to represent at least 95% of the variance in each group

## Temporary change the empty levels to noclade
clades <- clades_list[, c(1)]
levels(clades)[1] <- "noclade"

## Make a dispRity object and get axes data
shape_axes <- select.axes(custom.subsets(shapespace, group = clades))
form_axes  <- select.axes(custom.subsets(formspace, group = clades))
variance per axes for the Shape space
PC1.var PC1.sum PC2.var PC2.sum PC3.var PC3.sum PC4.var PC4.sum PC5.var PC5.sum PC6.var PC6.sum PC7.var PC7.sum PC8.var PC8.sum PC9.var PC9.sum
noclade 0.504 0.504 0.339 0.843 0.078 0.921 0.025 0.945 0.015 0.961 0.008 0.969 0.010 0.979 0.006 0.984 0.003 0.987
Aequornithes 0.384 0.384 0.375 0.759 0.075 0.834 0.030 0.864 0.020 0.884 0.011 0.895 0.047 0.942 0.017 0.959 0.014 0.972
Columbimorphae 0.293 0.293 0.456 0.750 0.093 0.843 0.040 0.883 0.016 0.899 0.040 0.938 0.019 0.957 0.005 0.962 0.006 0.968
Eurypygimorphae 0.440 0.440 0.146 0.586 0.266 0.852 0.005 0.857 0.038 0.896 0.039 0.934 0.026 0.960 0.003 0.964 0.003 0.966
Galloanserae 0.253 0.253 0.401 0.653 0.141 0.795 0.118 0.912 0.032 0.944 0.011 0.955 0.009 0.964 0.010 0.974 0.004 0.978
Mirandornithes 0.124 0.124 0.337 0.461 0.040 0.501 0.313 0.814 0.089 0.902 0.006 0.908 0.057 0.965 0.017 0.982 0.004 0.986
Otidimorphae 0.401 0.401 0.396 0.797 0.090 0.887 0.051 0.938 0.013 0.951 0.021 0.972 0.007 0.979 0.004 0.983 0.003 0.986
Paleognathae 0.363 0.363 0.402 0.766 0.133 0.899 0.027 0.926 0.018 0.944 0.028 0.972 0.008 0.980 0.002 0.982 0.003 0.985
Strisores 0.712 0.712 0.226 0.938 0.024 0.961 0.024 0.985 0.003 0.988 0.004 0.992 0.002 0.994 0.002 0.996 0.001 0.997
Telluraves 0.511 0.511 0.341 0.851 0.073 0.924 0.021 0.945 0.014 0.959 0.014 0.974 0.008 0.982 0.004 0.986 0.003 0.989
whole_space 0.554 0.554 0.297 0.851 0.066 0.917 0.028 0.945 0.016 0.962 0.011 0.973 0.009 0.982 0.004 0.987 0.003 0.989
variance per axes for the Form space
PC1.var PC1.sum PC2.var PC2.sum PC3.var PC3.sum PC4.var PC4.sum PC5.var PC5.sum PC6.var PC6.sum PC7.var PC7.sum PC8.var PC8.sum
noclade 0.915 0.915 0.038 0.953 0.032 0.986 0.007 0.992 0.003 0.995 0.001 0.996 0.001 0.997 0.001 0.998
Aequornithes 0.951 0.951 0.012 0.963 0.020 0.983 0.008 0.990 0.002 0.992 0.001 0.993 0.001 0.994 0.003 0.997
Columbimorphae 0.792 0.792 0.061 0.853 0.099 0.952 0.015 0.967 0.009 0.977 0.003 0.979 0.008 0.988 0.004 0.991
Eurypygimorphae 0.424 0.424 0.272 0.696 0.097 0.793 0.129 0.923 0.006 0.929 0.013 0.942 0.021 0.963 0.014 0.978
Galloanserae 0.863 0.863 0.026 0.889 0.058 0.948 0.028 0.975 0.013 0.989 0.003 0.992 0.002 0.993 0.001 0.995
Mirandornithes 0.911 0.911 0.009 0.920 0.028 0.948 0.003 0.951 0.034 0.985 0.005 0.991 0.001 0.991 0.005 0.997
Otidimorphae 0.856 0.856 0.056 0.912 0.059 0.971 0.012 0.983 0.008 0.991 0.001 0.993 0.003 0.996 0.001 0.997
Paleognathae 0.928 0.928 0.022 0.951 0.032 0.982 0.011 0.993 0.002 0.995 0.001 0.996 0.002 0.998 0.001 0.999
Strisores 0.649 0.649 0.238 0.887 0.086 0.973 0.012 0.985 0.010 0.995 0.001 0.996 0.001 0.997 0.001 0.998
Telluraves 0.869 0.869 0.069 0.938 0.044 0.983 0.007 0.990 0.003 0.993 0.002 0.995 0.002 0.997 0.001 0.998
whole_space 0.892 0.892 0.059 0.951 0.033 0.985 0.006 0.991 0.003 0.994 0.002 0.996 0.001 0.997 0.001 0.998

Fig X1: variance per axes per superorder and order in the shapespace

Fig X1: variance per axes per superorder and order in the formspace

By analysing the distribution of the variance within each group, we will need 8 dimensions for the shape space and 7 for the form space.

Saving the data

Here we are going to group the spaces, trees and levels for subsequent levels (nested). There are 5 levels: Super order (1), Order (2), Sub-order (3), Super families (4) and families (5). The script above creates the model for the two first levels, the subsequent models create the mini-chains for the following overlapping pairs of models (order + sub-order; sub-order + super families; super families + families).

source("../Functions/prep.data.R")
## Getting the data ready for all birds
shapespace_allbirds_lvl_superorder_order <- prep.data(
                                          level = 0, lvl.inc = c(1,2),
                                          clades    = clades_list,
                                          space     = shapespace,
                                          dim       = shape_axes$dimensions,
                                          consensus = consensus_tree,
                                          trees     = trees_list,
                                          verbose   = TRUE)
## Separating the trees:.Done.
## Preparing the levels:.Done.
## Preparing the spaces:.Done.
save(shapespace_allbirds_lvl_superorder_order, file = "../Data/Processed/shapespace_allbirds_lvl_superorder_order.rda")

## Getting the data ready for each bird super order
shapespace_superorder_lvl_order_suborder <- prep.data(
                                          level = 1, lvl.inc = c(2,3),
                                          clades    = clades_list,
                                          space     = shapespace,
                                          dim       = shape_axes$dimensions,
                                          consensus = consensus_tree,
                                          trees     = trees_list,
                                          verbose   = TRUE)
## Separating the trees:........Done.
## Preparing the levels:........Done.
## Preparing the spaces:........Done.
save(shapespace_superorder_lvl_order_suborder, file = "../Data/Processed/shapespace_superorder_lvl_order_suborder.rda")

## Getting the data ready for each bird order
shapespace_order_lvl_suborder_superfam <- prep.data(
                                          level = 2, lvl.inc = c(3,4),
                                          clades    = clades_list,
                                          space     = shapespace,
                                          dim       = shape_axes$dimensions,
                                          consensus = consensus_tree,
                                          trees     = trees_list,
                                          verbose   = TRUE)
## Separating the trees:...........................Done.
## Preparing the levels:...........................Done.
## Preparing the spaces:...........................Done.
save(shapespace_order_lvl_suborder_superfam, file = "../Data/Processed/shapespace_order_lvl_suborder_superfam.rda")

## Getting the data ready for each bird sub order
shapespace_suborder_lvl_superfam_fam <- prep.data(
                                          level = 3, lvl.inc = c(4,5),
                                          clades    = clades_list,
                                          space     = shapespace,
                                          dim       = shape_axes$dimensions,
                                          consensus = consensus_tree,
                                          trees     = trees_list,
                                          verbose   = TRUE)
## Separating the trees:............Done.
## Preparing the levels:............Done.
## Preparing the spaces:............Done.
save(shapespace_suborder_lvl_superfam_fam, file = "../Data/Processed/shapespace_suborder_lvl_superfam_fam.rda")

## Getting the data ready for each bird super family
shapespace_superfam_lvl_fam <- prep.data(
                                          level = 4, lvl.inc = 5,
                                          clades    = clades_list,
                                          space     = shapespace,
                                          dim       = shape_axes$dimensions,
                                          consensus = consensus_tree,
                                          trees     = trees_list,
                                          verbose   = TRUE)
## Separating the trees:....Done.
## Preparing the levels:....Done.
## Preparing the spaces:....Done.
save(shapespace_superfam_lvl_fam, file = "../Data/Processed/shapespace_superfam_lvl_fam.rda")

## Get a list of all selected levels for the record (and number of elements per levels)
super_orders     <- shapespace_allbirds_lvl_superorder_order[[1]]$levels[[1]]
orders           <- shapespace_allbirds_lvl_superorder_order[[1]]$levels[[2]]
sub_orders       <- lapply(shapespace_order_lvl_suborder_superfam, function(x) x$levels[[1]])
names(sub_orders) <- NULL
sub_orders       <- unlist(sub_orders, recursive = FALSE)
super_families   <- lapply(shapespace_order_lvl_suborder_superfam, function(x) x$levels[[2]])
names(super_families) <- NULL
super_families   <- unlist(super_families, recursive = FALSE)
families         <- lapply(shapespace_superfam_lvl_fam, function(x) x$levels[[1]])
names(families) <- NULL
families         <- unlist(families, recursive = FALSE)

## Get the list of levels
levels_list <- list("super_orders"   = unlist(lapply(super_orders, length)),
                    "orders"         = unlist(lapply(orders, length)),
                    "sub_orders"     = unlist(lapply(sub_orders, length)),
                    "super_families" = unlist(lapply(super_families, length)),
                    "families"       = unlist(lapply(families, length)))
save(levels_list, file = "../Data/Processed/levels_list.rda")

References

Paradis, E., and K. Schliep. 2019. “Ape 5.0: An Environment for Modern Phylogenetics and Evolutionary Analyses in R.” Bioinformatics 35: 526–28.

R Core Team. 2021. R: A Language and Environment for Statistical Computing. Vienna, Austria: R Foundation for Statistical Computing. https://www.R-project.org/.