library(readxl)
library(phytools)
library(sensiPhy)
library(phylolm)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(car)
library(mvMORPH)
library(ellipse)
library(BAT)
library(ggtree)
library(stringr)

1) IMPORT & PREPARE DATA

Set working directory
#set working directory
#setwd()
Load data
#load Niphargus trait data
trait_data <- read_excel("Niphargus_morpho_data.xlsx", 
    col_types = c("text", "text", "numeric", "numeric", "numeric", 
        "numeric", "numeric", "numeric", 
        "numeric", "numeric", "numeric", 
        "numeric", "numeric", "numeric", 
        "numeric", "numeric", "numeric"), na = "NA")

head(trait_data)
## # A tibble: 6 × 17
##   species        ecology body_…¹ pV_le…² pVI_l…³ pVII_…⁴ cxII_…⁵ cxIII…⁶ pVb_w…⁷
##   <chr>          <chr>     <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 N_alpheus_H    Cave l…    15.8    6.11    8.12    7.37    1.22    1.47    1.31
## 2 N_antipodes    Cave l…    27.8   12.2    15.1    14.0     2.41    2.89    2.13
## 3 N_arbiter_H    Cave l…    30.1   15.0    19.0    18.1     2.60    3.04    2.05
## 4 N_arethusa     Cave l…    13.7    5.31    7.06    6.41    1.09    1.26    1.14
## 5 N_balcanicus_H Cave l…    27.1   17.5    21.5    20.6     2.15    2.35    1.44
## 6 N_bilecanus    Cave l…    21.0   10.1    12.9    11.9     1.78    1.98    1.67
## # … with 8 more variables: pVIb_width <dbl>, pVIIb_width <dbl>, gnI_l <dbl>,
## #   gnI_w <dbl>, gnI_d <dbl>, gnII_l <dbl>, gnII_w <dbl>, gnII_d <dbl>, and
## #   abbreviated variable names ¹​body_length, ²​pV_length, ³​pVI_length,
## #   ⁴​pVII_length, ⁵​cxII_length, ⁶​cxIII_length, ⁷​pVb_width
#load Niphargus phylogenetic tree
tree <- read.nexus(file = "Niphargus_tree.nex")
Mutate and filter data
#compute gnathopod perimeters from three gnathopod measures (sum of gnathopod length, width and diagonal)
trait_data <- trait_data %>% rowwise() %>% mutate(gnI_perim = sum(gnI_l, gnI_w, gnI_d))
trait_data <- trait_data %>% rowwise() %>% mutate(gnII_perim = sum(gnII_l, gnII_w, gnII_d))
#remove excess columns
trait_data <- trait_data[,-c(12:17)]
#remove species with missing morphological data
#N gallicus, N patrizii, N virei C
trait_data <- trait_data %>% drop_na(c(4:13))

2) DATA ANALYSIS 1: PHYLOGENETIC PCA

2.1) Phylogenetic regression

Run phylogenetic regression: obtain residuals trait ~ body length

#remove species with unknown ecology
trait_data <- trait_data %>% drop_na(c(2))
#prune tree
species <- as.vector(trait_data$species)
tree <- keep.tip(phy = tree, tip = species)
#assign rownames
trait_data <- data.frame(trait_data, row.names = 1)

#iterate phylogenetic regression function through columns 
phylolm <- lapply(trait_data[,-c(1:2)], function(y){
  
        phylolm(formula = y ~ trait_data$body_length, 
                data = trait_data, phy = tree, 
                model = "BM", track = FALSE)
  
})

#extract residuals from lists
residuals <- lapply(phylolm, function (x) x[c("residuals")])

#convert to dataframe and assign original column names
residuals_df <- as.data.frame(residuals)
colnames(residuals_df) <- names(residuals)
#add back scaled body length column
body_length <- scale(trait_data$body_length)
residuals_df <- cbind(residuals_df, body_length)

rm(residuals, phylolm, body_length)

2.2) Phylogenetic PCA

Morphological traits were divided in three groups; “body shape” includes coxae II and III lengths and pereopod V-VII basis widths; “locomotion” includes pereopod V-VII lengths and body length, and “trophic position” includes gnathopod I and II perimeters.

#create data matrices for phylogenetic PCA 
shape <- as.matrix(residuals_df[,c(4:8)]) #Body shape
locom <- as.matrix(residuals_df[,c(1:3, 11)]) #Locomotion
tp <- as.matrix(residuals_df[,c(9,10)])#Trophic position
#run PCAs
shape_pca <- phyl.pca(tree = tree, shape, method = "BM", mode = "cov") 
locom_pca <- phyl.pca(tree = tree, locom, method = "BM", mode = "cov") 
tp_pca <- phyl.pca(tree = tree, tp, method = "BM", mode = "cov")

#save PC1 scores
pc1_scores <- as.data.frame(cbind(locom_pca$S[,1], shape_pca$S[,1], tp_pca$S[,1]))
colnames(pc1_scores) <- c("PC1_locom", "PC1_shape", "PC1_tp")
Body shape PCA results
#PCA diagnostics
summary(shape_pca) #PC1: explains 71% of variance
## Importance of components:
##                              PC1        PC2        PC3        PC4        PC5
## Standard deviation     0.1722207 0.08499009 0.04815304 0.04078591 0.02580628
## Proportion of Variance 0.7141567 0.17392404 0.05583036 0.04005377 0.01603518
## Cumulative Proportion  0.7141567 0.88808069 0.94391105 0.98396482 1.00000000
shape_pca$L #Loadings: PC1: negatively correlates with all parameters.
##                     PC1        PC2          PC3         PC4          PC5
## cxII_length  -0.7326387  0.6020932  0.311237465  0.03801735 -0.049093930
## cxIII_length -0.8769183  0.3236893 -0.340323027 -0.10171178  0.008631484
## pVb_width    -0.8678673 -0.3665770  0.179750955 -0.25402623  0.124851935
## pVIb_width   -0.8631714 -0.4537067 -0.004076993  0.04885167 -0.216060657
## pVIIb_width  -0.8836415 -0.1856570 -0.032271885  0.40083084  0.151665446
Locomotion PCA results
#PCA diagnostics
summary(locom_pca) #PC1: explains 82% of variance
## Importance of components:
##                              PC1       PC2        PC3         PC4
## Standard deviation     1.0549019 0.4446390 0.20517398 0.098497377
## Proportion of Variance 0.8168551 0.1451229 0.03090049 0.007121479
## Cumulative Proportion  0.8168551 0.9619780 0.99287852 1.000000000
locom_pca$L #Loadings: PC1: positively correlates with all parameters except body length.
##                     PC1          PC2          PC3         PC4
## pV_length    0.96884658 -0.012044599  0.220264507  0.11258236
## pVI_length   0.98986989  0.036845596  0.081029597 -0.11060834
## pVII_length  0.97024140 -0.004478462 -0.239527673  0.03518619
## body_length -0.05287479  0.998524627 -0.004821576  0.01138323
Trophic position PCA results
#PCA diagnostics
summary(tp_pca) #PC1: explains 94% of variance
## Importance of components:
##                              PC1        PC2
## Standard deviation     0.4330261 0.10512251
## Proportion of Variance 0.9443462 0.05565377
## Cumulative Proportion  0.9443462 1.00000000
tp_pca$L #Loadings: PC1: positively correlates with all parameters.
##                  PC1        PC2
## gnI_perim  0.9217241 -0.3878462
## gnII_perim 0.9903340  0.1387032
rm(shape, shape_pca, locom, locom_pca, tp, tp_pca)

3) DATA ANALYSIS 2: REGRESSIONS BETWEEN PCs

#add ecology column to pc1_scores dataframe
ecology <- as.data.frame(trait_data[,1])
colnames(ecology) <- "ecology"
rownames(ecology) <- rownames(trait_data)
pc1_scores <- merge(pc1_scores, ecology, by = "row.names")
rownames(pc1_scores) <- pc1_scores[,1]
pc1_scores[,1] <- NULL

3.1) Interstitial

Regression 1: PC1 tp ~ PC1 locomotion
int_reg1 <- lm(PC1_tp ~ PC1_locom, data = pc1_scores %>% filter(ecology == "Interstitial"))
#plot diagnostics
par(mfrow = c(2, 2))
plot(int_reg1)

Regression 2: PC1 tp ~ PC1 shape
int_reg2 <- lm(PC1_tp ~ PC1_shape, data = pc1_scores %>% filter(ecology == "Interstitial"))
#plot diagnostics
par(mfrow = c(2, 2))
plot(int_reg2) 

3.2) Cave lake

Regression 1: PC1 tp ~ PC1 locomotion
lake_reg1 <- lm(PC1_tp ~ PC1_locom, data = pc1_scores %>% filter(ecology == "Cave lake"))
#plot diagnostics
par(mfrow = c(2, 2))
plot(lake_reg1) 

Regression 2: PC1 tp ~ PC1 shape
lake_reg2 <- lm(PC1_tp ~ PC1_shape, data = pc1_scores %>% filter(ecology == "Cave lake"))
#plot diagnostics
par(mfrow = c(2, 2))
plot(lake_reg2) 

3.3) Cave stream

Regression 1: PC1 tp ~ PC1 locomotion
stream_reg1 <- lm(PC1_tp ~ PC1_locom, data = pc1_scores %>% filter(ecology == "Cave stream"))
#plot diagnostics
par(mfrow = c(2, 2))
plot(stream_reg1) 

Regression 2: PC1 tp ~ PC1 shape
stream_reg2 <- lm(PC1_tp ~ PC1_shape, data = pc1_scores %>% filter(ecology == "Cave stream"))
#plot diagnostics
par(mfrow = c(2, 2))
plot(stream_reg2) 

4) CORRELATIONS PLOTS & LM SUMMARIES

LM models summaries below each plot.

Interstitial species

Plot 1

theme_corr_plots <- theme_bw() + theme(axis.text = element_text(color = "black", size = 10),
          axis.title = element_text(color = "black", size = 10),
          panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggplot(data = pc1_scores %>% filter(ecology == "Interstitial"), 
       mapping = aes(x = PC1_locom, y = PC1_tp)) +
    geom_point(color = "#ff7f00", size = 3, alpha = 0.8) +
    geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 0.5) +
    ylab("PC1 Trophic apparatus") +
    xlab("PC1 Locomotion") +
    scale_y_continuous(labels = label_number(accuracy = 0.1)) +
    scale_x_continuous(labels = label_number(accuracy = 0.1)) +
    theme_corr_plots
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("inter1.png", width = 8, height = 8, units = "cm")

LM summary

## 
## Call:
## lm(formula = PC1_tp ~ PC1_locom, data = pc1_scores %>% filter(ecology == 
##     "Interstitial"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.94868 -0.19613  0.07935  0.25639  0.66956 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.14368    0.05983   2.401   0.0213 *  
## PC1_locom    0.24195    0.04442   5.447 3.26e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3782 on 38 degrees of freedom
## Multiple R-squared:  0.4385, Adjusted R-squared:  0.4237 
## F-statistic: 29.67 on 1 and 38 DF,  p-value: 3.261e-06

Plot2

ggplot(data = pc1_scores %>% filter(ecology == "Interstitial"), 
       mapping = aes(x = PC1_shape, y = PC1_tp)) +
    geom_point(color = "#ff7f00", size = 3, alpha = 0.8) +
    geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 0.5) +
    ylab("PC1 Trophic apparatus") +
    xlab("PC1 Body shape")  +
    scale_y_continuous(labels = label_number(accuracy = 0.1)) +
    scale_x_continuous(labels = label_number(accuracy = 0.1)) +
    theme_corr_plots
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("inter2.png", width = 8, height = 8, units = "cm")

LM summary

## 
## Call:
## lm(formula = PC1_tp ~ PC1_shape, data = pc1_scores %>% filter(ecology == 
##     "Interstitial"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.50567 -0.15576 -0.01867  0.29413  0.81024 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.08172    0.08089   1.010   0.3188  
## PC1_shape    0.70465    0.29940   2.354   0.0239 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4715 on 38 degrees of freedom
## Multiple R-squared:  0.1272, Adjusted R-squared:  0.1043 
## F-statistic: 5.539 on 1 and 38 DF,  p-value: 0.02387
Cave lake species

Plot 1

ggplot(data = pc1_scores %>% filter(ecology == "Cave lake"), 
       mapping = aes(x = PC1_locom, y = PC1_tp)) +
    geom_point(color = "#4dae48", size = 3, alpha = 0.8) +
    geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 0.5) +
    ylab("PC1 Trophic apparatus") +
    xlab("PC1 Locomotion")  +
    scale_y_continuous(labels = label_number(accuracy = 0.1)) +
    scale_x_continuous(labels = label_number(accuracy = 0.1)) +
    theme_corr_plots
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("lake1.png", width = 8, height = 8, units = "cm")

LM summary

## 
## Call:
## lm(formula = PC1_tp ~ PC1_locom, data = pc1_scores %>% filter(ecology == 
##     "Cave lake"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.58172 -0.63867  0.00952  0.63835  2.97807 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.24432    0.16864   1.449 0.153097    
## PC1_locom    0.16070    0.04233   3.796 0.000368 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.083 on 55 degrees of freedom
## Multiple R-squared:  0.2076, Adjusted R-squared:  0.1932 
## F-statistic: 14.41 on 1 and 55 DF,  p-value: 0.0003684

Plot 2

ggplot(data = pc1_scores %>% filter(ecology == "Cave lake"), 
       mapping = aes(x = PC1_shape, y = PC1_tp)) +
    geom_point(color = "#4dae48", size = 3, alpha = 0.8) +
    geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 0.5, linetype = "dashed") +
    ylab("PC1 Trophic apparatus") +
    xlab("PC1 Body shape")  +
    scale_y_continuous(labels = label_number(accuracy = 0.1)) +
    scale_x_continuous(labels = label_number(accuracy = 0.1)) +
    theme_corr_plots
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("lake2.png", width = 8, height = 8, units = "cm")

LM summary

## 
## Call:
## lm(formula = PC1_tp ~ PC1_shape, data = pc1_scores %>% filter(ecology == 
##     "Cave lake"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1453 -1.0082 -0.1507  0.6564  3.5915 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.4058     0.2904   1.397    0.168
## PC1_shape    -0.2154     0.2974  -0.725    0.472
## 
## Residual standard error: 1.21 on 55 degrees of freedom
## Multiple R-squared:  0.009454,   Adjusted R-squared:  -0.008556 
## F-statistic: 0.5249 on 1 and 55 DF,  p-value: 0.4718
Cave stream species

Plot 1

ggplot(data = pc1_scores %>% filter(ecology == "Cave stream"), 
       mapping = aes(x = PC1_locom, y = PC1_tp)) +
    geom_point(color = "#7570b2", size = 3, alpha = 0.8) +
    geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 0.5) +
    ylab("PC1 Trophic apparatus") +
    xlab("PC1 Locomotion")  +
    scale_y_continuous(labels = label_number(accuracy = 0.1)) +
    scale_x_continuous(labels = label_number(accuracy = 0.1)) +
    theme_corr_plots
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("stream1.png", width = 8, height = 8, units = "cm")

LM summary

## 
## Call:
## lm(formula = PC1_tp ~ PC1_locom, data = pc1_scores %>% filter(ecology == 
##     "Cave stream"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.28244 -0.26230 -0.07212  0.13518  2.47581 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.32937    0.10859   3.033   0.0032 ** 
## PC1_locom    0.41630    0.03621  11.496   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6487 on 86 degrees of freedom
## Multiple R-squared:  0.6058, Adjusted R-squared:  0.6012 
## F-statistic: 132.2 on 1 and 86 DF,  p-value: < 2.2e-16

Plot 2

ggplot(data = pc1_scores %>% filter(ecology == "Cave stream"), 
       mapping = aes(x = PC1_shape, y = PC1_tp)) +
    geom_point(color = "#7570b2", size = 3, alpha = 0.8) +
    geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 0.5) +
    ylab("PC1 Trophic apparatus") +
    xlab("PC1 Body shape")  +
    scale_y_continuous(labels = label_number(accuracy = 0.1)) +
    scale_x_continuous(labels = label_number(accuracy = 0.1)) +
    theme_corr_plots #+
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("stream2.png", width = 8, height = 8, units = "cm")

LM summary

## 
## Call:
## lm(formula = PC1_tp ~ PC1_shape, data = pc1_scores %>% filter(ecology == 
##     "Cave stream"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6615 -0.5630  0.1131  0.5328  2.8181 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.6120     0.0930  -6.581 3.50e-09 ***
## PC1_shape    -1.7394     0.2948  -5.899 7.02e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8717 on 86 degrees of freedom
## Multiple R-squared:  0.2881, Adjusted R-squared:  0.2798 
## F-statistic:  34.8 on 1 and 86 DF,  p-value: 7.019e-08
#remove objects
rm(list = ls(pattern = "reg"))

5) DATA ANALYSIS 3: FUNCTIONAL DIVERSITY

5.1) Functional diversity of real data and data simulated from phylogeny

Prepare tree and ecology data
#tree
tree <- ladderize(tree, right = FALSE)

#ecological data (categorical)
ecology_ggtree <- cbind(rownames(ecology), data.frame(ecology, row.names=NULL))
colnames(ecology_ggtree) <- c("id","ecology")
ecology_ggtree <- ecology_ggtree[match(tree$tip.label, ecology_ggtree$id),]  

ecology_v <- setNames(ecology$ecology, rownames(ecology))
ecology_v <- ecology_v[tree$tip.label]
ecology_v <- as.factor(ecology_v)

# simmap tree
tree_simmap <- make.simmap(tree, ecology_v, model = "ER", nsim = 1)
## make.simmap is sampling character histories conditioned on
## the transition matrix
## 
## Q =
##                Cave lake Cave stream Interstitial
## Cave lake    -0.02189916  0.01094958   0.01094958
## Cave stream   0.01094958 -0.02189916   0.01094958
## Interstitial  0.01094958  0.01094958  -0.02189916
## (estimated using likelihood);
## and (mean) root node prior probabilities
## pi =
##    Cave lake  Cave stream Interstitial 
##    0.3333333    0.3333333    0.3333333
## Done.
plot(tree_simmap)
## no colors provided. using the following legend:
##    Cave lake  Cave stream Interstitial 
##      "black"    "#DF536B"    "#61D04F"

Run mvMORPH with different evolutionary models
#mvMorph 
model_BM1 <- mvBM(tree_simmap, pc1_scores[,1:3], model = "BM1")
## successful convergence of the optimizer 
## a reliable solution has been reached 
## 
## -- Summary results for multiple rate BM1 model -- 
## LogLikelihood:    -862.0074 
## AIC:      1742.015 
## AICc:     1742.345 
## 9 parameters 
## 
## Estimated rate matrix 
## ______________________ 
##             PC1_locom   PC1_shape      PC1_tp
## PC1_locom  1.10679809 -0.02597011  0.27866909
## PC1_shape -0.02597011  0.02950097 -0.01546998
## PC1_tp     0.27866909 -0.01546998  0.18650141
## 
## Estimated root state 
## ______________________ 
##           PC1_locom    PC1_shape       PC1_tp
## theta: 1.471726e-13 1.576491e-14 4.430104e-14
model_BMM <- mvBM(tree_simmap,pc1_scores[,1:3], model = "BMM")
## successful convergence of the optimizer 
## a reliable solution has been reached 
## 
## -- Summary results for multiple rate BMM model -- 
## LogLikelihood:    -725.222 
## AIC:      1492.444 
## AICc:     1494.178 
## 21 parameters 
## 
## Estimated rate matrix 
## ______________________ 
## , , Cave lake
## 
##             PC1_locom    PC1_shape       PC1_tp
## PC1_locom 2.560805868  0.005812747  0.570522123
## PC1_shape 0.005812747  0.065539056 -0.005769793
## PC1_tp    0.570522123 -0.005769793  0.358966311
## 
## , , Cave stream
## 
##             PC1_locom   PC1_shape      PC1_tp
## PC1_locom  0.58189434 -0.05522822  0.22551056
## PC1_shape -0.05522822  0.01405183 -0.02874662
## PC1_tp     0.22551056 -0.02874662  0.15627476
## 
## , , Interstitial
## 
##             PC1_locom    PC1_shape      PC1_tp
## PC1_locom  0.22418763 -0.025036448 0.020759933
## PC1_shape -0.02503645  0.011958860 0.001319363
## PC1_tp     0.02075993  0.001319363 0.010905559
## 
## 
## Estimated root state 
## ______________________ 
##           PC1_locom   PC1_shape      PC1_tp
## theta: -0.002195461 0.006893864 0.006533899
model_EB <- mvEB(tree,pc1_scores[,1:3])
## No lower bound provided. Use of default setting "  -0.2455886 "
##  successful convergence of the optimizer 
## a reliable solution has been reached 
## 
## -- Summary results for Early Burst or ACDC model -- 
## LogLikelihood:    -862.0074 
## AIC:      1744.015 
## AICc:     1744.419 
## 10 parameters 
## Rate change: 
## ______________________ 
## [1] 0
## 
## Estimated rate matrix 
## ______________________ 
##             PC1_locom   PC1_shape      PC1_tp
## PC1_locom  1.10680289 -0.02597233  0.27866979
## PC1_shape -0.02597233  0.02949963 -0.01546955
## PC1_tp     0.27866979 -0.01546955  0.18649803
## 
## Estimated root states 
## ______________________ 
##           PC1_locom    PC1_shape        PC1_tp
## theta -7.455359e-14 1.517188e-14 -3.038151e-15
model_OU1 <- mvOU(tree_simmap,pc1_scores[,1:3], model = "OU1")
## successful convergence of the optimizer 
## a reliable solution has been reached 
## 
## -- Summary results -- 
## LogLikelihood:    -747.2597 
## AIC:      1524.519 
## AICc:     1525.41 
## 15 parameters 
## 
## Estimated theta values 
## ______________________ 
##      PC1_locom   PC1_shape     PC1_tp
## OU1 -0.9455434 -0.09238072 -0.1935939
## 
## ML alpha values 
## ______________________ 
##             PC1_locom   PC1_shape      PC1_tp
## PC1_locom  0.15178304  0.04936579 -0.03170637
## PC1_shape  0.04936579  0.26578984 -0.04204308
## PC1_tp    -0.03170637 -0.04204308  0.22808950
## 
## ML sigma values 
## ______________________ 
##           PC1_locom   PC1_shape      PC1_tp
## PC1_locom 2.3651344  0.08340240  0.57086095
## PC1_shape 0.0834024  0.08116343 -0.02233941
## PC1_tp    0.5708609 -0.02233941  0.45024174
model_OUM <- mvOU(tree_simmap,pc1_scores[,1:3], model = "OUM")
## successful convergence of the optimizer 
## a reliable solution has been reached 
## 
## -- Summary results -- 
## LogLikelihood:    -693.459 
## AIC:      1428.918 
## AICc:     1430.652 
## 21 parameters 
## 
## Estimated theta values 
## ______________________ 
##               PC1_locom   PC1_shape     PC1_tp
## Cave lake     2.3207283 -0.86023392  0.5793776
## Cave stream  -2.4883501  0.04088197 -0.6648265
## Interstitial  0.1216496  0.11122264  0.1531405
## 
## ML alpha values 
## ______________________ 
##            PC1_locom  PC1_shape     PC1_tp
## PC1_locom 0.40009973 0.06196639 0.08043084
## PC1_shape 0.06196639 0.56972178 0.01696067
## PC1_tp    0.08043084 0.01696067 0.32829221
## 
## ML sigma values 
## ______________________ 
##           PC1_locom   PC1_shape      PC1_tp
## PC1_locom 4.5239039 0.148808942 1.504782112
## PC1_shape 0.1488089 0.124385134 0.008389482
## PC1_tp    1.5047821 0.008389482 0.864880255
# merge into list
models <- list(model_BM1,model_BMM,model_EB,model_OU1,model_OUM)
#choose the best model
aic <- aicw(models)
aic
## -- Akaike weights -- 
##               Rank  AIC  diff wi AICw
## OUM 5            1 1429   0.0  1    1
## BMM default 2    2 1492  63.5  0    0
## OU1 4            3 1525  95.6  0    0
## BM1 default 1    4 1742 313.1  0    0
## ACDC 3           5 1744 315.1  0    0
#run simulations with the best model (OUM)
simul_phylo <- mvSIM(tree_simmap, nsim = 999, param = model_OUM)
rm(list = ls(pattern = "model"))
rm(aic)
#check first df
head(simul_phylo[[1]])
##                    PC1_locom  PC1_shape     PC1_tp
## C_paradoxa         -4.890684  0.8297968 -1.6961831
## N_aberrans         -1.699157  0.8557482 -2.2034833
## N_aggtelekiensis_A -1.072010 -0.2323141  1.0212826
## N_aggtelekiensis_C -5.011859  0.1713023 -0.9891892
## N_aitolosi_H       -1.726677 -0.2209744 -0.1880719
## N_alisadri         -4.169260 -0.2763926 -0.3380913
Construct functional trees

First try test tree to see which method is the best (simulated data)

testtree <- tree.build(trait = simul_phylo[[1]], distance = "euclidean", func = "best")
## The best tree is given by bionj with a quality of 0.9934783
testtree
## 
## Phylogenetic tree with 185 tips and 183 internal nodes.
## 
## Tip labels:
##   C_paradoxa, N_aberrans, N_aggtelekiensis_A, N_aggtelekiensis_C, N_aitolosi_H, N_alisadri, ...
## 
## Unrooted; includes branch lengths.
#the best tree is given by neighbor-joining algorithm
ggtree(testtree, layout = "rectangular", branch.length = "none") + geom_tiplab(size = 1.5)

Construct 1000 functional trees from simulated data

simul_func_trees <- lapply(simul_phylo, function(x){
  tree.build(trait = x, distance = "euclidean", func = "nj")
})

Construct functional tree from real data

real_func_tree <- tree.build(trait = pc1_scores[,1:3], distance = "euclidean", func = "nj")

Create community x species matrix

ecology$species <- rownames(ecology)
row.names(ecology) <- NULL
ecology <- ecology[,c(2,1)]
comm_sp_matrix <- reshape2::acast(ecology, ecology ~ species)
## Using ecology as value column: use value.var to override.
comm_sp_matrix[is.na(comm_sp_matrix)] <- 0
comm_sp_matrix[comm_sp_matrix == "Interstitial" | comm_sp_matrix == "Cave stream" | comm_sp_matrix == "Cave lake"] <- 1
#add another row to a matrix; "community" comprising all species
All <- rep(1,185)
#bind matrix and vector together
comm_sp_matrix <- rbind(comm_sp_matrix, All)
class(comm_sp_matrix) <- "numeric"
rm(All)
Alpha diversity

Calculate simulated alpha functional diversity across trees

#check if matrix column names and tip labels are the same
setdiff(colnames(comm_sp_matrix),simul_func_trees[[1]]$tip.label)
## character(0)
#calculate alpha diversity
alpha_sim1 <- lapply(simul_func_trees, function(x) {alpha(comm = comm_sp_matrix, tree = x)})
#check first element
alpha_sim1[[1]]
##              Richness
## Cave lake    34.71043
## Cave stream  37.46115
## Interstitial 29.49507
## All          71.19194
#extract richness from list for every tree
alpha_sim_df1 <- as.data.frame(do.call(rbind, alpha_sim1))
alpha_sim_df1 <- tibble::rownames_to_column(alpha_sim_df1, "habitat")
#replace values
alpha_sim_df1$habitat <- str_replace_all(alpha_sim_df1$habitat,
                                         c("Cave.lake.\\d+" = "Cave lake",
                                           "Cave.lake" = "Cave lake",
                                           "Interstitial.\\d+" = "Interstitial",
                                           "Cave.stream.\\d+" = "Cave stream",
                                           "Cave.stream" = "Cave stream",
                                           "All.\\d+" = "All"))
unique(alpha_sim_df1$habitat)
## [1] "Cave lake"    "Cave stream"  "Interstitial" "All"

Calculate real alpha functional diversity

#calculate alpha diversity
alpha_real1 <- as.data.frame(alpha(comm = comm_sp_matrix, tree = real_func_tree))
alpha_real1 <- tibble::rownames_to_column(alpha_real1, "habitat")

Calculate p values

#p-value cave lake phylo null model
ses(alpha_real1[1,2], (alpha_sim_df1 %>% filter(habitat == "Cave lake"))$Richness)
##        ses    p-value 
## 1.80267409 0.07143941
#p-value cave stream phylo null model
ses(alpha_real1[2,2], (alpha_sim_df1 %>% filter(habitat == "Cave stream"))$Richness)
##           ses       p-value 
## -4.5868563655  0.0000044997
#p-value interstitial phylo null model
ses(alpha_real1[3,2], (alpha_sim_df1 %>% filter(habitat == "Interstitial"))$Richness)
##           ses       p-value 
## -4.037702e+00  5.397744e-05
#p-value all phylo null model
ses(alpha_real1[4,2], (alpha_sim_df1 %>% filter(habitat == "All"))$Richness)
##         ses     p-value 
## -1.96271685  0.04967908
Beta diversity
#drop row "all" from comm_sp_matrix 
comm_sp_matrix_beta <- comm_sp_matrix[-4,]
#calculate beta diversity
beta_sim1 <- lapply(simul_func_trees, function(x) {beta(comm = comm_sp_matrix_beta, tree = x, func = "jaccard", abund = FALSE)})
#check first element
beta_sim1[[1]]
## $Btotal
##              Cave lake Cave stream
## Cave stream  0.7876989            
## Interstitial 0.6948206   0.7618160
## 
## $Brepl
##              Cave lake Cave stream
## Cave stream  0.7414936            
## Interstitial 0.5888020   0.6145036
## 
## $Brich
##               Cave lake Cave stream
## Cave stream  0.04620531            
## Interstitial 0.10601857  0.14731233
beta_sim1[[1]]$Btotal
##              Cave lake Cave stream
## Cave stream  0.7876989            
## Interstitial 0.6948206   0.7618160

Extract data from lists Btotal1: pair cave stream-cave lake Btotal2: pair interstitial-cave lake Btotal3: pair interstitial-cave stream

#unlist
beta_sim_df1 <- lapply(beta_sim1, function(x){unlist(x)})
#get data for each pair of habitats
#cave stream-interstitial
stream_interstitial <- as.data.frame(sapply(beta_sim_df1, "[[", "Btotal3"))
colnames(stream_interstitial) <- "beta_total"
stream_interstitial$comparison <- "Cave stream - Interstitial"
#cave stream-lake
stream_lake <- as.data.frame(sapply(beta_sim_df1, "[[", "Btotal1"))
colnames(stream_lake) <- "beta_total"
stream_lake$comparison <- "Cave stream - Cave lake"
#interstitial-lake
interstitial_lake <- as.data.frame(sapply(beta_sim_df1, "[[", "Btotal2"))
colnames(interstitial_lake) <- "beta_total"
interstitial_lake$comparison <- "Interstitial - Cave lake"
#bind together
beta_sim_df1 <- rbind(interstitial_lake, stream_lake, stream_interstitial)

Average simulated beta diversity across communities

beta_sim_multi1 <- lapply(simul_func_trees, function(x){beta.multi(comm = comm_sp_matrix_beta, tree = x, func = "jaccard", abund = FALSE)})
#check first element
beta_sim_multi1[[1]]
##          Average     Variance
## Btotal 0.7481118 6.593230e-05
## Brepl  0.6482664 5.713276e-05
## Brich  0.0998454 8.799536e-06
#extract data
beta_sim_multi_df1 <- as.data.frame(do.call(rbind, beta_sim_multi1))
beta_sim_multi_df1 <- tibble::rownames_to_column(beta_sim_multi_df1, "beta_total")
beta_sim_multi_df1 <- beta_sim_multi_df1[,c(1,2)] %>%  filter(str_detect(beta_total, "Btotal"))
beta_sim_multi_df1$comparison <- "Average (all)"
beta_sim_multi_df1$beta_total <- NULL
colnames(beta_sim_multi_df1) <- c("beta_total", "comparison")
#bind together with other pairs
beta_sim_df1 <- rbind(beta_sim_df1, beta_sim_multi_df1)
rm(beta_sim_multi_df1, interstitial_lake, stream_lake, stream_interstitial)

Calculate real beta functional diversity

#calculate beta diversity
beta_real1 <- beta(comm = comm_sp_matrix_beta, tree = real_func_tree, func = "jaccard", abund = FALSE)
beta_real1
## $Btotal
##              Cave lake Cave stream
## Cave stream  0.8848304            
## Interstitial 0.8714144   0.8126775
## 
## $Brepl
##              Cave lake Cave stream
## Cave stream  0.7501522            
## Interstitial 0.4389855   0.4860192
## 
## $Brich
##              Cave lake Cave stream
## Cave stream  0.1346782            
## Interstitial 0.4324289   0.3266582

Average real beta diversity across communities

beta_real_multi1 <- beta.multi(comm = comm_sp_matrix_beta, tree = real_func_tree, func = "jaccard", abund = FALSE)
#keep only total and average
beta_real_multi1 <- as.data.frame(beta_real_multi1[1,1])
colnames(beta_real_multi1) <- "beta_total"
beta_real_multi1$comparison <- "Average (all)"
beta_real1 <- as.data.frame(unlist(beta_real1))
beta_real1 <- tibble::rownames_to_column(beta_real1, "beta_total")[1:3,]
beta_real1$comparison <- c("Cave stream - Cave lake", "Interstitial - Cave lake", "Cave stream - Interstitial")
beta_real1$beta_total <- NULL
colnames(beta_real1) <- c("beta_total", "comparison")
#bind together with other pairs
beta_real_df1 <- rbind(beta_real1, beta_real_multi1)
beta_real_df1$beta_total <- as.numeric(beta_real_df1$beta_total)

Calculate p values

#p-value stream-lake phylo null model
ses(beta_real_df1[1,1], (beta_sim_df1 %>% filter(comparison == "Cave stream - Cave lake"))$beta_total)
##       ses   p-value 
## 1.8644346 0.0622607
#p-value interstitial-lake phylo null model
ses(beta_real_df1[2,1], (beta_sim_df1 %>% filter(comparison == "Interstitial - Cave lake"))$beta_total)
##         ses     p-value 
## 3.051446742 0.002277414
#p-value stream-interstitial phylo null model
ses(beta_real_df1[3,1], (beta_sim_df1 %>% filter(comparison == "Cave stream - Interstitial"))$beta_total)
##        ses    p-value 
## 1.82323213 0.06826824
#p-value all phylo null model
ses(beta_real_df1[4,1], (beta_sim_df1 %>% filter(comparison == "Average (all)"))$beta_total)
##         ses     p-value 
## 3.263574636 0.001100162
rm(alpha_sim1, beta_real_multi1, beta_sim1, beta_sim_multi1, real_func_tree, simul_func_trees, simul_phylo, tree_simmap, ecology_ggtree, testtree)

5.2) Functional diversity of real data and data randomly drawn from real data

Create simulations from real data
#edit pc1_scores df
pc1_scores$species <- rownames(pc1_scores)
row.names(pc1_scores) <- NULL
pc1_scores <- pc1_scores[,c(5, 1:4)]
#create empty list
simul_list <- vector("list", length = 999)
#fill with the same dataframe
simul_list <- lapply(simul_list, function(x) {pc1_scores})
#randomize dataframes (keep fixed ecological groups)
simul_list <- lapply(simul_list, function(x){
  x <- x %>% group_by(ecology) %>% mutate(locom_sim = sample(PC1_locom)) %>% 
    mutate(shape_sim = sample(PC1_shape)) %>% 
    mutate(tp_sim = sample(PC1_tp))
})
#convert to dataframes
simul_list <- lapply(simul_list, function(x) {as.data.frame(x)})
#check
head(simul_list[[1]])
##              species  PC1_locom   PC1_shape      PC1_tp      ecology  locom_sim
## 1         C_paradoxa -1.0951157 -0.04740638 -0.03107836 Interstitial  1.1134747
## 2         N_aberrans  0.7999054  0.42399276  0.30402126 Interstitial -0.6524148
## 3 N_aggtelekiensis_A -2.9534231  0.15731941 -1.08962036  Cave stream -5.2550535
## 4 N_aggtelekiensis_C -0.8487497 -0.08765964  0.10829912  Cave stream -2.4968589
## 5       N_aitolosi_H -2.2424311  0.24299608 -0.58867775  Cave stream -0.6079968
## 6         N_alisadri -0.5702908 -0.23317864  0.46104493  Cave stream -1.4315946
##     shape_sim     tp_sim
## 1  0.53776592 -0.1568067
## 2  0.05381629  0.8700415
## 3 -0.32527909  0.2666107
## 4  0.38454291  0.4851179
## 5  0.15731941 -1.5608668
## 6 -0.01344145 -2.7302167
#species as rownames
simul_list <- lapply(simul_list, function(x) {rownames(x) <- x$species;x})
#remove unneccessary columns: species, real values and ecology
simul_list <- lapply(simul_list, function(x) {x <- x[,c(6:8)]})
#check
head(simul_list[[1]])
##                     locom_sim   shape_sim     tp_sim
## C_paradoxa          1.1134747  0.53776592 -0.1568067
## N_aberrans         -0.6524148  0.05381629  0.8700415
## N_aggtelekiensis_A -5.2550535 -0.32527909  0.2666107
## N_aggtelekiensis_C -2.4968589  0.38454291  0.4851179
## N_aitolosi_H       -0.6079968  0.15731941 -1.5608668
## N_alisadri         -1.4315946 -0.01344145 -2.7302167
Construct functional trees

First try test tree to see which method is best

testtree <- tree.build(trait = simul_list[[1]], distance = "euclidean", func = "best")
## The best tree is given by bionj with a quality of 0.9972073
testtree #the best tree is given by neighbor-joining algorithm
## 
## Phylogenetic tree with 185 tips and 183 internal nodes.
## 
## Tip labels:
##   C_paradoxa, N_aberrans, N_aggtelekiensis_A, N_aggtelekiensis_C, N_aitolosi_H, N_alisadri, ...
## 
## Unrooted; includes branch lengths.

Construct functional tree from simulated data

simul_func_trees <- lapply(simul_list, function(x){
  tree.build(trait = x, distance = "euclidean", func = "nj")
})

Construct functional tree from real data

rownames(pc1_scores) <- pc1_scores$species
pc1_scores <- pc1_scores[,c(-1,-5)]
real_func_tree <- tree.build(trait = pc1_scores, distance = "euclidean", func = "nj")
Alpha diversity

Calculate simulated alpha functional diversity across trees

#check if matrix column names and tip labels are the same
setdiff(colnames(comm_sp_matrix),simul_func_trees[[1]]$tip.label)
## character(0)
#calculate alpha diversity
alpha_sim2 <- lapply(simul_func_trees, function(x) {alpha(comm = comm_sp_matrix, tree = x)})
#check first element
alpha_sim2[[1]]
##              Richness
## Cave lake    37.72950
## Cave stream  36.60450
## Interstitial 18.09249
## All          76.28984
#extract richness from list for every tree
alpha_sim_df2 <- as.data.frame(do.call(rbind, alpha_sim2))
alpha_sim_df2 <- tibble::rownames_to_column(alpha_sim_df2, "habitat")
#replace values
alpha_sim_df2$habitat <- str_replace_all(alpha_sim_df1$habitat,
                                         c("Cave.lake.\\d+" = "Cave lake",
                                           "Cave.lake" = "Cave lake",
                                           "Interstitial.\\d+" = "Interstitial",
                                           "Cave.stream.\\d+" = "Cave stream",
                                           "Cave.stream" = "Cave stream",
                                           "All.\\d+" = "All"))
unique(alpha_sim_df2$habitat)
## [1] "Cave lake"    "Cave stream"  "Interstitial" "All"

Calculate p values

#p-value cave lake phylo null model
ses(alpha_real1[1,2], (alpha_sim_df2 %>% filter(habitat == "Cave lake"))$Richness)
##          ses      p-value 
## -2.827545301  0.004690638
#p-value cave stream phylo null model
ses(alpha_real1[2,2], (alpha_sim_df2 %>% filter(habitat == "Cave stream"))$Richness)
##           ses       p-value 
## -9.108432e+00  8.358099e-20
#p-value interstitial phylo null model
ses(alpha_real1[3,2], (alpha_sim_df2 %>% filter(habitat == "Interstitial"))$Richness)
##          ses      p-value 
## -2.639957143  0.008291651
#p-value all phylo null model
ses(alpha_real1[4,2], (alpha_sim_df2 %>% filter(habitat == "All"))$Richness)
##           ses       p-value 
## -8.674242e+00  4.163155e-18
Beta diversity

Calculate simulated beta functional diversity

#drop row "all" from comm_sp_matrix 
comm_sp_matrix_beta <- comm_sp_matrix[-4,]
#calculate beta diversity
beta_sim2 <- lapply(simul_func_trees, function(x) {beta(comm = comm_sp_matrix_beta, tree = x, func = "jaccard", abund = FALSE)})
#check first element
beta_sim2[[1]]
## $Btotal
##              Cave lake Cave stream
## Cave stream  0.9216298            
## Interstitial 0.8629807   0.8508365
## 
## $Brepl
##              Cave lake Cave stream
## Cave stream  0.9053093            
## Interstitial 0.4630011   0.4619062
## 
## $Brich
##               Cave lake Cave stream
## Cave stream  0.01632052            
## Interstitial 0.39997958  0.38893035

Extract data from lists Btotal1: pair cave stream-cave lake Btotal2: pair interstitial-cave lake Btotal3: pair interstitial-cave stream

#unlist
beta_sim_df2 <- lapply(beta_sim2, function(x){unlist(x)})
#get data for each pair of habitats
#cave stream-interstitial
stream_interstitial <- as.data.frame(sapply(beta_sim_df2, "[[", "Btotal3"))
colnames(stream_interstitial) <- "beta_total"
stream_interstitial$comparison <- "Cave stream - Interstitial"
#cave stream-lake
stream_lake <- as.data.frame(sapply(beta_sim_df2, "[[", "Btotal1"))
colnames(stream_lake) <- "beta_total"
stream_lake$comparison <- "Cave stream - Cave lake"
#interstitial-lake
interstitial_lake <- as.data.frame(sapply(beta_sim_df2, "[[", "Btotal2"))
colnames(interstitial_lake) <- "beta_total"
interstitial_lake$comparison <- "Interstitial - Cave lake"
#bind together
beta_sim_df2 <- rbind(stream_interstitial, stream_lake, interstitial_lake)

Average simulated beta diversity across communities

beta_sim_multi2 <- lapply(simul_func_trees, function(x){beta.multi(comm = comm_sp_matrix_beta, tree = x, func = "jaccard", abund = FALSE)})
#check first element
beta_sim_multi2[[1]]
##          Average     Variance
## Btotal 0.8784824 7.742206e-05
## Brepl  0.6100722 5.376665e-05
## Brich  0.2684102 2.365542e-05
#extract data
beta_sim_multi_df2 <- as.data.frame(do.call(rbind, beta_sim_multi2))
beta_sim_multi_df2 <- tibble::rownames_to_column(beta_sim_multi_df2, "beta_total")
beta_sim_multi_df2 <- beta_sim_multi_df2[,c(1,2)] %>%  filter(str_detect(beta_total, "Btotal"))
beta_sim_multi_df2$comparison <- "Average (all)"
beta_sim_multi_df2$beta_total <- NULL
colnames(beta_sim_multi_df2) <- c("beta_total", "comparison")
#bind together with other pairs
beta_sim_df2 <- rbind(beta_sim_df2, beta_sim_multi_df2)
rm(beta_sim_multi_df2, stream_interstitial, stream_lake, interstitial_lake)

Calculate p values

#p-value stream-lake phylo null model
ses(beta_real_df1[1,1], (beta_sim_df2 %>% filter(comparison == "Cave stream - Cave lake"))$beta_total)
##         ses     p-value 
## -1.79321084  0.07293922
#p-value interstitial-lake phylo null model
ses(beta_real_df1[2,1], (beta_sim_df2 %>% filter(comparison == "Interstitial - Cave lake"))$beta_total)
##       ses   p-value 
## 0.6847664 0.4934914
#p-value stream-interstitial phylo null model
ses(beta_real_df1[3,1], (beta_sim_df2 %>% filter(comparison == "Cave stream - Interstitial"))$beta_total)
##        ses    p-value 
## -0.6612373  0.5084602
#p-value all phylo null model
ses(beta_real_df1[4,1], (beta_sim_df2 %>% filter(comparison == "Average (all)"))$beta_total)
##        ses    p-value 
## -0.8077298  0.4192462

Plots

Alpha diversity

#combine data in single df
alpha_sim_df1$simul <- "Phylogenetic null model"
alpha_sim_df2$simul <- "Non-phylogenetic null model"
alpha_sim_df <- rbind(alpha_sim_df1, alpha_sim_df2)
quantiles_a_phy <- alpha_sim_df1 %>% group_by(habitat) %>% 
  summarise(int = quantile(Richness, c(0.025, 0.975)))
## `summarise()` has grouped output by 'habitat'. You can override using the
## `.groups` argument.
quantiles_a_nonphy <- alpha_sim_df2 %>% group_by(habitat) %>% 
  summarise(int = quantile(Richness, c(0.025, 0.975))) 
## `summarise()` has grouped output by 'habitat'. You can override using the
## `.groups` argument.
ggplot() + 
  geom_density(data = alpha_sim_df %>% filter(simul == "Phylogenetic null model"), 
              mapping = aes(x = Richness, fill = habitat), 
              alpha = 0.5, color = NA) +
  geom_density(data = alpha_sim_df %>% filter(simul == "Non-phylogenetic null model"), 
               mapping = aes(x = Richness, color = habitat), alpha = 1) +
  geom_point(data = alpha_real1, 
             mapping = aes(x = Richness, y = 0, fill = habitat), shape = 21, size = 3) + 

  scale_fill_manual(values = c("grey40", "#4dad47", "#7570b1","#f37d20")) +
  scale_color_manual(values = c("grey40", "#4dad47", "#7570b1","#f37d20")) +

  facet_wrap(~habitat, scales = "fixed", nrow = 4, ncol = 1) +
  geom_vline(data = quantiles_a_phy, aes(xintercept = int, color = habitat), alpha = 0.7) +
  geom_vline(data = quantiles_a_nonphy, aes(xintercept = int, color = habitat), 
             linetype = "dashed") +
  
  xlab(expression(alpha*-functional~diversity~(richness))) +
  ylab("Density") +
  theme_bw() +
  theme(axis.text = element_text(color = "black"),
        legend.position = "none",
        panel.grid.major.y = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(fill = "white"))

#ggsave("sim_alpha_density.svg", plot = last_plot(), dpi = 300, width = 4, height = 8)

Beta diversity

#combine data in single df
beta_sim_df1$simul <- "Phylogenetic null model"
beta_sim_df2$simul <- "Non-phylogenetic null model"
beta_sim_df <- rbind(beta_sim_df1, beta_sim_df2)
quantiles_b_phy <- beta_sim_df1 %>% group_by(comparison) %>% 
  summarise(int = quantile(beta_total, c(0.025, 0.975)))
## `summarise()` has grouped output by 'comparison'. You can override using the
## `.groups` argument.
quantiles_b_nonphy <- beta_sim_df2 %>% group_by(comparison) %>% 
  summarise(int = quantile(beta_total, c(0.025, 0.975))) 
## `summarise()` has grouped output by 'comparison'. You can override using the
## `.groups` argument.
ggplot() + 
  geom_density(data = beta_sim_df %>% filter(simul == "Phylogenetic null model"), 
              mapping = aes(x = beta_total), alpha = 0.5, fill = "grey40", color = NA) +
  
  geom_density(data = beta_sim_df %>% filter(simul == "Non-phylogenetic null model"), 
               mapping = aes(x = beta_total), alpha = 1) +
  geom_point(data = beta_real_df1, 
             mapping = aes(x = beta_total, y = 0), shape = 21, size = 3) + 

 facet_wrap(~comparison, scales = "fixed", nrow = 4, ncol = 1) +
  
  geom_vline(data = quantiles_b_phy, aes(xintercept = int), alpha = 0.7, color = "grey40") +
  geom_vline(data = quantiles_b_nonphy, aes(xintercept = int), linetype = "dashed", color = "black") +
  
  xlab(expression(beta*-functional~diversity~(total))) +
  ylab("Density") +
  theme_bw() +
  theme(axis.text = element_text(color = "black"),
        legend.position = "none",
        panel.grid.major.y = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(fill = "white"))

#ggsave("sim_beta_density.svg", plot = last_plot(), dpi = 300, width = 4, height = 8)
#colors to this plot were added in Adobe Illustrator
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Slovenian_Slovenia.utf8  LC_CTYPE=Slovenian_Slovenia.utf8   
## [3] LC_MONETARY=Slovenian_Slovenia.utf8 LC_NUMERIC=C                       
## [5] LC_TIME=Slovenian_Slovenia.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] stringr_1.5.0  ggtree_3.4.4   BAT_2.9.2      ellipse_0.4.3  mvMORPH_1.1.6 
##  [6] subplex_1.8    corpcor_1.6.10 car_3.1-1      carData_3.0-5  scales_1.2.1  
## [11] tidyr_1.2.1    dplyr_1.0.10   sensiPhy_0.8.5 ggplot2_3.4.0  phylolm_2.6.2 
## [16] phytools_1.2-0 maps_3.4.1     ape_5.6-2      readxl_1.4.1  
## 
## loaded via a namespace (and not attached):
##   [1] spam_2.9-1              fastmatch_1.1-3         plyr_1.8.8             
##   [4] igraph_1.3.5            lazyeval_0.2.2          sp_1.6-0               
##   [7] splines_4.2.2           listenv_0.9.0           digest_0.6.30          
##  [10] yulab.utils_0.0.6       foreach_1.5.2           htmltools_0.5.4        
##  [13] glassoFast_1.0          fansi_1.0.3             magrittr_2.0.3         
##  [16] optimParallel_1.0-2     cluster_2.1.4           doParallel_1.0.17      
##  [19] ks_1.14.0               recipes_1.0.4           globals_0.16.2         
##  [22] fastcluster_1.2.3       gower_1.0.1             hardhat_1.2.0          
##  [25] timechange_0.2.0        pdist_1.2.1             prettyunits_1.1.1      
##  [28] colorspace_2.1-0        xfun_0.36               crayon_1.5.2           
##  [31] jsonlite_1.8.4          survival_3.5-0          phangorn_2.11.1        
##  [34] iterators_1.0.14        glue_1.6.2              gtable_0.3.1           
##  [37] ipred_0.9-13            geiger_2.0.10           future.apply_1.10.0    
##  [40] abind_1.4-5             mvtnorm_1.1-3           DBI_1.1.3              
##  [43] Rcpp_1.0.9              plotrix_3.8-2           progress_1.2.2         
##  [46] palmerpenguins_0.1.1    magic_1.6-1             tidytree_0.4.2         
##  [49] gridGraphics_0.5-1      proxy_0.4-27            mclust_6.0.0           
##  [52] deSolve_1.34            dotCall64_1.0-2         stats4_4.2.2           
##  [55] lava_1.7.1              prodlim_2019.11.13      ellipsis_0.3.2         
##  [58] farver_2.1.1            pkgconfig_2.0.3         nnet_7.3-18            
##  [61] sass_0.4.5              utf8_1.2.2              caret_6.0-93           
##  [64] labeling_0.4.2          ggplotify_0.1.0         tidyselect_1.2.0       
##  [67] rlang_1.0.6             reshape2_1.4.4          munsell_0.5.0          
##  [70] pbmcapply_1.5.1         cellranger_1.1.0        tools_4.2.2            
##  [73] cachem_1.0.6            cli_3.4.0               generics_0.1.3         
##  [76] evaluate_0.20           geometry_0.4.6.1        fastmap_1.1.0          
##  [79] yaml_2.3.6              ModelMetrics_1.2.2.2    knitr_1.42             
##  [82] purrr_1.0.1             future_1.30.0           nlme_3.1-161           
##  [85] aplot_0.1.9             pracma_2.4.2            compiler_4.2.2         
##  [88] rstudioapi_0.14         e1071_1.7-12            treeio_1.20.2          
##  [91] clusterGeneration_1.3.7 tibble_3.1.8            bslib_0.4.2            
##  [94] stringi_1.7.8           highr_0.10              rgeos_0.6-1            
##  [97] lattice_0.20-45         Matrix_1.5-3            permute_0.9-7          
## [100] vegan_2.6-4             vctrs_0.5.0             pillar_1.8.1           
## [103] lifecycle_1.0.3         combinat_0.0-8          jquerylib_0.1.4        
## [106] data.table_1.14.6       raster_3.6-14           patchwork_1.1.2        
## [109] R6_2.5.1                KernSmooth_2.23-20      parallelly_1.34.0      
## [112] codetools_0.2-18        MASS_7.3-58.2           assertthat_0.2.1       
## [115] proto_1.0.0             withr_2.5.0             mnormt_2.1.1           
## [118] mgcv_1.8-41             expm_0.999-7            parallel_4.2.2         
## [121] hms_1.1.2               terra_1.7-3             ggfun_0.0.9            
## [124] quadprog_1.5-8          grid_4.2.2              rpart_4.1.19           
## [127] timeDate_4022.108       coda_0.19-4             class_7.3-21           
## [130] rmarkdown_2.20          nls2_0.3-3              hypervolume_3.1.0      
## [133] pROC_1.18.0             numDeriv_2016.8-1.1     scatterplot3d_0.3-42   
## [136] lubridate_1.9.1