library(dplyr)
library(stringr)
library(ggplot2)
library(fuzzyjoin)
library(tidyr)
library(ape)
library(caper)
library(phangorn)
library(picante)
library(pbmcapply)
library(plyr)
library(ggpubr)
library(ggExtra)
library(coda)
library(geiger)
library(ggtree)
library(ggnewscale)

Statistical models

PGLS models presented in main text and supplementary material. Requires packages above and function to be loaded. Not presented in html format.

Figures

Figure 1

env_nest_size_total <- read.csv('env_nest_size_total_2022.csv')
mcc_tree=readRDS('mcc_tree10.rds')
comb_phy= env_nest_size_total %>% filter(location=="CONTINENTAL")
comb_phy$logrange=scale(log(comb_phy$range.size.m2))
comb_phy$nest_cat_cup2=as.factor(comb_phy$nest_three_cat)

#Filter information per family BLFamilyLatin#
comb_temp_fam = (comb_phy %>% group_by(BLFamilyLatin, nest_three_cat) %>% summarise_all(mean) %>% ungroup())[c('BLFamilyLatin','nest_three_cat','range.size.m2','PC1_T','PC1_P')]

comb_temp_fam$code=paste(comb_temp_fam$BLFamilyLatin,comb_temp_fam$nest_three_cat)
comb_names = (comb_phy %>% group_by(BLFamilyLatin, nest_three_cat) %>% sample_n(1))[c('TipLabel','BLFamilyLatin','nest_three_cat')]
comb_names$code=paste(comb_names$BLFamilyLatin,comb_names$nest_three_cat)

fam_data= merge(comb_temp_fam, comb_names, by='code')
fam_data$logrange= scale(log(fam_data$range.size.m2))

species=as.data.frame(unique(fam_data$TipLabel))
row.names(species)=species[,1]
temp <- name.check(mcc_tree, species)
tree.pruned <- drop.tip(mcc_tree, tip=temp$tree_not_data)

tree_only <- ggtree(tree.pruned,layout='circular' , color='gray60', lwd=1)  # + geom_tiplab(aes (color='blue'))

 ggtree(tree.pruned,layout='circular' , color='gray60', lwd=1) + geom_tiplab(size=1.5) 

both= fam_data[c('TipLabel','logrange','nest_three_cat.y')]
#Rescale limits of log range size so that looks good on figure
both= both %>% mutate(logrange = ifelse(logrange < -2.5, -2.5, logrange))
both= both %>% mutate(logrange = ifelse(logrange > 1.5, 1.5, logrange))

rownames(both)= both$TipLabel
both0=as.data.frame(both[,2])
rownames(both0)=fam_data$TipLabel

both1=as.data.frame(both[,3])
rownames(both1)=fam_data$TipLabel
both1$`both[, 3]`= factor(both1$`both[, 3]`, levels=c("Domed", "Open","Cavity"))

range <-  gheatmap(tree_only, both0, offset=0.1, colnames_offset_y = 0, width=0.05, colnames=FALSE) + scale_fill_gradient2( low ="#892B64", mid = "#00BACA", high = "#00BACA", midpoint = 1.3) 

p2 <- range + new_scale_fill()

both <- gheatmap(p2, both1, offset=3, width=0.05, colnames=FALSE) + scale_fill_manual(values=c("#93DA49","#892B64","#00BACA"))

both

Figure 2

building <- read.csv('nest_building.csv')

model_building <- readRDS('Model_nest_building_reduced.rds')
newdata_b <- as.data.frame(cbind(building$nest_category2, building$BodyMass.Value))
colnames(newdata_b) <- c('nest_category2','BodyMass.Value')
newdata_b$BodyMass.Value <- as.numeric(as.character(newdata_b$BodyMass.Value))
newdata_b$nest_category2 = factor(newdata_b$nest_category2, levels=c("Open", "Domed"))
pre_building <- predict.pgls(model_building, newdata_b)
building <- cbind(building, pre_building)
#building$nest_category2 = factor(building$nest_category2, levels=c("Domed", "Open"))

p5 <- ggplot(data= building, aes(x=log(BodyMass.Value), y= log(average.days.number), colour= nest_category2 )) + geom_point(size=2) + geom_line(aes(x=log(BodyMass.Value), y=pre_building), lwd=1.5) + scale_colour_manual(values=c("#892B64","#00BACA","#93DA49")) + theme_classic() + theme(legend.position='none', text = element_text(size=13)) + xlab('Body mass (log)') + ylab('Average days to construct (log)')

p5_ok <- ggMarginal(p5, groupColour= TRUE, groupFill=TRUE, margins = c("y"))

p5_ok

p6 <- ggplot() + geom_violin(data=building, aes(x=as.factor(who2), y=log(average.days.number)),fill="gray91", colour="gray91") + geom_jitter(data=building, aes(x=as.factor(who2), y=log(average.days.number), fill=nest_category2), alpha=0.8, width=0.1, shape=21, size=3) + theme_classic()  + scale_fill_manual(values=c("#892B64","#00BACA","#93DA49")) +
    stat_summary(data= building,aes(x=who2, y=log(average.days.number), fill=nest_category2),fun.data = mean_se, geom = "errorbar", position = position_dodge(width = 0.3), width = 0.2, size=1) + stat_summary(data=building,aes(x=who2, y=log(average.days.number),fill=nest_category2),fun.y=mean, geom="point", shape=23, size=5, stroke=2,position = position_dodge(width = 0.3) ) + theme_classic() + scale_colour_manual(values=c("#892B64","#00BACA","#93DA49")) + theme(legend.position='none', text = element_text(size=13)) + xlab('Builder') + ylab('Average days to construct (log)')

ggarrange(p6, p5_ok)

model_building_100= readRDS('PGLS results/Model_nest_building100_2021.rds')
HPDinterval(as.mcmc(model_building_100[2:16]))
##                        lower        upper
## estimateDomed   0.3526677339  0.388126113
## ts1             3.1223224788  3.449328919
## ps1             0.0006518628  0.001989330
## estimatefemale  0.1373667843  0.165437416
## tP2             1.5056418877  1.818288968
## pP2             0.0701279564  0.133328095
## estimatemale   -0.3170967056 -0.233643192
## tP3            -1.5081122977 -1.122307546
## pP3             0.1326949853  0.262728742
## estimatelat     0.1139541928  0.131479124
## tP5             2.1431948637  2.614764889
## pP5             0.0094306346  0.032991194
## X1             -0.0040042806 -0.003299139
## X2             -1.4714017963 -1.211381134
## X3              0.1380033088  0.220971529
## attr(,"Probability")
## [1] 0.95

Figure 3 (First part)

env_nest_size_total$nest_three_cat = factor(env_nest_size_total$nest_three_cat, levels=c("Open", "Domed","Cavity"))

Model_range_c <- readRDS('PGLS results/MCC_Range_nest.logsize.lat.cont.q2.oct.cat2.R')

newdata_temp <- as.data.frame(cbind(as.character(env_nest_size_total$nest_three_cat)), env_nest_size_total$BodyMass.Value)

newdata_temp$BodyMass.Value <- as.numeric(as.character(env_nest_size_total$BodyMass.Value))
colnames(newdata_temp) <- c('nest_cat_cup2','BodyMass.Value')

#Create grid for prediction for small species
newdata_small <- expand.grid(nest_cat_cup2 = (env_nest_size_total$nest_three_cat), 
                            BodyMass.Value = 15)

newdata_small <- cbind(newdata_small, env_nest_size_total$Latitude)
colnames(newdata_small) <- c('nest_cat_cup2','BodyMass.Value','Latitude')


#Create grid for prediction for medium species
newdata_med <- expand.grid(nest_cat_cup2 = (env_nest_size_total$nest_three_cat), 
                            BodyMass.Value = 30)

newdata_med  <- cbind(newdata_med, env_nest_size_total$Latitude)
colnames(newdata_med) <- c('nest_cat_cup2','BodyMass.Value','Latitude')

#Create grid for prediction for large species
newdata_big <- expand.grid(nest_cat_cup2 = (env_nest_size_total$nest_three_cat), 
                            BodyMass.Value = 100)
newdata_big  <- cbind(newdata_big, env_nest_size_total$Latitude)
colnames(newdata_big) <- c('nest_cat_cup2','BodyMass.Value','Latitude')

#Predict the values using the dataset created above

pred_range_small=predict.pgls(Model_range_c, newdata=newdata_small,type="response", se.fit=TRUE, interval="prediction", level=0.95, it=NULL, posterior="all", verbose=TRUE)

pred_range_md=predict.pgls(Model_range_c, newdata=newdata_med,type="response", se.fit=TRUE, interval="prediction", level=0.95, it=NULL, posterior="all", verbose=TRUE)

pred_range_lg=predict.pgls(Model_range_c, newdata=newdata_big,type="response", se.fit=TRUE, interval="prediction", level=0.95, it=NULL, posterior="all", verbose=TRUE)

#Combine original dataset with the predicted values
env_nest_size_total_range <- env_nest_size_total %>% drop_na(Latitude)
comb_range= cbind(env_nest_size_total_range,pred_range_small,pred_range_lg,pred_range_md)

comb_range =  comb_range %>% 
  mutate(body_cat = ifelse( BodyMass.Value < 15,  "Small",
                     ifelse(15 < BodyMass.Value & BodyMass.Value < 30, "Medium", 
                    ifelse( 30 < BodyMass.Value & BodyMass.Value < 1000, "Large", 'NA')))) 

comb_range_sm= comb_range  %>% filter(body_cat == "Small") %>% filter(location == "CONTINENTAL")

p1 <- ggplot(comb_range_sm, aes(x=abs(Latitude), y=log(range.size.m2), colour=nest_three_cat)) + geom_point(alpha=0.5) + scale_fill_manual(values=c("#00BACA","#892B64","#93DA49")) + scale_colour_manual(values=c("#00BACA","#892B64","#93DA49")) + theme_classic() + theme(text = element_text(size=13))+ geom_line(aes(x=abs(Latitude), y=pred_range_small), lwd=1)+
  ylim(15,32) + theme(legend.position='none') +   xlab('Latitude (abs)') + ylab(bquote('Range size (log'~ m^2 ~ ')'))
  
p1_ok <- ggMarginal(p1, groupColour= TRUE, groupFill=TRUE,  margins = c("y"))
  
comb_range_md= comb_range  %>%  filter(body_cat == "Medium") %>% filter(location == "CONTINENTAL")

p2 <- ggplot(comb_range_md, aes(x=abs(Latitude), y=log(range.size.m2), colour=nest_three_cat)) + geom_point(alpha=0.5) +  scale_fill_manual(values=c("#00BACA","#892B64","#93DA49")) + scale_colour_manual(values=c("#00BACA","#892B64","#93DA49")) + theme_classic() + theme(text = element_text(size=13))+
  ylim(15,32) + theme(legend.position='none') + geom_line(aes(x=abs(Latitude), y=pred_range_md), lwd=1) + 
  xlab('Latitude (abs)') + ylab(bquote('Range size (log'~ m^2 ~ ')'))
  
p2_ok <- ggMarginal(p2, groupColour= TRUE, groupFill=TRUE,  margins = c("y"))

 
comb_range_lg= comb_range  %>%  filter(body_cat == "Large") %>% filter(location == "CONTINENTAL")


p3 <- ggplot(comb_range_lg, aes(x=abs(Latitude), y=log(range.size.m2), colour=nest_three_cat)) + geom_point(alpha=0.5) +  scale_fill_manual(values=c("#00BACA","#892B64","#93DA49")) + scale_colour_manual(values=c("#00BACA","#892B64","#93DA49")) + theme_classic() + theme(text = element_text(size=13))+
  ylim(15,32) + theme(legend.position='none') + geom_line(aes(x=abs(Latitude), y=pred_range_lg), lwd=1) +   xlab('Latitude (abs)') + ylab(bquote('Range size (log'~ m^2 ~ ')'))
  
p3_ok <- ggMarginal(p3, groupColour= TRUE, groupFill=TRUE, margins = c("y"))

ggarrange(p1_ok,p2_ok,p3_ok, nrow=1)

Figure 3 (complete)

env_nest_size_total$nest_three_cat = factor(env_nest_size_total$nest_three_cat, levels=c("Open", "Domed","Cavity"))

fam_graph = env_nest_size_total %>% filter(BLFamilyLatin == "Emberizidae" | BLFamilyLatin == "Alaudidae" | BLFamilyLatin == "Sylviidae" | BLFamilyLatin == "Muscicapidae" ) %>% filter(location == "CONTINENTAL")

p4_ok <- ggplot() + geom_violin(data=fam_graph, aes(x=as.factor(nest_three_cat), y=log(range.size.m2)),fill="gray91", colour="gray91") + geom_boxplot(data=fam_graph, aes(x=as.factor(nest_three_cat), y=log(range.size.m2)), fill=NA, colour="gray40") + geom_jitter(data=fam_graph, aes(x=as.factor(nest_three_cat), y=log(range.size.m2), fill=nest_three_cat), alpha=0.35, width=0.1, shape=21, size=3) + theme_classic() + scale_fill_manual(values=c("#00BACA","#892B64","#93DA49")) + scale_colour_manual(values=c("#00BACA","#892B64","#93DA49")) + facet_grid(.~BLFamilyLatin) + ylim(20,32) + theme(legend.position='none', text = element_text(size=13)) +
    xlab(NULL) + ylab(bquote('Range size (log'~ m^2 ~ ')'))

ggarrange(p4_ok,                                                 # First row with scatter plot
          ggarrange(p1_ok, p2_ok, p3_ok, ncol = 3, labels = c("B", "C","D")), # Second row with box and dot plots
          nrow = 2, 
          labels = "A"                                        # Labels of the scatter plot
          ) 

Additional fig

##### Detail within families

p6_ok <- ggplot() + geom_point(data=fam_graph, aes(x=abs(Latitude), y=log(range.size.m2), colour=nest_three_cat)) + geom_smooth(method='lm', se= FALSE, data=fam_graph, aes(x=abs(Latitude), y=log(range.size.m2), colour= nest_three_cat)) + scale_fill_manual(values=c("#00BACA","#892B64","#93DA49")) + theme_classic() + scale_colour_manual(values=c("#00BACA","#892B64","#93DA49")) + facet_grid(.~BLFamilyLatin) +
  theme(legend.position='none', text = element_text(size=13)) + ylab(bquote('Range size (log'~ m^2 ~ ')')) + xlab('Latitude (abs)')

  
p7_ok <- ggplot() +  geom_boxplot(data=fam_graph, aes(x=nest_three_cat, y=log(BodyMass.Value)), fill=NA, ) + geom_jitter(data=fam_graph, aes(x=as.factor(nest_three_cat), y=log(BodyMass.Value), fill=as.factor(nest_three_cat)), alpha=0.35, width=0.1, shape=21, size=3) + theme_classic() + scale_fill_manual(values=c("#00BACA","#892B64","#93DA49")) + scale_colour_manual(values=c("#00BACA","#892B64","#93DA49")) + facet_grid(.~BLFamilyLatin) +
  theme(legend.position='none', text = element_text(size=13)) + xlab('Nest type') + ylab('Body mass (log g)')

 ggarrange(p6_ok, p7_ok, ncol=1)