library(rsq)

library(rgdal)

library(pscl)

library(dominanceanalysis)

library(lmtest)

library(sandwich)

library(parameters)

library(effectsize)

library(spdep)

library(rgeos)

library(spatialreg)

library(olsrr)

library(dispmod)

library(gridExtra)

library(viridis)

library(lattice)

library(latticeExtra)

library(factoextra)

library(grid)

library(FactoMineR)

library(FSA)

library(raster)




# Load Data Matrix

data2=read.csv(file.choose(),header=T, sep=";")

head(data2)

data2=data2[,-1]

head(data2)




######################## Preparing the Dataset #######################


data3=data2[complete.cases(data2[,2:22]),]


# addressing pairwise variable correlations

cor(data3[,c(2:20,22)], use="pairwise.complete.obs")


## Calculating Tolerance for the most correlated predictors of Megafauna Indices to evaluate Multicollinearity 

## Separate analyses will be carried to MAR and pH as they were already found to be highly correlated

# for MAR

summary(lm(INSUL~MAT+MAR+RS+CEC+SND+FF+FI, data3))

summary(lm(MAT~INSUL+MAR+RS+CEC+SND+FF+FI, data3))

summary(lm(MAR~MAT+INSUL+RS+CEC+SND+FF+FI, data3))

summary(lm(RS~MAT+INSUL+MAR+CEC+SND+FF+FI, data3))

summary(lm(CEC~RS+MAT+INSUL+MAR+SND+FF+FI, data3))

summary(lm(SND~CEC+RS+MAT+INSUL+MAR+FF+FI, data3))

summary(lm(FF~SND+CEC+RS+MAT+INSUL+MAR+FI, data3))

summary(lm(FI~FF+SND+CEC+RS+MAT+INSUL+MAR, data3))


# for pH

summary(lm(INSUL~MAT+pH+RS+CEC+SND+FF+FI, data3))

summary(lm(MAT~INSUL+pH+RS+CEC+SND+FF+FI, data3))

summary(lm(pH~MAT+INSUL+RS+CEC+SND+FF+FI, data3))

summary(lm(RS~MAT+INSUL+pH+CEC+SND+FF+FI, data3))

summary(lm(CEC~RS+MAT+INSUL+pH+SND+FF+FI, data3))

summary(lm(SND~CEC+RS+MAT+INSUL+pH+FF+FI, data3))

summary(lm(FF~SND+CEC+RS+MAT+INSUL+pH+FI, data3))

summary(lm(FI~FF+SND+CEC+RS+MAT+INSUL+pH, data3))

# no evidence of multicollinearity for the remaining variables


# prepare sets of polygon neighbourhood objects to test for spatial autocorrelation in the residuals and, if needed, to perform spatial regression

# with islands (for megafauna metrics)

shape = readOGR(dsn = "D:/Work/Ongoing/Projects/Neotropical Large Herbivores and Ecorregions/Ecoregions2017")

neot=shape[shape$REALM=="Neotropic",]

polyneight<-poly2nb(neot)

polyneight

neilistw=nb2listw(polyneight,zero.policy = TRUE)# the argument zero.policy = TRUE is needed because of islands

# match ecoregion column name in "data3" and "neot"
 
names(data3)[1]="ECO_NAME"

polyneight3 <- subset(polyneight, neot$ECO_NAME%in%data3$ECO_NAME)

neilistw3 <- nb2listw(polyneight3,zero.policy = TRUE)





############################### Regressions ############################


######################### Preditors of Megafauna ###########################

## Mrich

# with MAR

step(lm(Mrich~INSUL+MAT+MAR+RS+CEC+SND+FF+FI,data3))

summary(lm(Mrich~INSUL+MAR+RS+CEC+SND+FF,data3))

mega1=lm(Mrich~INSUL+MAR+RS+CEC+SND+FF,data3)



#with pH

step(lm(Mrich~INSUL+MAT+RS+CEC+SND+pH+FF+FI,data3))

summary(lm(Mrich~INSUL+RS+CEC+SND+FF,data3))

summary(lm(Mrich~INSUL+RS+CEC+FF,data3))

mega2=lm(Mrich~INSUL+RS+CEC+FF,data3)


# which model is best?

AIC(mega1)

AIC(mega2)

# mega1 has the lowest AIC

summary(mega1)

# are residuals normally distributed in at least one test?

ols_test_normality(mega1)

# yes

# are the residuals homoscedastic?

bptest(mega1)

# no

# are the significant association robust?

coeftest(mega1, vcov = vcovHC(mega1))

# yes

# do the residuals show spatial autocorrelation?

lm.morantest(mega1, listw=neilistw3, zero.policy=T) 

# no spatial autocorrelation detected!

# Thus, let's get the main results

AIC(mega1)-AIC(lm(Mrich~1,data3))

mega1=lm(Mrich~INSUL+MAR+RS+CEC+SND+FF,data3)

# mean R2 preditor variable contribution

averageContribution(dominanceAnalysis(mega1))

summary(mega1)

# number of observations

dim(data3)[1]-sum(is.na(data3$Mrich))

params1<-parameters(mega1)

t_to_r(params1$t[-1], n=157,df_error = params1$df_error[-1])

params1

standardize_parameters(mega1,method="basic",robust=F)








##Mbm

#with MAR

step(lm(Mbm~INSUL+MAT+MAR+RS+CEC+SND+FF+FI,data3))

summary(lm(Mbm~INSUL+MAR+RS+CEC+FF,data3))

summary(lm(Mbm~INSUL+MAR+RS+CEC,data3))

mega3=lm(Mbm~INSUL+MAR+RS+CEC,data3)




# with pH

step(lm(Mbm~INSUL+MAT+RS+CEC+SND+pH+FF+FI,data3))

summary(lm(Mbm~INSUL + RS + CEC + FF,data3))

summary(lm(Mbm~INSUL + RS + CEC,data3))

mega4=lm(Mbm~INSUL + RS + CEC,data3)



# which is best?

AIC(mega3)

AIC(mega4)


# mega3 is best

ols_test_normality(mega3)

# not normal

mega5=lm(log(Mbm+1)~INSUL+MAR+RS+CEC,data3)

ols_test_normality(mega5)

# solved (Kolmogorov-Smirnov non-significant)

bptest(mega5)

# heteroscedasticity detected in the residuals. Are the results robust?

# are the significances robust?

coeftest(mega5, vcov = vcovHC(mega5))

# yes

# is there spatial autocorrelation in the residuals?

lm.morantest(mega5, listw=neilistw3, zero.policy=T)

# no spatial autocorrelation in the residuals

summary(mega5)

AIC(mega5)-AIC(lm(log(Mbm)~1,data3))

averageContribution(dominanceAnalysis(mega5))

dim(data3)[1]-sum(is.na(data3$Mbm))

length(mega5$residuals)

params3<-parameters(mega5)

t_to_r(params3$t[-1], n=157,df_error = params3$df_error[-1])

params3

standardize_parameters(mega5,method="basic",robust=F)





# Grazers

# MAR

step(lm(MGrich~INSUL+MAT+MAR+RS+CEC+SND+FF+FI,data3))

summary(lm(MGrich~INSUL+MAR+RS+CEC+SND+FF+FI,data3))

summary(lm(MGrich~INSUL+MAR+RS+CEC+SND+FF,data3))

gmod=lm(MGrich~INSUL+MAR+RS+CEC+SND+FF,data3)






# pH

step(lm(MGrich~INSUL+MAT+pH+RS+CEC+SND+FF+FI,data3))

summary(lm(MGrich~INSUL + MAT + pH + RS + CEC + SND + FF + FI,data3))

summary(lm(MGrich~INSUL + MAT + pH + RS + CEC + SND + FF,data3))





# which is best?

AIC(lm(MGrich~INSUL + MAT + pH + RS + CEC + SND + FF,data3))

AIC(gmod)

# gmod

ols_test_normality(gmod)

bptest(gmod)

gmod.1=lm(log(MGrich+1)~INSUL+SND+MAR+RS+CEC+FF,data3)

bptest(gmod.1)

# did not solved

# are predictors robust?

coeftest(gmod, vcov = vcovHC(gmod))

# sand is not significant

gmod.2=lm(MGrich~INSUL+MAR+RS+CEC+FF,data3)

coeftest(gmod.2, vcov = vcovHC(gmod.2))

# now, all predictors a robust


# are residuals spatially autocorrelated

lm.morantest(gmod.2, listw=neilistw3, zero.policy=T)

# no spatial autocorrelation in the residuals

summary(gmod.2)

AIC(gmod.2)-AIC(lm(MGrich~1,data3))

dim(data3)[1]-sum(is.na(data3$MGrich))

length(is.na(residuals(gmod.2))==F)

averageContribution(dominanceAnalysis(gmod.2))

params4<-parameters(gmod.2)

t_to_r(params4$t[-1], n=157,df_error = params4$df_error[-1])

params4

standardize_parameters(gmod.2,method="basic",robust=F)





# Browsers

# MAR

step(lm(MBrich~INSUL+MAT+MAR+RS+CEC+SND+FF+FI,data3))

summary(lm(MBrich~INSUL+MAR+RS+CEC+SND+FF+FI,data3))

bmod=lm(MBrich~INSUL+MAR+RS+CEC+SND+FF+FI,data3)

summary(bmod)



# pH

step(lm(MBrich~INSUL+MAT+pH+RS+CEC+SND+FF+FI,data3))

summary(lm(MBrich~INSUL + MAT + RS + CEC + SND + FF + FI,data3))

summary(lm(MBrich~INSUL + MAT + RS + CEC + SND + FF,data3))


# which is best?

AIC(lm(MBrich~INSUL + MAT + RS + CEC + SND + FF,data3))

AIC(bmod)

# bmod is the best

ols_test_normality(bmod)

# marginally significant, let't try transformation

bmod1=lm(log(MBrich+1)~INSUL+MAR+RS+CEC+SND+FF+FI,data3)

ols_test_normality(bmod1)

#greatly improved

bptest(bmod1)

# homoscedastic!

summary(bmod1)

# fire intensity lost significance

bmod2=lm(log(MBrich+1)~INSUL+MAR+RS+CEC+SND+FF,data3)

summary(bmod2)

ols_test_normality(bmod2)

bptest(bmod2)

# homoscedastic!


# is the transformation and removal of fire justifiable

AIC(bmod)

AIC(bmod1)

AIC(bmod2)

AIC(bmod1)-AIC(bmod2)

# yes


lm.morantest(bmod2, listw=neilistw3, zero.policy=T)

# residuals spatially independent

summary(bmod2)

AIC(bmod2)-AIC(lm(log(MBrich+1)~1,data3))

averageContribution(dominanceAnalysis(bmod2))

summary(bmod2)

dim(data3)[1]-sum(is.na(data3$MBrich))

params5<-parameters(bmod2)

t_to_r(params5$t[-1], n=157,df_error = params5$df_error[-1])

params5

standardize_parameters(bmod2,method="basic",robust=F)








# Mixed Feeders


step(lm(MMfrich~INSUL+MAT+MAR+RS+CEC+SND+FF+FI,data3))

summary(lm(MMfrich~INSUL + MAT + RS + CEC + SND + FF,data3))

summary(lm(MMfrich~INSUL + MAT + RS + CEC + FF,data3))

AIC(lm(MMfrich~INSUL + MAT + RS + CEC + FF,data3))


#pH

step(lm(MMfrich~INSUL+MAT+pH+RS+CEC+SND+FF+FI,data3))

summary(lm(MMfrich~INSUL + MAT + pH + RS + CEC + FF,data3))

AIC(lm(MMfrich~INSUL + MAT + pH + RS + CEC + FF,data3))



# the second (pH) model is better


modmf=lm(MMfrich~INSUL + MAT + pH + RS + CEC + FF,data3)

ols_test_normality(modmf)

bptest(modmf)

# heteroscedastic

coeftest(modmf,vcov = vcovHC(modmf))

# all robust

summary(modmf)

lm.morantest(modmf, listw=neilistw3, zero.policy=T)

# no spatial autocorrelation in the residuals

summary(modmf)


# averageContribution(dominanceAnalysis(modmf.1))

dim(data3)[1]-sum(is.na(data3$modmf))

AIC(modmf)-AIC(lm(MMfrich~1,data3))


params6<-parameters(modmf)

t_to_r(params6$t[-1], n=157,df_error = params6$df_error[-1])

params6

standardize_parameters(modmf,method="basic",robust=F)

averageContribution(dominanceAnalysis(modmf))





######################### Predictors of traits #############################

## WD

names(data2)[1]="ECO_NAME"


data4=data2[,c("ECO_NAME","WD","Mrich","Mbm","MGBdif","Hrich","Hbm","HGBdif","pH","INSUL","FF","MAR","FI","MAT","RS","CEC","SND","HUR")]

data4=data4[complete.cases(data4[,c("ECO_NAME","WD","Mrich","Mbm","MGBdif","Hrich","Hbm","HGBdif","pH","INSUL","FF","MAR","FI","MAT","RS","CEC","SND","HUR")]),]

names(data4)


# MAR

step(lm(WD~Mrich+Mbm+MGBdif+Hrich+Hbm+HGBdif+FF+FI+MAR+MAT+RS+HUR+CEC+SND, data4[data4$INSUL==0,]))

summary(mod..1<-lm(WD~Mrich + MGBdif + HGBdif + MAR + MAT + 
    HUR + SND, data4[data4$INSUL==0,]))

summary(mod..1<-lm(WD~Mrich + MGBdif + MAR + MAT + 
    HUR + SND, data4[data4$INSUL==0,]))

summary(mod..1<-lm(WD~Mrich+MGBdif+MAT+HUR+SND, data4[data4$INSUL==0,]))

AIC(mod..1)






#pH

step(lm(WD~Mrich+Mbm+MGBdif+Hrich+Hbm+HGBdif+FF+FI+MAT+RS+HUR+CEC+SND+pH, data4[data4$INSUL==0,]))

summary(m..2<-lm(WD~Mrich + MGBdif + HGBdif + MAT + RS + 
    HUR + SND, data4[data4$INSUL==0,]))

summary(m..2<-lm(WD~Mrich + MGBdif + MAT + RS + 
    HUR + SND, data4[data4$INSUL==0,]))

summary(m..2<-lm(WD~Mrich + MGBdif + MAT + 
    HUR + SND, data4[data4$INSUL==0,]))

# same model

data5=data4[data4$INSUL==0,]

dim(data5)

mod1=lm(WD~Mrich+MGBdif+MAT+HUR+SND, data5)

summary(mod1)

ols_test_normality(mod1)

bptest(mod1)

AIC(mod1)-AIC(lm(WD~1, data4[data4$INSUL==0,]))

summary(mod1)

averageContribution(dominanceAnalysis(mod1))

summary(mod1)

dim(data5)[1]-sum(is.na(data5$WD))


polyneight4 <- subset(polyneight, neot$ECO_NAME%in%data5$ECO_NAME)

neilistw4 <- nb2listw(polyneight4,zero.policy = TRUE)

lm.morantest(mod1, listw=neilistw4, zero.policy=T)

# no spatial autocorrelation in the residuals

params7<-parameters(mod1)

t_to_r(params7$t[-1], n=143,df_error = params7$df_error[-1])

params7

standardize_parameters(mod1,method="basic",robust=F)





## SPINES

names(data2)

data..6=data2[data2$INSUL==0,]

data..6=data..6[complete.cases(data..6[,c("SSp_yes","SSp_no","Mrich","Mbm","MBrich","Hrich","Hbm","HGBdif","pH","INSUL","FF","MAR","FI","MAT","RS","CEC","SND")]),]

dim(data..6)

y..1=cbind(data..6$SSp_yes, data..6$SSp_no)


#MAR


step(glm.binomial.disp(glm(y..1~Mrich+Mbm+I(Mbm^2)+MGBdif+Hrich+HGBdif+Hbm+FI+FF+MAT+MAR+RS+CEC+SND,data..6, family=binomial), maxit = 30, verbose = TRUE))

summary(mod3<-glm.binomial.disp(glm(y..1~ Mbm + I(Mbm^2) + MGBdif + FI + MAT + 
    MAR + RS,data..6, family=binomial), maxit = 30, verbose = TRUE))

summary(mod3<-glm.binomial.disp(glm(y..1~Mbm + I(Mbm^2) + FI + MAT + 
    MAR + RS,data..6, family=binomial), maxit = 30, verbose = TRUE))

summary(mod3<-glm.binomial.disp(glm(y..1~Mbm + I(Mbm^2) + MAT + 
    MAR + RS,data..6, family=binomial), maxit = 30, verbose = TRUE))



# pH

step(glm.binomial.disp(glm(y..1~Mrich+Mbm+I(Mbm^2)+MGBdif+Hrich+HGBdif+Hbm+FI+FF+MAT+pH+RS+CEC+SND,data..6, family=binomial), maxit = 30, verbose = TRUE))

summary(mod4<-glm.binomial.disp(glm(y..1~Mbm + I(Mbm^2) + MGBdif + Hrich + HGBdif + 
    FI + MAT + pH + CEC,data..6, family=binomial), maxit = 30, verbose = TRUE))

summary(mod4<-glm.binomial.disp(glm(y..1~Mbm + I(Mbm^2) + MGBdif + Hrich + HGBdif + 
    FI + MAT + pH,data..6, family=binomial), maxit = 30, verbose = TRUE))



# which is best?

AIC(mod3)

AIC(mod4)

# mod4 is the best!

summary(mod4)

# check residuals spatial autocorrelation

mod4<-glm.binomial.disp(glm(y..1~Mbm + I(Mbm^2) + MGBdif + Hrich + HGBdif + 
    FI + MAT + pH,data..6, family=binomial), maxit = 30, verbose = TRUE)

polyneight5<- subset(polyneight, neot$ECO_NAME%in%data..6$ECO_NAME)

neilistw5 <- nb2listw(polyneight5,zero.policy = TRUE)

lm.morantest(mod4, listw=neilistw5, zero.policy=T)

# no spatial autocorrelation

pR2(mod4)

summary(mod4)

AIC(mod4)-AIC(glm(y..1~1,data..6, family=binomial,weights=mod4$disp.weights))

dim(data..6)

averageContribution(dominanceAnalysis(mod4))

params12<-parameters(mod4)

params12

z_to_r(params12$z[-1], n=143,df_error = params12$df_error[-1])

summary(mod4)

standardize_parameters(mod4,method="basic",robust=F)



## PALM LEAF SPINES

names(data2)

data7=data2[data2$INSUL==0,]

data7=data7[complete.cases(data7[,c("LSp_yes","LSp_no","Mrich","Mbm","MGBdif","Hrich","Hbm","HGBdif","pH","INSUL","FF","MAR","FI","MAT","RS","CEC","SND")]),]

y2=cbind(data7$LSp_yes,data7$LSp_no)




#MAR

step(glm.binomial.disp(glm(y2~Mrich+Mbm+MGBdif+Hrich+HGBdif+Hbm+FF+FI+RS+MAT+MAR+CEC+SND,data7, family=binomial), maxit = 30, verbose = TRUE))

summary(glm.binomial.disp(glm(y2~ Mrich + Hbm + RS + MAT + MAR,data7, family=binomial), maxit = 30, verbose = TRUE))

mod5=glm.binomial.disp(glm(y2~ Mrich + Hbm + RS + MAT + MAR,data7, family=binomial), maxit = 30, verbose = TRUE)

pR2(mod5)

AIC(mod5)


#pH

step(glm.binomial.disp(glm(y2~Mrich+Mbm+MGBdif+Hrich+HGBdif+Hbm+FF+FI+RS+MAT+CEC+pH+SND,data7, family=binomial), maxit = 30, verbose = TRUE))

# same model selected

mod6=glm.binomial.disp(glm(y2~ Mrich + RS + MAT + CEC + pH,data7, family=binomial), maxit = 30, verbose = TRUE)

mod6=glm.binomial.disp(glm(y2~ Mrich + RS + MAT + CEC,data7, family=binomial), maxit = 30, verbose = TRUE)

mod6=glm.binomial.disp(glm(y2~ Mrich + RS + MAT,data7, family=binomial), maxit = 30, verbose = TRUE)


pR2(mod6)

AIC(mod6)

nullmod=glm(y2~1,data7, family=binomial)

AIC(mod6)-AIC(nullmod)

dim(data7)


polyneight4 <- subset(polyneight, neot$ECO_NAME%in%data7$ECO_NAME)

neilistw4 <- nb2listw(polyneight4,zero.policy = TRUE)

lm.morantest(mod6, listw=neilistw4, zero.policy=T)

# no spatial autocorrelation in the residuals

summary(mod6)

averageContribution(dominanceAnalysis(mod6))

dim(data7)[1]-sum(is.na(data7$LSp_yes))

params8<-parameters(mod6)

z_to_r(params8$z[-1], n=132,df_error = params8$df_error[-1])

params8

standardize_parameters(mod6,method="basic",robust=F)





# Leaf Size

names(data2)

data8=data2[data2$INSUL==0,]

data8=data8[complete.cases(data8[,c("LSz","Mrich","Mbm","MGBdif","Hrich","Hbm","HGBdif","pH","INSUL","FF","MAR","FI","MAT","RS","CEC","SND")]),]




#MAR

step(lm(LSz~Mrich+Mbm+MGBdif+Hrich+HGBdif+Hbm+FF+FI+RS+MAT+MAR+CEC+SND, data8))

summary(lm(LSz~Mrich + MGBdif + Hrich + FI + MAT + MAR, data8))

mod8=lm(LSz~Mrich + MGBdif + Hrich + FI + MAT + MAR, data8)


# since a positive effect of extant mammal was detected, we exclude extant mammal related variables as it disagree to the pattern initially hypothesized, suggesting other causes

step(lm(LSz~Mrich+Mbm+MGBdif+FF+FI+RS+MAT+MAR+CEC+SND, data8))

summary(lm(LSz~Mrich + MGBdif + FI + MAR + CEC + SND, data8))

mod8=lm(LSz~Mrich + MGBdif + FI + MAR + CEC + SND, data8)





#pH

step(lm(LSz~Mrich+Mbm+MGBdif+Hrich+HGBdif+Hbm+FF+FI+RS+MAT+CEC+pH+SND, data8))

summary(lm(LSz ~Mrich + MGBdif + Hrich + FI + MAT + CEC + pH, data8))

summary(lm(LSz ~Mrich + MGBdif + Hrich + FI + MAT + pH, data8))

# since a positive effect of extant mammal was detected, we exclude extant mammal related variables

step(lm(LSz~Mrich+Mbm+MGBdif+FF+RS+MAT+CEC+pH+SND, data8))

summary(lm(formula = LSz ~Mrich + MGBdif + FF + MAT + pH + SND, data = data8))

summary(lm(formula = LSz ~Mrich + MGBdif + MAT + pH + SND, data = data8))

mod9=lm(formula = LSz ~Mrich + MGBdif + MAT + pH + SND, data = data8)




# which is best?

AIC(mod8)

AIC(mod9)


# mod9 is best

ols_test_normality(mod9)

bptest(mod9)

# heteroscedastic residuals

mod10=lm(log(LSz)~ Mrich + MGBdif + MAT + pH + SND, data = data8)

bptest(mod10)

# log did not solved

coeftest(mod9,vcov = vcovHC(mod9))

polyneight6 <- subset(polyneight, neot$ECO_NAME%in%data8$ECO_NAME)

neilistw6 <- nb2listw(polyneight6,zero.policy = TRUE)

lm.morantest(mod9, listw=neilistw6, zero.policy=T)

summary (mod9)

AIC(mod9)-AIC(lm(LSz~1, data8))

dim(data8)[1]-sum(is.na(data8$LSz))

averageContribution(dominanceAnalysis(mod9))

params9<-parameters(mod9)

t_to_r(params9$t[-1], n=143,df_error = params9$df_error[-1])

params9

standardize_parameters(mod9,method="basic",robust=F)








# LATEX

names(data2)

data10=data2[data2$INSUL==0,]

data10=data10[complete.cases(data10[,c("LAT_yes","LAT_no","Mrich","MGBdif","Mbm","Hrich","Hbm","HGBdif","pH","INSUL","FF","MAR","FI","MAT","RS","CEC","SND")]),]

y10=cbind(data10$LAT_yes,data10$LAT_no)




#MAR

step(glm.binomial.disp(glm(y10~Mrich+Mbm+MGBdif+Hrich+HGBdif+Hbm+FF+FI+RS+MAT+MAR+CEC+SND,data10, family=binomial), maxit = 30, verbose = TRUE))

summary(glm.binomial.disp(glm(y10~ Mrich + Hrich + Hbm + CEC,data10, family=binomial), maxit = 30, verbose = TRUE))

summary(glm.binomial.disp(glm(y10~ Mrich + Hbm + CEC,data10, family=binomial), maxit = 30, verbose = TRUE))

# here, it is megafauna richness that is inverselly related to defense. We therefore, exclude it.

step(glm.binomial.disp(glm(y10~Hrich+HGBdif+Hbm+FF+FI+RS+MAT+MAR+CEC+SND,data10, family=binomial), maxit = 30, verbose = TRUE))

summary(glm.binomial.disp(glm(y10~  HGBdif + Hbm + CEC,data10, family=binomial), maxit = 30, verbose = TRUE))

summary(glm.binomial.disp(glm(y10~  Hbm + CEC,data10, family=binomial), maxit = 30, verbose = TRUE))

mod10=glm.binomial.disp(glm(y10~ Hbm + CEC,data10, family=binomial), maxit = 30, verbose = TRUE)




#pH

step(glm.binomial.disp(glm(y10~Mrich+Mbm+MGBdif+Hrich+HGBdif+Hbm+FF+FI+RS+MAT+CEC+pH+SND,data10, family=binomial), maxit = 30, verbose = TRUE))

summary(glm.binomial.disp(glm(y10~ Mrich + Hbm + FI + CEC + pH,data10, family=binomial), maxit = 30, verbose = TRUE))

summary(glm.binomial.disp(glm(y10~ Mrich + Hbm + CEC + pH,data10, family=binomial), maxit = 30, verbose = TRUE))

# here, it is megafauna richness that is inversely related to defense. We therefore, exclude it.

step(glm.binomial.disp(glm(y10~Hrich+HGBdif+Hbm+FF+FI+RS+MAT+CEC+SND+pH,data10, family=binomial), maxit = 30, verbose = TRUE))

summary(glm.binomial.disp(glm(y10~HGBdif + Hbm + CEC + pH,data10, family=binomial), maxit = 30, verbose = TRUE))

summary(glm.binomial.disp(glm(y10~Hbm + CEC + pH,data10, family=binomial), maxit = 30, verbose = TRUE))

mod11<-glm.binomial.disp(glm(y10~Hbm + CEC + pH,data10, family=binomial), maxit = 30, verbose = TRUE)

# same model

polyneight7 <- subset(polyneight, neot$ECO_NAME%in%data10$ECO_NAME)

neilistw7 <- nb2listw(polyneight7,zero.policy = TRUE)

lm.morantest(mod10, listw=neilistw7, zero.policy=T)

# no spatial autcorrelation in the residuals

pR2(mod11)

AIC(mod11)-AIC(nullmod)

averageContribution(dominanceAnalysis(mod11))

dim(data10)[1]-sum(is.na(data10$LAT_yes))

params10<-parameters(mod11)

z_to_r(params10$z[-1], n=143,df_error = params10$df_error[-1])

params10

standardize_parameters(mod10,method="basic",robust=F)


#############################Plotting Statistical results (Figure 2)###########################


stats<-rbind(
	data.frame(t_to_r(params7$t[-1], n=143,df_error = params7$df_error[-1])),
	data.frame(z_to_r(params12$z[-1], n=143,df_error = params12$df_error[-1])),
	data.frame(z_to_r(params8$z[-1], n=132,df_error = params8$df_error[-1])),
	data.frame(t_to_r(params9$t[-1], n=143,df_error = params9$df_error[-1])),
	data.frame(z_to_r(params10$z[-1], n=143,df_error = params10$df_error[-1])),
	data.frame(t_to_r(paramsA$t[-1], n=143,df_error = paramsA$df_error[-1])),
	data.frame(t_to_r(paramsB$t[-1], n=143,df_error = paramsB$df_error[-1]))
)


(dim(data.frame(params7))[1]-1)

head(stats)

stats$tt<-c(
	rep("wd",(dim(data.frame(params7))[1]-1)),
	rep("ssp",(dim(data.frame(params12))[1]-1)),
	rep("lsp",(dim(data.frame(params8))[1]-1)),
	rep("lsz",(dim(data.frame(params9))[1]-1)),
	rep("lat",(dim(data.frame(params10))[1]-1)),
	rep("dim1",(dim(data.frame(paramsA))[1]-1)),
	rep("dim3",(dim(data.frame(paramsB))[1]-1))
)

head(stats)

stats$pred=c(
data.frame(params7)[-1,1],
data.frame(params12)[-1,1],
data.frame(params8)[-1,1],
data.frame(params9)[-1,1],
data.frame(params10)[-1,1],
data.frame(paramsA)[-1,1],
data.frame(paramsB)[-1,1]
)

params9

params12

params10

# edit names in pred and colunm names to match existing code



################## Making the Plots ######################

p.1<-ggplot(stats[stats$tt=="wd",], aes(x = reorder(pred, -r), y = r,ymax = CI_low, ymin = CI_high)) +
 	geom_point(size = 3,color=c('Mrich'='seagreen3','MGBdif'='chartreuse','MAT'='sienna1','HUR'='magenta','SND'='khaki')) +
	geom_pointrange(color=c('Mrich'='seagreen3','MGBdif'='chartreuse','MAT'='sienna1','HUR'='magenta','SND'='khaki'))+ 
	theme_classic()+
	xlab("Predictor Variables")+
	ggtitle("a) Wood Density")+
	geom_text(x=4.5, y=0.7, label=expression("R"^2*""["adj"]*"= 0.47"),size=3, color="grey50")+
	coord_cartesian(ylim = c(-0.1, 0.75))+
	theme(axis.line.x = element_line(colour = 'white'))+
	geom_hline(yintercept=0, linetype="dashed", color = "grey", size=1)+
	theme(axis.text.x=element_text(angle=-45, hjust=0, vjust=0.5))+
 	theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))


p.1


p.2<-ggplot(stats[stats$tt=="ssp",], aes(x = reorder(pred, -r), y = r,ymax = CI_low, ymin = CI_high)) +
 	geom_point(size = 3,color=c('darkolivegreen4','darkolivegreen2','chartreuse','royalblue1','royalblue4','red3','sienna1','goldenrod')) +
	geom_pointrange(color=c('darkolivegreen4','darkolivegreen2','chartreuse','royalblue1','royalblue4','red3','sienna1','goldenrod'))+ 
	theme_classic()+
	xlab("Predictor Variables")+
	ggtitle("b) Stem Spines")+
	geom_text(x=7.2, y=0.72, label=expression("R"^2*""["McFad"]*"= 0.53"),size=3, color="grey50")+
	coord_cartesian(ylim = c(-0.40, 0.75))+
	theme(axis.line.x = element_line(colour = 'white'))+
	geom_hline(yintercept=0, linetype="dashed", color = "grey", size=1)+
	theme(axis.text.x=element_text(angle=-45, hjust=0, vjust=0.5))+
 	theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))+
	scale_y_continuous(labels=function(x) sprintf("%.1f", x))


p.2



p.3<-ggplot(stats[stats$tt=="lsz",], aes(x = reorder(pred, -r), y = r,ymax = CI_low, ymin = CI_high)) +
 	geom_point(size = 3,color=c('seagreen3','chartreuse','sienna1','purple','goldenrod')) +
	geom_pointrange(color=c('seagreen3','chartreuse','sienna1','purple','goldenrod'))+ 
	theme_classic()+
	xlab("Predictor Variables")+
	ggtitle("c) Leaf Size")+
	geom_text(x=4.2, y=0.55, label=expression("R"^2*""["adj"]*"= 0.59"),size=3, color="grey50")+
	coord_cartesian(ylim = c(-0.8, 0.6))+
	theme(axis.line.x = element_line(colour = 'white'))+
	geom_hline(yintercept=0, linetype="dashed", color = "grey", size=1)+
	theme(axis.text.x=element_text(angle=-45, hjust=0, vjust=0.5))+
 	theme(plot.margin = unit(c(0.5,1,0.5,0.5), "cm"))+
	scale_y_continuous(labels=function(x) sprintf("%.1f", x))

p.3


p.4<-ggplot(stats[stats$tt=="lsp",], aes(x = reorder(pred, -r), y = r,ymax = CI_low, ymin = CI_high)) +
 	geom_point(size = 3,color=c('seagreen3','turquoise','sienna1')) +
	geom_pointrange(color=c('seagreen3','turquoise','sienna1'))+ 
	theme_classic()+
	xlab("Predictor Variables")+
	ggtitle("d) Leaf Spines")+
	geom_text(x=2.3, y=0.65, label=expression("R"^2*""["McFad"]*"= 0.15"),size=3,color="gray50")+
	coord_cartesian(ylim = c(-0.4, 0.65))+
	theme(axis.line.x = element_line(colour = 'white'))+
	geom_hline(yintercept=0, linetype="dashed", color = "grey", size=1)+
	theme(axis.text.x=element_text(angle=-45, hjust=0, vjust=0.5))+
 	theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))+
	scale_y_continuous(labels=function(x) sprintf("%.1f", x))


p4

p.5<-ggplot(stats[stats$tt=="lat",], aes(x = reorder(pred, -r), y = r,ymax = CI_low, ymin = CI_high)) +
 	geom_point(size = 3,color=c('darkslateblue','purple','goldenrod')) +
	geom_pointrange(color=c('darkslateblue','purple','goldenrod'))+ 
	theme_classic()+
	xlab("Predictor Variables")+
	ggtitle("e) Latex")+
	geom_text(x=2.5, y=0.55, label=expression("R"^2*""["McFad"]*"= 0.19"),size=3,color="gray50")+
	coord_cartesian(ylim = c(-0.4, 0.55))+
	theme(axis.line.x = element_line(colour = 'white'))+
	geom_hline(yintercept=0, linetype="dashed", color = "grey", size=1)+
	theme(axis.text.x=element_text(angle=-45, hjust=0, vjust=0.5))+
 	theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))+
	scale_y_continuous(labels=function(x) sprintf("%.1f", x))

p.5


windows()

grid.arrange(arrangeGrob(p.1,p.2, ncol=2, nrow=1,widths=c(0.65,1)),arrangeGrob(p.3,p.4,p.5, ncol=3, nrow=1,widths=c(1,0.7,0.8)), nrow=2)





################################# Antiherbiome Cluster #########################################


# Generate predicted probabilities for leaf spines


# Generate predicted probabilities for leaf spines

names(data2)

data71=data2[,c(1,2,8,10,21,27,28)]

names(data71)

data71=data71[complete.cases(data71),]

data71<-data71[data71$INSUL==0,]


y71=cbind(data71$LSp_yes,data71$LSp_no)

library(dispmod)

model=glm.binomial.disp(glm(y71~Mrich+MAT+RS,data71, family=binomial), maxit = 30, verbose = TRUE)

data2$lsp.prob=predict(model, data2, type="response")

data2$prop.lsp=data2$LSp_yes/(data2$LSp_yes+data2$LSp_no)

data2$lsp.prob.pred=ifelse(is.na(data2$prop.lsp), data2$lsp.prob, data2$prop.lsp)

data2$prop.ssp=round(data2$SSp_yes/(data2$SSp_yes+data2$SSp_no),3)

data2$prop.lat=round(data2$LAT_yes/(data2$LAT_yes+data2$LAT_no),3)

names(data2)

data1=data2[,c(1,25,26,35,36,37)]

head(data1)

names(data1)[c(2:6)]

names(data1)[c(2:6)]=c("WD","LSz","SSp","Lat","LSp")

# Principal Component Analysis

names(data1[,c(2:6)])

res = PCA(data1[,c(2:6)])

# Hierarchical ascendant clustering

res.hcpc = HCPC(res)

# select cluster cut

# get clusters

data1$clus=res.hcpc$data.clust$clust

# assign cluster names

data1$clus2=ifelse(data1$clus==1,"SLT",
	ifelse(data1$clus==3, "BCL","ILW"
	)
)

data1$clus2=factor(data1$clus2, levels = c("BCL", "ILW", "SLT"))

# get PCA scores

data1$dim1<-res$ind$coord[,1]

data1$dim2<-res$ind$coord[,2]

data1$dim3<-res$ind$coord[,3]


# merge to original data

data3=merge(data2,data1[,c(1,4:11)],by="ECO_NAME",all=T)

data3$ph1=data3$pH/10


## Pairwise Antiherbiome comparison of PCA scores

kruskal.test(dim1~clus2,data3[data3$INSUL==0,])$ p.value

kruskal.test(dim2~clus2,data3[data3$INSUL==0,])$ p.value

kruskal.test(dim3~clus2,data3[data3$INSUL==0,])$ p.value


# Create a vector of P-values to apply the "BH" correction

aa1=c(kruskal.test(dim1~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(dim2~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(dim3~clus2,data3[data3$INSUL==0,])$ p.value
)

# Corrected P-values

round(p.adjust(aa1,method="BH"),3)

# results did not change. Only PC1 and PC3 differed among antiherbiomes

# post-hoc test:

dunnTest(dim1~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")

dunnTest(dim3~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")






# Regression of PCA axes that were different among antiherbiomes

head(data3)

names(data3)

data31<-data3[complete.cases(data3[,c("Mrich","Mbm","MGBdif","Hrich","Hbm","HGBdif","FF","FI","MAT","MAR","RS","pH","CEC","SND","INSUL")]),]

data31<-data31[data31$INSUL==0,]


# dim.1

#MAR

step(lm(dim1~Mrich+Mbm+MGBdif+Hrich+Hbm+HGBdif+FF+FI+MAT+MAR+RS+CEC+SND, data=data31))

summary(lm(formula = dim1 ~ Mrich + MGBdif + Hrich + HGBdif + MAR + 
    SND, data = data31))

summary(lm(formula = dim1 ~ Mrich + MGBdif + Hrich + HGBdif + MAR, data = data31))

summary(lm(formula = dim1 ~ Mrich + MGBdif + Hrich + MAR, data = data31))

modA<-lm(formula = dim1 ~ Mrich + MGBdif + Hrich + MAR, data = data31)




#pH
step(lm(dim1~Mrich+Mbm+MGBdif+Hrich+Hbm+HGBdif+FF+FI+MAT+RS+pH+CEC+SND, data=data31))


summary(lm(dim1~Mrich + MGBdif + Hrich + HGBdif + FI + 
    pH + CEC, data=data31))

summary(lm(dim1~Mrich + MGBdif + Hrich + FI + pH + CEC, data=data31))

modB<-lm(dim1~Mrich + MGBdif + Hrich + FI + pH + CEC, data=data31)

AIC(modA)

AIC(modB)

# model B is the best

ols_test_normality(modB)

bptest(modB)

bptest(lm(log(dim1+10)~Mrich + MGBdif + Hrich + FI + pH + CEC, data=data31))

#log did not solved

# are the significance in the predictors in modB robust

coeftest(modB,vcov = vcovHC(modB))

# FI and Hrich are not

modC<-lm(dim1~Mrich + MGBdif + Hrich + pH + CEC, data=data31)

summary(modC)

bptest(modC)

coeftest(modC,vcov = vcovHC(modC))

modD<-lm(dim1~Mrich + MGBdif + pH + CEC, data=data31)

bptest(modD)

coeftest(modD,vcov = vcovHC(modD))

# all significant

polyneightA <- subset(polyneight, neot$ECO_NAME%in%data31$ECO_NAME)

neilistwA <- nb2listw(polyneightA,zero.policy = TRUE)

lm.morantest(modD, listw=neilistwA, zero.policy=T)

# no autocorrelation in the residuals


# no spatial autocorrelation in the residuals

paramsA<-parameters(modD)

dim(data31)

t_to_r(paramsA$t[-1], n=143,df_error = paramsA$df_error[-1])

summary(modD)

averageContribution(dominanceAnalysis(modD))

AIC(modE)-AIC(lm(dim1~1, data=data31))







# dim 3

#MAR

step(lm(dim3~Mrich+Mbm+MGBdif+Hrich+Hbm+HGBdif+FF+FI+MAT+MAR+RS+CEC+SND, data=data31))

summary(lm(dim3~Mrich+HGBdif+MAT+MAR+RS+CEC, data=data31))

summary(lm(dim3~Mrich+HGBdif+MAT+MAR+CEC, data=data31))

summary(lm(dim3~Mrich+HGBdif+MAR+CEC, data=data31))

summary(lm(dim3~Mrich+HGBdif+MAR, data=data31))

modE<-lm(dim3~Mrich+HGBdif+MAR, data=data31)


# pH

step(lm(dim3~Mrich+Mbm+MGBdif+Hrich+Hbm+HGBdif+FF+FI+MAT+RS+pH+CEC+SND, data=data31))

summary(lm(dim3~Mrich+Hrich+HGBdif+pH+SND, data=data31))

summary(lm(dim3~Mrich+HGBdif+pH+SND, data=data31))

summary(lm(dim3~Mrich+HGBdif+pH, data=data31))

modF<-lm(dim3~Mrich+HGBdif+pH, data=data31)


AIC(modF)
AIC(modE)

# modF is better

ols_test_normality(modF)

bptest(modF)

lm.morantest(modF, listw=neilistwA, zero.policy=T)

# everything ok with residuals

paramsB<-parameters(modF)

t_to_r(paramsB$t[-1], n=143,df_error = paramsB$df_error[-1])

summary(modF)

averageContribution(dominanceAnalysis(modF))

paramsB

AIC(modF)-AIC(lm(dim3~1, data=data31))





###################### Final plot antiherbiomes ##########################


neot.3=merge(neot, data3, by="ECO_NAME",all=T)

class(neot.3$clus2)

p.1=fviz_pca_ind(res, axes = c(1, 3), label="none", habillage=data1$clus2,addEllipses=F,palette = c("forestgreen","orange","olivedrab1"),title=list("c)"),legend = "none")

p.2=fviz_pca_var(res, axes = c(1, 3), col.var="steelblue",title="b)",repel=T)+
  theme_minimal()+
	theme(plot.margin = unit(c(0.2,0,0.2,-0.5), "cm"))

p.3=spplot(neot.3, "clus2", col = "transparent", col.regions=c("forestgreen","orange","olivedrab1"),par.settings = list(fontsize = list(text = 10),axis.line = list(col = "transparent")),main=list(expression("a)"),x=0.1,y=0.5),colorkey = list(axis.line = list(col = "black")))


p.4=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=dim1, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	labs(title="d)",x="", y = expression("Physical "%->%" Chemical (Dim1)"), fill = "Group")+
	coord_cartesian(ylim = c(-4,3.5))+
	geom_text(x=1.8,  y=2, label="b",size=5)+
	geom_text(x=0.8, y=3, label="a",size=5)+
	geom_text(x=2.8,y=0, label="c",size=5)



p.5=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=dim3, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	labs(title="e)",x="", y = expression("Thorny "%->%" Woody (Dim3)"), fill = "Group")+
	#coord_cartesian(ylim = c(0,35))+
	geom_text(x=1.8,  y=3, label="b",size=5)+
	geom_text(x=0.8, y=1, label="a",size=5)+
	geom_text(x=2.8,y=0.5, label="c",size=5)


windows()

grid.arrange(arrangeGrob(p.3),arrangeGrob(p.2,p.1,p.4,p.5, ncol=2, nrow=2),nrow=1,widths=c(1,1))










########################## Differences among Antiherbiomes ####################


# Kruskal-Wallis

Creating a vector of P-values for applying the "BH" correction

aa=c(kruskal.test(FF~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(FI~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(Mrich~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(CEC~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(SND~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(Mbm~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(MAT~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(MAR~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(RS~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(pH~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(MGrich~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(MBrich~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(MMfrich~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(MGBdif~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(Hbm~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(Hrich~clus2,data3[data3$INSUL==0,])$ p.value,
kruskal.test(HGBdif~clus2,data3[data3$INSUL==0,])$ p.value
)

round(p.adjust(aa,method="BH"),3)

# non-significant: FI, SND, RS e Hrich

# Dunn post-hoc tests

dunnTest(Mrich~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")


dunnTest(Mbm~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")


dunnTest(MMfrich~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")

dunnTest(MGrich~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")

dunnTest(MBrich~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")



dunnTest(FF~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")


dunnTest(MAT~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")

dunnTest(MAR~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")

dunnTest(CEC~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")

dunnTest(pH~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")


dunnTest(Hbm~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")

dunnTest(HGBdif~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")

dunnTest(MGBdif~clus2,
              data=data3[data3$INSUL==0,],
              method="bh")


# Changing variable scale to improve plotting

data3$Hbm1=data3$Hbm/100000

data3$Mbm1=data3$Mbm/100000

data3$pH1=data3$pH/10


# Boxplots showing differences among antiherbiomes (Figure 4)

p1=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=Mrich, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	labs(title="a)",x="", y = expression("M"[rich]), fill = "Group")+
	coord_cartesian(ylim = c(0,35))+
	geom_text(x=1.8,  y=28, label="b",size=5)+
	geom_text(x=0.8, y=20, label="a",size=5)+
	geom_text(x=2.8,y=27, label="ab",size=5)

p2=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=Mbm1, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	labs(title="b)",x="", y = expression("M"[bm]*" (10"^5*"g)"), fill = "Group")+
	coord_cartesian(ylim = c(0,60))+
	geom_text(x=1.8, y=30, label="b",size=5)+
	geom_text(x=0.8, y=57, label="a",size=5)+
	geom_text(x=2.8,  y=55, label="a",size=5)


p3=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=MGrich, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	labs(title="c)",x="", y = expression("MG"[rich]), fill = "Group")+
	coord_cartesian(ylim = c(0,9))+
	geom_text(x=1.8,  y=8, label="b",size=5)+
	geom_text(x=0.8, y=4, label="a",size=5)+
	geom_text(x=2.8,y=8, label="b",size=5)

p4=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=MGBdif, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	coord_cartesian(ylim = c(-1,3))+
	labs(title="d)",x="", y = expression("MGB"[dif]), fill = "Group")+
	geom_text(x=1.8, y=2, label="a",size=5)+
	geom_text(x=0.8, y=2, label="a",size=5)+
	geom_text(x=2.8, y=3, label="b",size=5)


p5=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=MMfrich, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	labs(title="e)",x="", y = expression("MMf "[rich]), fill = "Group")+
	coord_cartesian(ylim = c(0,13))+
	geom_text(x=1.8,  y=12, label="b",size=5)+
	geom_text(x=0.8, y=10, label="a",size=5)+
	geom_text(x=2.8,y=10, label="a",size=5)


p6=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=Hbm1, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	labs(title="f)",x="", y = expression("H"[bm]*" (10"^5*"g)"), fill = "Group")+
	coord_cartesian(ylim = c(0,13))+
	geom_text(x=1.8, y=8, label="b",size=5)+
	geom_text(x=0.8, y=9, label="a",size=5)+
	geom_text(x=2.8, y=12, label="c",size=5)

p7=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=HGBdif, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	labs(title="g)",x="", y = expression("HGB"[dif]), fill = "Group")+
	#coord_cartesian(ylim = c(20,300))+
	geom_text(x=1.8, y=3, label="a",size=5)+
	geom_text(x=0.8, y=3, label="a",size=5)+
	geom_text(x=2.8, y=-2, label="b",size=5)


p8=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=MAR, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"))+ 
	theme_minimal()+
	labs(title="h)",x="", y = "MAR (mm)", fill = "Group")+
	coord_cartesian(ylim = c(0,3700))+
	geom_text(x=1.8,  y=3700, label="a",size=5)+
	geom_text(x=0.8, y=3700, label="a",size=5)+
	geom_text(x=2.8,y=2500, label="b",size=5)

p9=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=MAT, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	labs(title="i)",x="", y =expression("MAT ( "^o*"C)"), fill = "Group")+
	coord_cartesian(ylim = c(5,30))+
	geom_text(x=1.8,  y=15, label="b",size=5)+
	geom_text(x=0.8, y=9, label="a",size=5)+
	geom_text(x=2.8,y=29, label="c",size=5)


p10=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=pH1, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	labs(title="j)",x="", y = "Soil pH", fill = "Group")+
	geom_text(x=1.8, y=7, label="a",size=5)+
	geom_text(x=0.8, y=7, label="a",size=5)+
	geom_text(x=2.8, y=5.5, label="b",size=5)

p11=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=CEC, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	labs(title="k)",x="", y = expression("CEC (cmlc.kg"^-1*")"), fill = "Group")+
	geom_text(x=1.8, y=40, label="b",size=5)+
	geom_text(x=0.8, y=36, label="a",size=5)+
	geom_text(x=2.8, y=35, label="a",size=5)

p12=ggplot(data3[data3$INSUL==0,], aes(x=clus2, y=FF, fill=clus2)) +
	geom_boxplot(fill = c("forestgreen","orange","olivedrab1"),outlier.shape = NA)+ 
	theme_minimal()+
	labs(title="l)",x="", y = "Fire Count", fill = "Group")+
	coord_cartesian(ylim = c(0,2000))+
	geom_text(x=1.8, y=1900, label="b",size=5)+
	geom_text(x=0.8, y=800, label="a",size=5)+
	geom_text(x=2.8, y=1100, label="a",size=5)

# full pannel

grid.arrange(p1,p2, p3, p4, p5,p6, p7,p8,p9,p10, p11, p12, nrow=3)




############### Codes for making Figure 5 ############################

# building data frame "stats" with statistical results for axis 1 and 3

tt<-c(rep("dim1",4),rep("dim3",3))

pred<-c("Mrich","MGBdif","pH","CEC","Mrich","HGBdif","pH")

r=c(-0.49,-0.24,-0.72,0.22,0.41,-0.27,-0.40)

CI_low<-c(-0.60,-0.38,-0.78,0.06,0.27,-0.41,-0.52)

CI_high<-c(-0.36,-0.07,-0.63,0.37,0.53,-0.11,-0.26)


stats<-data.frame(tt,pred,r,CI_low,CI_high)


# plotting

p61<-ggplot(stats[stats$tt=="dim1",], aes(x = reorder(pred, -r), y = r,ymax = CI_low, ymin = CI_high)) +
 	geom_point(size = 3,color=c('seagreen3','chartreuse','goldenrod','purple')) +
	geom_pointrange(color=c('seagreen3','chartreuse','goldenrod','purple'))+ 
	theme_classic()+
	xlab("Predictor Variables")+
	#ggtitle("c) CheLeaf-PhyShoot")+
	geom_text(x=3.5, y=0.35, label=expression("R"^2*""["adj"]*"= 0.57"),size=3,color="gray50")+
	coord_cartesian(ylim = c(-0.75, 0.42))+
	theme(axis.line.x = element_line(colour = 'white'))+
	geom_hline(yintercept=0, linetype="dashed", color = "grey", size=1)+
	theme(axis.text.x=element_text(angle=-45, hjust=0, vjust=0.5))+
 	theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))+
	scale_y_continuous(labels=function(x) sprintf("%.1f", x))


p71<-ggplot(stats[stats$tt=="dim3",], aes(x = reorder(pred, -r), y = r,ymax = CI_low, ymin = CI_high)) +
 	geom_point(size = 3,color=c('seagreen3','royalblue4','goldenrod')) +
	geom_pointrange(color=c('seagreen3','royalblue4','goldenrod'))+ 
	theme_classic()+
	xlab("Predictor Variables")+
	#ggtitle("d) Woody-Thorny")+
	geom_text(x=2.5, y=0.37, label=expression("R"^2*""["adj"]*"= 0.42"),size=3,color="gray50")+
	coord_cartesian(ylim = c(-0.41, 0.42))+
	theme(axis.line.x = element_line(colour = 'white'))+
	geom_hline(yintercept=0, linetype="dashed", color = "grey", size=1)+
	theme(axis.text.x=element_text(angle=-45, hjust=0, vjust=0.5))+
 	theme(plot.margin = unit(c(0.5,1.5,0.5,0.2), "cm"))+
	scale_y_continuous(labels=function(x) sprintf("%.1f", x))

p10=spplot(neot.3, "dim1", main = list(expression("a) Physical "%->%" Chemical"),cex=1, font=2, x=0.30, y=0.7), col = "transparent", col.regions=viridis(10),par.settings = list(fontsize = list(text = 10),axis.line = list(col = "transparent")),cut=9,colorkey = list(axis.line = list(col = "black")))

p11=spplot(neot.3, "dim3", main = list(expression("b) Thorny " %->% " Woody"),cex=1, font=2, x=0.30, y=0.7), col = "transparent", col.regions=viridis(10),par.settings = list(fontsize = list(text = 10),axis.line = list(col = "transparent")),cut=9,colorkey = list(axis.line = list(col = "black")))

windows()


########################## Defining ecoregions that experienced biome shifts ##############################

# Classifying Neotropical Ecoregions as "forest" (2), "savanna" (1) or "xeric" (0)formations

table(neot.3@data$BIOME_NAME,neot.3@data$BIOME_NUM)

forest=c(1,2,3,4,14)

grass=c(7,8,9,10)

xeric=c(12,13)

neot.3@data$form=ifelse(neot.3@data$BIOME_NUM%in%forest,2,ifelse(neot.3@data$BIOME_NUM%in%grass,1,0))

neot.3@data$form1=neot.3@data$form

# The Dry Chaco is more often classified as forest, so we changed

neot.3@data[grepl("Dry Chaco",neot.3@data$ECO_NAME),"form1"]=2

quantile(neot.3@data$Mrich,0.75)

quantile(neot.3@data$MGrich,0.75,na.rm=T)

# Classify areas as megafauna and megagrazer rich (1) vs poor (0)

neot.3@data$mega2=ifelse(neot.3@data$Mrich>=14&neot.3@data$MGrich>=3,1,0)

table(neot.3@data$mega2, neot.3@data$form1)


# if an ecoregion is not in the BCL antiherbiome, are currently forest and used to have lots of megafauna, it is assumed to have shifted from savanna to forest(1; and 0 otherwise) 

neot.3@data$shift2=ifelse((neot.3@data$clus!=3)&(neot.3@data$form1==2)&(neot.3@data$mega2==1),1,0)

# separating shifting dry (2) from moist (1) forests 

neot.3@data[neot.3@data$ECO_NAME%in%c("Southern Andean Yungas","Dry Chaco","Chiquitano dry forests","Caatinga","Brazilian Atlantic dry forests"),"shift2"]

neot.3@data[neot.3@data$ECO_NAME%in%c("Southern Andean Yungas","Dry Chaco","Chiquitano dry forests","Caatinga","Brazilian Atlantic dry forests"),"shift2"]=c(2,2,2,2,2)



#separating areas that were and still are savannas (stable savannas: 3)

neot.3@data[neot.3@data$ECO_NAME%in%c("Humid Chaco","Southern Cone Mesopotamian savanna","Paraná flooded savanna","Espinal","Uruguayan savanna","Cerrado","Beni savanna","Pantanal","Guianan savanna","Llanos","Campos Rupestres montane savanna","Orinoco wetlands"),"shift2"]=c(3,3,3,3,3,3,3,3,3,3,3,3)

neot.3@data[neot.3@data$ECO_NAME%in%c("Humid Chaco","Southern Cone Mesopotamian savanna","Paraná flooded savanna","Espinal","Uruguayan savanna","Cerrado","Beni savanna","Pantanal","Guianan savanna","Llanos","Campos Rupestres montane savanna","Orinoco wetlands"),"shift2"]

# turn to factor

neot.3@data$shift2=factor(neot.3@data$shift2)


# Calculate the area in each chategory in Km2


sum(area(neot.3[neot.3@data$shift5=="Sav to Moist For",])/1000000)

sum(area(neot.3[neot.3@data$shift5=="Sav to Dry For",])/1000000)

sum(area(neot.3[neot.3@data$shift5=="Stable Savanna",])/1000000)

neot.3@data[neot.3@data$shift5=="Stable Savanna","ECO_NAME"]

sum(area(neot.3[neot.3@data$ECO_NAME=="Llanos",])/1000000)


# create pretty labels for ploting

neot.3@data$shift5=ifelse(neot.3@data$shift2==0,"Other",
ifelse(neot.3@data$shift2==1,"Sav to Moist For",
ifelse(neot.3@data$shift2==2,"Sav to Dry For","Stable Savanna"
)
)
)

neot.3@data$shift5=factor(neot.3@data$shift5)


# load fossil site data (provided in the Supplementary Information of the article)

vegfos1=read.csv(file.choose(),header=T, sep=",")

head(vegfos1)

# shift 1 describes whether the vegetation was savanna ("grassy", or moisaic, here called "mixed") in the mid holocene (mid.h.bio), in the last glacial maximum (lgm.bio) or in the mid holocene ("mid.h.bio"), in both ("both"), or none ("") 

vegfos1$shift1<-

ifelse(vegfos1$mid.h.bio%in%c("grassy","mixed")&vegfos1$lgm.bio%in%c("grassy","mixed"),"both",

ifelse(vegfos1$mid.h.bio%in%c("grassy","mixed")&vegfos1$lgm.bio%in%c("forest",""),"holo",

ifelse(vegfos1$mid.h.bio%in%c("forest","")&vegfos1$lgm.bio%in%c("grassy","mixed"),"lgm","")))

vegfos1$shift1

head(vegfos1)


vegfos3<-vegfos1

coordinates(vegfos3) <- c("long","lat")

proj4string(vegfos3) <- proj4string(neot)


# Obtain the ecoregion of each fossil site
aa1=extract(neot,vegfos3)$ECO_NAME

vegfos1$eco<-aa1

# select only the site name, the coordinates and the shift1 column

vegfos2<-vegfos1[,c(1,2,3,12)]

head(vegfos2)

# load .csv file with coordinates of present day Amazonian savannas

presav=read.csv(file.choose(),header=T, sep=";")

head(presav)

# get only sites names and coordinates

presav1=presav[,c(1,2,3)]

# create a shift1 colum to match the fossil data.frame, adding it to the class "pres" (for present savannas)

presav1$shift1=rep("pres",7)

names(presav1)=c("sites","lat","long","shift1")

head(presav1)

points<-rbind(vegfos2,presav1)

head(points)

coordinates(points) <- c("long","lat")

proj4string(points) <- proj4string(neot)

points@data

# Plot a map of past savanna areas in which colours correspond to their shift cathegory 

p9=spplot(neot.3, "shift5", col = "transparent", col.regions=c("grey90","darkolivegreen2","seagreen4","Khaki1"),par.settings = list(fontsize = list(text = 10),axis.line = list(col = "transparent")),colorkey = list(axis.line = list(col = "black")))

# and add dots with location of fossil sites and present savannas

p10=update(p9,sp.layout = list(c('sp.points', points[points@data$shift1=="holo",], col='cyan', pch=16), c('sp.points', points[points@data$shift1=="lgm",], col='red', pch=16),c('sp.points', points[points@data$shift1=="both",], col='black', pch=16), c('sp.points', points[points@data$shift1=="pres",], col='darkorange', pch=16)))

windows()

p10