####Script for LCBD and SCBD analysis of insects####


####Datasets####
# Aquatic insects sampled in 100 streams
# Dataset is a site (rows) by genera (columns) matrix
# The first two columns are grouping ID variables (catchment, stream)

# Local environmental variables from the same 100 streams
# The first two columns are grouping ID variables (catchment, stream)

# Land use variables obtained within a 400 m radius along each stream
# The first three columns are grouping ID variables (ID, catchment, stream)


# Prepared by Fabiana Schneck (fabiana.schneck@gmail.com) in March/2021
# Revised by FS in October/2021
#===============================================================================.

#loading required packages
library(adespatial)
library(vegan)
library(pgirmess) #correlog
library(usdm) #vifstep
library(betareg)
library(FD) #traits dissimilarity (gowdist function)
library(adiv) #taxonomic dissimilarity

####Preparing data sets####

# Insect abundance matrix: sites in rows, genera in columns
inv <- read.table("inverts20_5.txt", header = TRUE)
inv <- inv[, -(1:2)] # drop the two leading ID columns
dim(inv) # 100 sites/83 spp

# Sites with a total insect count below 40 are removed (12 streams)
excluir.sites.inv <- which(rowSums(inv) < 40)
inv <- inv[-excluir.sites.inv, ]

# Expected to return nothing: no genus is left with zero total abundance
which(colSums(inv) == 0)

# After filtering, the data set has 88 sites and 83 genera
dim(inv)


# Environmental data matrix (the same used for diatoms)
env <- read.table("env20_5.txt", header = TRUE)

# Geographical coordinates of all 100 streams, extracted before 'env' is
# reduced to the variables of interest below
geo.data <- env[, c("lat", "lon")]

# Keep the variables of interest by dropping the unwanted columns
env <- env[, -c(1, 2, 13, 16, 18, 19, 22, 23)]

# Standardize each variable (column), using only the retained sites
env.s.inv <- decostand(env[-excluir.sites.inv, ], method = "standardize", MARGIN = 2)
dim(env.s.inv) # 88 sites/15 variables


# Land-use data matrix (the same used for diatoms)
# 'header = TRUE' is spelled out: 'h = T' relied on partial argument matching
# and on 'T', which is an ordinary variable that can be reassigned
land <- read.csv("streams_buffer_400m.csv", header = TRUE)

# Keep the retained sites and drop the first six columns
# NOTE(review): the file header mentions three ID columns, but six columns are
# dropped here - confirm which columns of the csv are not land-use variables
land.inv <- land[-excluir.sites.inv, -(1:6)]
dim(land.inv) # 88 sites/10 variables



#########################.
#### LCBD per stream ####
# LCBD is computed once, over all streams together
set.seed(1) # makes the permutation-based results of beta.div reproducible
LCBD.inv <- beta.div(inv, method = "hellinger", nperm = 9999)
LCBD.inv$beta # Total beta diversity

# One row per site: LCBD, genus richness (S), Simpson dominance
# (1 - vegan's "simpson" index) and the coordinates of the retained sites
beta.inv <- data.frame(
  LCBD = LCBD.inv$LCBD,
  S = specnumber(inv),
  Domi = 1 - diversity(inv, index = "simpson"),
  Lat = geo.data[-excluir.sites.inv, "lat"],
  Lon = geo.data[-excluir.sites.inv, "lon"]
)


####Spatial autocorrelation of original LCBD data####
# Moran correlogram of LCBD against the site coordinates (one-sided test)
correlog.inv <- correlog(
  coords = beta.inv[, c("Lat", "Lon")],
  z = beta.inv$LCBD,
  method = "Moran",
  alternative = "greater"
)
# Bonferroni correction of the p-values returned by correlog
p.adjust(correlog.inv[, "p.value"], method = "bonferroni")



####Pearson correlations between LCBD and species richness or Simpson dominance index####
# LCBD versus richness
cor.test(x = beta.inv$LCBD, y = beta.inv$S, method = "pearson")
# LCBD versus Simpson dominance
cor.test(x = beta.inv$LCBD, y = beta.inv$Domi, method = "pearson")



# Predictors of environmental uniqueness are created below.
# 'sqrt.D = FALSE' is spelled out in every LCBD.comp call: 'F' is an ordinary
# variable that can be reassigned, 'FALSE' cannot.

####Environmental uniqueness - All variables####
# LCEH = local contribution to environmental heterogeneity
# env.s.inv = standardized environmental matrix
env.dist.inv <- vegdist(env.s.inv, method = "euclidean")
LCEH.tot.inv <- LCBD.comp(env.dist.inv, sqrt.D = FALSE)

####Environmental uniqueness - physical variables####
# Columns 1 to 10 of env.s.inv
envPHY.dist.inv <- vegdist(env.s.inv[, 1:10], method = "euclidean")
LCEH.PHY.inv <- LCBD.comp(envPHY.dist.inv, sqrt.D = FALSE)

####Environmental uniqueness - water chemistry variables####
# Columns 11 to 15 of env.s.inv
envWC.dist.inv <- vegdist(env.s.inv[, 11:15], method = "euclidean")
LCEH.WC.inv <- LCBD.comp(envWC.dist.inv, sqrt.D = FALSE)

####Land use uniqueness####
# LCEH.LU = local contribution to land-use heterogeneity
# Land-use variables are arcsine square-root transformed before the distances
# NOTE(review): asin(sqrt(x)) needs values in [0, 1] - presumably land.inv holds
# proportions; confirm
land.dist.inv <- vegdist(asin(sqrt(land.inv)), method = "euclidean")
LCEH.LU.inv <- LCBD.comp(land.dist.inv, sqrt.D = FALSE)


# Append the uniqueness predictors and forest cover to the beta.inv dataframe
beta.inv <- data.frame(
  beta.inv,
  LCEH_tot = LCEH.tot.inv$LCBD,
  LCEH_phy = LCEH.PHY.inv$LCBD,
  LCEH_wc = LCEH.WC.inv$LCBD,
  LCEH_lu = LCEH.LU.inv$LCBD,
  Forest = land.inv$Forest
)


# Pairwise correlations between the five candidate predictors (columns 6 to 10)
cor(beta.inv[, c("LCEH_tot", "LCEH_phy", "LCEH_wc", "LCEH_lu", "Forest")])
# LCEH_tot excluded due to its high correlation with LCEH_phy

# The four remaining predictors (columns 7 to 10) all have a VIF lower than 3
usdm::vifstep(beta.inv[, c("LCEH_phy", "LCEH_wc", "LCEH_lu", "Forest")], th = 3)


####Beta regression model LCBD####
# Each predictor is arcsine square-root transformed (normality assumption) and
# then passed through 'scale' so that the coefficients are standardized
LCBD.inv.model <- betareg(
  formula = LCBD ~
    scale(asin(sqrt(LCEH_phy))) +
    scale(asin(sqrt(LCEH_wc))) +
    scale(asin(sqrt(LCEH_lu))) +
    scale(asin(sqrt(Forest))),
  data = beta.inv
)

plot(LCBD.inv.model)
summary(LCBD.inv.model)

# Spatial autocorrelation in the model residuals
# There is no spatial structure in the residuals
correlog.res.inv <- pgirmess::correlog(
  coords = beta.inv[, c("Lat", "Lon")],
  z = LCBD.inv.model$residuals,
  method = "Moran",
  alternative = "two.sided"
)
p.adjust(correlog.res.inv[, "p.value"], method = "bonferroni")




##########################.
#### SCBD per species ####

# Object LCBD.inv also contains the SCBD results
# (this line previously referenced 'LCBD.diat', an object that is never created
# in this script, which stops a sourced run with an "object not found" error)
LCBD.inv$SCBD

####Preparing SCBD matrix####
# One row per genus, sorted alphabetically by genus name; the helper 'genus'
# column is only used for sorting and is removed afterwards
SCBD.inv <- data.frame(genus = names(LCBD.inv$SCBD), SCBD = LCBD.inv$SCBD)
SCBD.inv <- SCBD.inv[order(SCBD.inv[, 1]), ]
SCBD.inv$genus <- NULL
head(SCBD.inv)

# How many genera have SCBD values higher than the mean
sum(SCBD.inv$SCBD > mean(SCBD.inv$SCBD))


# Columns of 'inv' are put in alphabetical order so that they line up with the
# alphabetically sorted rows of SCBD.inv
SCBD.inv$Abund <- colSums(inv[, order(colnames(inv))]) # abundance of each genus
SCBD.inv$Occurrence <- specnumber(inv[, order(colnames(inv))], MARGIN = 2) # occupancy of each genus


####Correlations between SCBD and abundance or occupancy####
cor.test(SCBD.inv$SCBD, SCBD.inv$Abund)
cor.test(SCBD.inv$SCBD, SCBD.inv$Occurrence)


####Creating the trait predictors####
# Trait table: first column of the file provides the row names
traits.inv <- read.table(
  "inverts_traits20_5.txt",
  header = TRUE,
  row.names = 1,
  stringsAsFactors = TRUE
)
head(traits.inv)
dim(traits.inv)

# Sort rows alphabetically by row name
traits.inv <- traits.inv[order(row.names(traits.inv)), ]

# 'size' is treated as a categorical trait
traits.inv[["size"]] <- as.factor(traits.inv[["size"]])

# Sanity check: row names must match those of SCBD.inv, in the same order
# (all.equal should give TRUE and which() should return nothing)
all.equal(row.names(traits.inv), row.names(SCBD.inv))
which(row.names(traits.inv) != row.names(SCBD.inv))

# Gower dissimilarity between rows of the trait table, followed by a PCOA
# (capscale with an intercept-only formula)
traits.inv.dist <- gowdis(traits.inv)
pcoa.traits.inv <- capscale(formula = traits.inv.dist ~ 1)
head(summary(pcoa.traits.inv))


####Creating the taxonomic relatedness predictors####
# Taxonomy table: first column of the file provides the row names
# ('header = TRUE' and 'TRUE' replace the partially matched 'h' and the
# reassignable 'T')
taxo.inv <- read.table("inverts_phylo20_5.txt", header = TRUE, row.names = 1,
                       stringsAsFactors = TRUE)
head(taxo.inv)
dim(taxo.inv)

# Sort rows alphabetically by row name
taxo.inv <- taxo.inv[order(row.names(taxo.inv)), ]

# 'as.taxo' comes from package ade4. It is called with the 'ade4::' prefix
# because library(ade4) is only run further down in this script, so the
# unqualified name worked only if another package had already attached ade4
taxo.inv <- ade4::as.taxo(taxo.inv)

# Function 'as.taxo' alters the previously defined genus order in the dataframe
# So, the rows are reordered alphabetically again
taxo.inv <- taxo.inv[order(row.names(taxo.inv)), ]


# Just checking whether genera are correctly ordered in all data sets
all.equal(row.names(taxo.inv), row.names(SCBD.inv))
all.equal(row.names(taxo.inv), row.names(traits.inv))


# Taxonomic distances and PCOA (capscale with an intercept-only formula)
taxo.inv.dist <- adiv::dsimTaxo(taxo.inv, type = "dissimilarity", method = 1)
pcoa.taxo.inv <- capscale(taxo.inv.dist ~ 1)
head(summary(pcoa.taxo.inv))


# Dataframe with SCBD and PCOA axes traits/taxo
# The first two axes of each PCOA are added as columns; values are assigned by
# position, relying on the row-order checks made when the trait and taxonomic
# tables were prepared.
# display = "sites" makes scores() return the matrix of row scores directly, so
# the code no longer depends on scores() returning a list with a '$sites'
# element (it returns a bare matrix when only one kind of score is available).
SCBD.inv$Ax1traits <- scores(pcoa.traits.inv, display = "sites", choices = 1:2)[, 1]
SCBD.inv$Ax2traits <- scores(pcoa.traits.inv, display = "sites", choices = 1:2)[, 2]

SCBD.inv$Ax1taxo <- scores(pcoa.taxo.inv, display = "sites", choices = 1:2)[, 1]
SCBD.inv$Ax2taxo <- scores(pcoa.taxo.inv, display = "sites", choices = 1:2)[, 2]

head(SCBD.inv)

# Correlations between all columns of SCBD.inv
cor(SCBD.inv)
# VIF check on the four PCOA axes (columns 4 to 7), threshold 3
usdm::vifstep(SCBD.inv[, 4:7], th = 3)


####Beta regression model SCBD####
# SCBD modelled on the first two trait and taxonomic PCOA axes;
# 'scale' standardizes each predictor
SCBD.inv.model <- betareg(
  formula = SCBD ~
    scale(Ax1traits) +
    scale(Ax2traits) +
    scale(Ax1taxo) +
    scale(Ax2taxo),
  data = SCBD.inv
)

plot(SCBD.inv.model)
summary(SCBD.inv.model)



##########################.
#### SCBD versus local and regional environmental predictors####

# Fourth corner analysis
library(ade4)

# Site-level table: LCEH_phy, LCEH_wc, LCEH_lu and Forest cover
# (columns 7 to 10 of beta.inv)
# Genus-level table: the SCBD values stored in LCBD.inv

set.seed(1) # reproducible permutations
fourth.corner.inv <- fourthcorner(
  beta.inv[, 7:10],
  inv,
  data.frame(LCBD.inv$SCBD),
  modeltype = 6,
  nrepet = 2999
)
summary(fourth.corner.inv)


# List every attached package and its version
sessionInfo()

