####Script for LCBD and SCBD analysis of diatoms####


####Datasets####
# Benthic diatoms sampled in 100 streams
# Dataset is a site (rows) by species (columns) matrix
# The first two columns are grouping ID variables (catchment, stream)

# Local environmental variables from the same 100 streams
# The first two columns are grouping ID variables (catchment, stream)

# Land use variables obtained within a 400 m radius along each stream
# The first three columns are grouping ID variables (ID, catchment, stream)


# Prepared by Fabiana Schneck (fabiana.schneck@gmail.com) in March/2021
# Revised by FS in October/2021
#===============================================================================.

#Remove every object (hidden ones included) from the current workspace so the
#script starts from a clean state. NOTE(review): this also deletes anything the
#user had in the session before running the script.
rm(list=ls(all=TRUE))

#loading required packages
library(adespatial)
library(vegan)
library(pgirmess) #correlog
library(usdm) #vifstep
library(betareg)
library(FD) #traits dissimilarity (gowdist function)
library(adiv) #taxonomic dissimilarity


####Preparing data sets####

#Diatoms data matrix
#Site (rows) by species (columns) table read from file
diat <- read.table("diatoms20_5.txt", header = TRUE)
diat <- diat[, -c(1, 2)] #exclude ID variables
dim(diat) #100 sites/360 spp
rowSums(diat)

#Exclude sites with reduced diatom counts (17 streams, all with fewer than 100 diatom valves)
#NOTE(review): the cutoff applied below is 490 valves, not 100 - confirm that no
#site has a count between 100 and 489.
#The row positions are stored in 'excluir.sites.diat' because the same sites are
#removed later from the environmental, land-use and coordinate tables.
excluir.sites.diat <- which(rowSums(diat) < 490)
#Guard: with an empty index, diat[-integer(0), ] would silently drop ALL rows
if (length(excluir.sites.diat) > 0) {
  diat <- diat[-excluir.sites.diat, , drop = FALSE]
}

#Now, exclude species with abundance = zero
#The column positions are stored in 'excluir.sp.diat' (reused for the trait and
#taxonomy tables further down the script)
excluir.sp.diat <- which(colSums(diat) == 0)
#Same guard as above: an empty index must keep every species, not drop them all
if (length(excluir.sp.diat) > 0) {
  diat <- diat[, -excluir.sp.diat, drop = FALSE]
}

#The data set has now 83 sites and 356 species
dim(diat)



#Environmental data matrix
#Local environmental variables read from file
env <- read.table("env20_5.txt", header = TRUE)

#Geographical coordinates from all 100 streams
#(extracted before the column selection below)
geo.data <- env[, c("lat", "lon")]

#Select variables of interest (unwanted columns are dropped by position)
env <- env[, -c(1, 2, 13, 16, 18, 19, 22, 23)]

#Exclude sites for which we excluded diatoms and standardize env variables
#Rows are selected with setdiff() instead of a negative index so that an empty
#'excluir.sites.diat' keeps every site (env[-integer(0), ] would return zero rows).
#MARGIN = 2 standardizes each variable (column) to zero mean and unit variance.
env.s.diat <- decostand(env[setdiff(seq_len(nrow(env)), excluir.sites.diat), ],
                        "standardize", MARGIN = 2)
dim(env.s.diat) #83 sites/15 variables



#Land-use data matrix
#'header' is spelled out (the original relied on partial matching of 'h' and on
#the reassignable shortcut 'T')
land <- read.csv("streams_buffer_400m.csv", header = TRUE)

#Keep the sites retained for diatoms and drop the first six columns.
#setdiff() is used instead of a negative index so that an empty
#'excluir.sites.diat' keeps every site rather than returning zero rows.
#NOTE(review): assumes the rows of 'land' follow the same site order as the
#diatom table - confirm against the input files.
land.diat <- land[setdiff(seq_len(nrow(land)), excluir.sites.diat), -c(1:6)]
dim(land.diat) #83 sites/10 variables


#########################.
#### LCBD per stream ####
#LCBD calculated together for all streams
#Seed fixed so that the permutation results are reproducible
set.seed(1)
LCBD.diat <- beta.div(diat, method = "hellinger", nperm = 9999)

LCBD.diat$beta #Total beta diversity

#Rows of 'geo.data' matching the sites retained in 'diat'.
#setdiff() is used instead of a negative index so that an empty
#'excluir.sites.diat' keeps every site rather than returning zero rows.
keep.sites.diat <- setdiff(seq_len(nrow(geo.data)), excluir.sites.diat)

#Creating a dataframe with LCBD values, species richness, Simpson dominance index,
#and geographical coordinates for each stream site
#vegan's diversity(x, "simpson") returns 1 - D, hence 1 - diversity() gives the
#dominance D itself
beta.diat <- data.frame(LCBD = LCBD.diat$LCBD,
                        S = specnumber(diat),
                        Domi = 1 - diversity(diat, "simpson"),
                        Lat = geo.data[keep.sites.diat, 1],
                        Lon = geo.data[keep.sites.diat, 2])



####Spatial autocorrelation of original LCBD data####
#Moran's I correlogram of LCBD over the site coordinates (one-sided test)
correlog.diat <- correlog(
  coords = beta.diat[, c("Lat", "Lon")],
  z = beta.diat$LCBD,
  method = "Moran",
  alternative = "greater"
)
#Bonferroni correction of the p-values across distance classes
p.adjust(correlog.diat[, "p.value"], method = "bonferroni")



####Pearson correlations between LCBD and species richness or Simpson dominance index####
#LCBD versus species richness
cor.test(x = beta.diat$LCBD, y = beta.diat$S, method = "pearson")
#LCBD versus Simpson dominance
cor.test(x = beta.diat$LCBD, y = beta.diat$Domi, method = "pearson")


#Now, I need to create the predictors of environmental uniqueness
#Each predictor is obtained by applying LCBD.comp() to a Euclidean distance
#matrix among sites. sqrt.D = FALSE: the distances are used as they are
#(written as FALSE rather than the reassignable shortcut F).

####Environmental uniqueness - All variables####
#LCEH = local contribution to environmental heterogeneity
#env.s.diat = standardized environmental matrix
env.dist.diat <- vegdist(env.s.diat, "euclidean")
LCEH.tot.diat <- LCBD.comp(env.dist.diat, sqrt.D = FALSE)

####Environmental uniqueness - physical variables####
#Columns 1 to 10 of env.s.diat
envPHY.dist.diat <- vegdist(env.s.diat[, 1:10], "euclidean")
LCEH.PHY.diat <- LCBD.comp(envPHY.dist.diat, sqrt.D = FALSE)

####Environmental uniqueness - water chemistry variables####
#Columns 11 to 15 of env.s.diat
envWC.dist.diat <- vegdist(env.s.diat[, 11:15], "euclidean")
LCEH.WC.diat <- LCBD.comp(envWC.dist.diat, sqrt.D = FALSE)

####Land use uniqueness####
#LCEH.LU = local contribution to land-use heterogeneity
#I am using arcsine square-root transformation here
#NOTE(review): asin(sqrt(x)) requires values within [0, 1]; presumably the
#land-use variables are proportions - confirm.
land.dist.diat <- vegdist(asin(sqrt(land.diat)), "euclidean")
LCEH.LU.diat <- LCBD.comp(land.dist.diat, sqrt.D = FALSE)


#Including the predictors in the beta.diat dataframe
#(columns 6 to 10: LCEH_tot, LCEH_phy, LCEH_wc, LCEH_lu, Forest)
beta.diat <- data.frame(beta.diat,
                        LCEH_tot = LCEH.tot.diat$LCBD,
                        LCEH_phy = LCEH.PHY.diat$LCBD,
                        LCEH_wc = LCEH.WC.diat$LCBD,
                        LCEH_lu = LCEH.LU.diat$LCBD,
                        Forest = land.diat$Forest)


#Correlations between predictors
#Pairwise correlation matrix of the five candidate predictors
cor(beta.diat[, c("LCEH_tot", "LCEH_phy", "LCEH_wc", "LCEH_lu", "Forest")])
#LCEH_tot excluded due to its high correlation with LCEH_phy

#All remaining predictors have a VIF lower than 3
usdm::vifstep(beta.diat[, c("LCEH_phy", "LCEH_wc", "LCEH_lu", "Forest")], th = 3)


####Beta regression model LCBD####
#Predictors were arcsine square-root transformed to meet normality assumption
#Function 'scale' was used to generate standardized coefficients
LCBD.diat.model <- betareg(
  LCBD ~ scale(asin(sqrt(LCEH_phy))) + scale(asin(sqrt(LCEH_wc))) +
    scale(asin(sqrt(LCEH_lu))) + scale(asin(sqrt(Forest))),
  data = beta.diat
)

#Diagnostic plots followed by the coefficient table
plot(LCBD.diat.model)
summary(LCBD.diat.model)

#Spatial autocorrelation in the model residuals#
#Two-sided Moran's I correlogram on the residuals stored in the fitted object
correlog.res.diat <- pgirmess::correlog(
  coords = beta.diat[, c("Lat", "Lon")],
  z = LCBD.diat.model[["residuals"]],
  method = "Moran",
  alternative = "two.sided"
)
p.adjust(correlog.res.diat[, "p.value"], method = "bonferroni")

#Correlogram showed a significant negative autocorrelation in the ninth distance class
#Thus, we generated dbMEM's
#and proceeded with a global test on the model residuals against all MEM variables
SWE.d <- adespatial::listw.candidates(beta.diat[, c("Lat", "Lon")], nb = "pcnm")

#Seed fixed for the permutation tests in mem.select()
set.seed(123)
adespatial::mem.select(
  x = LCBD.diat.model[["residuals"]],
  listw = SWE.d[[1]],
  method = "FWD",
  MEM.autocor = "all",
  nperm = 9999,
  alpha = 0.05
)

#Since neither the positive nor the negative eigenvectors were significant,
#we did not add spatial variables to the regression model



##########################.
#### SCBD per species ####

#The beta.div() result stored in LCBD.diat also holds the SCBD values
LCBD.diat$SCBD

####Preparing SCBD matrix####
#One row per species, sorted by species name; the helper column 'species' is
#dropped once the rows are ordered
SCBD.diat <- data.frame(species = names(LCBD.diat$SCBD),
                        SCBD = LCBD.diat$SCBD)
SCBD.diat <- SCBD.diat[order(SCBD.diat$species), ]
SCBD.diat$species <- NULL
head(SCBD.diat)

#How many species have SCBD values higher than the mean
sum(SCBD.diat$SCBD > mean(SCBD.diat$SCBD))


#Species columns of 'diat' are put in name order so they line up with SCBD.diat
SCBD.diat$Abund <- colSums(diat[order(names(diat))]) #abundance of each species
SCBD.diat$Occurrence <- specnumber(diat[order(names(diat))], MARGIN = 2) #occupancy of each species


####Correlations between SCBD and abundance or occupancy####
cor.test(x = SCBD.diat$SCBD, y = SCBD.diat$Abund)
cor.test(x = SCBD.diat$SCBD, y = SCBD.diat$Occurrence)


####Creating the trait predictors####
#Species-by-trait table; the first column holds the species names
traits.diat <- read.table("diatoms_traits20_5.txt", header = TRUE, stringsAsFactors = TRUE)
head(traits.diat)
dim(traits.diat) #we need to exclude the four species with zero abundance

#Sort species by name
traits.diat <- traits.diat[order(traits.diat[, 1]), ]
#Keep only the species still present in 'diat', matching by NAME.
#'excluir.sp.diat' holds column positions in the original (unsorted) diatom
#table, so using it as a row index after the sort above is only correct when
#that table was already in name order; name matching does not depend on that
#and also behaves correctly when no species was excluded.
traits.diat <- traits.diat[as.character(traits.diat[, 1]) %in% colnames(diat), ]

traits.diat[, "size_class"] <- as.factor(traits.diat[, "size_class"])

#Move the species names from the first column to the row names
row.names(traits.diat) <- traits.diat[, 1]
traits.diat[, 1] <- NULL

#Checking whether species are correctly ordered in both data sets.
#A mismatch would silently pair trait axes with the wrong species in the SCBD
#model, so it is treated as an error instead of only being printed.
if (!identical(row.names(traits.diat), row.names(SCBD.diat))) {
  stop("Species in 'traits.diat' and 'SCBD.diat' differ or are not in the same order",
       call. = FALSE)
}


#Dissimilarity using Gower distance coefficient, followed by a PCOA
traits.diat.dist <- FD::gowdis(traits.diat)
#capscale() with an intercept-only formula performs an unconstrained ordination (PCoA)
pcoa.traits.diat <- capscale(traits.diat.dist ~ 1)
head(summary(pcoa.traits.diat))



####Creating the taxonomic relatedness predictors####
#Species-by-taxonomic-rank table; species names are read as row names
#'header' is spelled out (the original relied on partial matching of 'h' and on
#the reassignable shortcut 'T')
taxo.diat <- read.table("diatoms_phylo20_5.txt", header = TRUE, row.names = 1,
                        stringsAsFactors = FALSE)
head(taxo.diat)
dim(taxo.diat) #we need to exclude the four species with zero abundance

#Sort species by name
taxo.diat <- taxo.diat[order(row.names(taxo.diat)), ]
#Keep only the species still present in 'diat', matching by NAME.
#'excluir.sp.diat' holds column positions in the original (unsorted) diatom
#table, so using it as a row index after the sort above is only correct when
#that table was already in name order; name matching does not depend on that
#and also behaves correctly when no species was excluded.
taxo.diat <- taxo.diat[row.names(taxo.diat) %in% colnames(diat), ]


#as factor: this needs to be performed after excluding species with abundance = zero.
#Otherwise, there will be an error in function 'as.taxo'
taxo.ranks.diat <- c("Genus", "Family", "Order", "Subclass.Round.", "Class", "Subdivision")
taxo.diat[taxo.ranks.diat] <- lapply(taxo.diat[taxo.ranks.diat], as.factor)

#Function from package ade4; called through its namespace because ade4 is only
#attached explicitly further down the script
taxo.diat <- ade4::as.taxo(taxo.diat)

#Function 'as.taxo' alters the previously defined species order in the dataframe
#So, I reordered it
taxo.diat <- taxo.diat[order(row.names(taxo.diat)), ]
row.names(taxo.diat)


#Checking whether species are correctly ordered in all data sets.
#A mismatch would silently pair taxonomic axes with the wrong species in the
#SCBD model, so it is treated as an error instead of only being printed.
if (!identical(row.names(taxo.diat), row.names(SCBD.diat))) {
  stop("Species in 'taxo.diat' and 'SCBD.diat' differ or are not in the same order",
       call. = FALSE)
}
if (!identical(row.names(taxo.diat), row.names(traits.diat))) {
  stop("Species in 'taxo.diat' and 'traits.diat' differ or are not in the same order",
       call. = FALSE)
}


#Taxonomic distances and PCOA
taxo.diat.dist <- adiv::dsimTaxo(taxo.diat, type = "dissimilarity", method = 1)
#capscale() with an intercept-only formula performs an unconstrained ordination (PCoA)
pcoa.taxo.diat <- capscale(taxo.diat.dist ~ 1)
head(summary(pcoa.taxo.diat))


#Dataframe with SCBD and PCOA axes traits/taxo
#First two ordination axes of the trait-based PCoA
SCBD.diat$Ax1traits <- scores(pcoa.traits.diat)$sites[, 1]
SCBD.diat$Ax2traits <- scores(pcoa.traits.diat)$sites[, 2]

#First two ordination axes of the taxonomy-based PCoA
SCBD.diat$Ax1taxo <- scores(pcoa.taxo.diat)$sites[, 1]
SCBD.diat$Ax2taxo <- scores(pcoa.taxo.diat)$sites[, 2]

head(SCBD.diat)

#Correlations between predictors
cor(SCBD.diat)
#Stepwise VIF screening of the four PCoA axes (threshold = 3)
usdm::vifstep(
  SCBD.diat[, c("Ax1traits", "Ax2traits", "Ax1taxo", "Ax2taxo")],
  th = 3
)


####Beta regression model SCBD####
#Predictors are wrapped in 'scale' so the coefficients are standardized
SCBD.diat.model <- betareg(
  SCBD ~ scale(Ax1traits) + scale(Ax2traits) + scale(Ax1taxo) + scale(Ax2taxo),
  data = SCBD.diat
)

#Diagnostic plots followed by the coefficient table
plot(SCBD.diat.model)
summary(SCBD.diat.model)



##########################.
#### SCBD versus local and regional environmental predictors####

#Fourth corner analysis
library(ade4)

#Site-level predictors used as table R:
#LCEH_phy, LCEH_wc, LCEH_lu, Forest cover (columns 7 to 10 of beta.diat)

#Seed fixed so that the permutation test is reproducible
set.seed(1)
fourth.corner.diat <- fourthcorner(
  tabR = beta.diat[, c("LCEH_phy", "LCEH_wc", "LCEH_lu", "Forest")],
  tabL = diat,
  tabQ = data.frame(LCBD.diat$SCBD),
  modeltype = 6,
  nrepet = 2999
)
summary(fourth.corner.diat)



#List the R version and every attached package with its version
sessionInfo()


