#author: Dr. Sarah Kimball

#First set your working directory to the folder where you've stored the csv file:
#setwd()

# Read the CSV file from the working directory; it holds the species columns
# together with the descriptor columns for each quadrat. Then print the
# structure of the imported data frame as a quick check.
rawdata <- utils::read.csv(file = "REC_SppMatrix.csv", header = TRUE)
utils::str(rawdata)

#load the necessary packages (install any that are missing first, e.g. install.packages("vegan"))
library(vegan)
library(labdsv)
library(ade4)
library(ecodist)
library(fso)
library(vegclust)   
library(tidyverse)


# Species matrix: every column of rawdata EXCEPT the descriptor columns that
# run from Year.Sample.RestStat through Status.Num.
species <- dplyr::select(rawdata, -(Year.Sample.RestStat:Status.Num))
str(species)

# Environmental table: every column of rawdata EXCEPT the species columns
# that run from ACMGLA through TRIGRA. It holds the details of each quadrat,
# such as the restoration status (1 = degraded, 2 = restored, 3 = intact).
# Ideally you will match up the plots with the PI data and add information on
# % cover of different functional groups (e.g. cover of non-native grasses).
env <- dplyr::select(rawdata, -(ACMGLA:TRIGRA))
str(env)

# site = quadrat
# ---- Diversity metrics ----

### Gamma (regional) diversity: number of species whose column total is > 0
gamma <- sum(colSums(species) > 0)
gamma

### Alpha (per-site) diversity: number of entries > 0 in each row
alpha <- rowSums(species > 0)
alpha    # within-site
avgalpha <- mean(alpha)
avgalpha # average within-site

### Beta (among-site) diversity: Whittaker's (gamma / mean alpha - 1)
beta <- gamma / avgalpha - 1
beta

### Beta diversity: dust bunny indices
### 1 -- proportion of zeros in the matrix (independent of abundance)
eps      <- .Machine$double.eps # machine tolerance
propzero <- sum(species < eps) / (nrow(species) * ncol(species))
cat("Proportion of zeros in matrix:", propzero, "\n")

### 2 -- "dust bunny index" of McCune and Root (2015) (integrates abundances)
###   one minus the mean of the matrix after vegan's "max" standardization
dbi <- 1 - (species %>%
  vegan::decostand(method = "max") %>%
  as.matrix() %>%
  mean())
cat("Dust bunny index:", dbi, "\n")

### Beta-diversity: no-share sites
### how many site-pairs share no species in common?
z <- vegan::no.shared(species)
propnoshare <- sum(z) / length(z)
cat("Proportion of no-share sites:", propnoshare, "\n")

####    Ordination (unconstrained)    ####
# The simplest way to run an ordination on a species matrix. It is not the
# best way, because it relies on default settings that may not be appropriate
# for your data.
m <- metaMDS(comm = species)
# metaMDS does not always find the same best solution twice, and in that case
# you cannot be sure that the best solution found is the global optimum. If
# you are uncertain, continue the iterations from your current solution by
# passing it as the `previous.best` argument:
m <- metaMDS(comm = species, previous.best = m)
m

# Palette code from the Ecology R workshop.
# Returns a fixed character vector of 99 hex colours (each with the alpha
# suffix "E6"), ordered from blue-purple through teal to yellow-green.
get_palette <- function() {
  c(
    "#414487E6", "#404688E6", "#3F4889E6", "#3E4989E6", "#3E4C8AE6",
    "#3D4E8AE6", "#3C508BE6", "#3B528BE6", "#3A548CE6", "#39558CE6",
    "#38588CE6", "#375A8CE6", "#365C8DE6", "#355E8DE6", "#35608DE6",
    "#34618DE6", "#33638DE6", "#32658EE6", "#31678EE6", "#30698EE6",
    "#306A8EE6", "#2F6C8EE6", "#2E6E8EE6", "#2D708EE6", "#2C718EE6",
    "#2C738EE6", "#2B748EE6", "#2A768EE6", "#2A788EE6", "#297A8EE6",
    "#287C8EE6", "#287D8EE6", "#277F8EE6", "#26818EE6", "#26828EE6",
    "#25848EE6", "#24868EE6", "#24878EE6", "#23898EE6", "#228B8DE6",
    "#228D8DE6", "#218F8DE6", "#21908CE6", "#20928CE6", "#20938CE6",
    "#1F958BE6", "#1F978BE6", "#1F998AE6", "#1F9A8AE6", "#1E9C89E6",
    "#1F9E89E6", "#1FA088E6", "#1FA187E6", "#20A386E6", "#20A486E6",
    "#21A685E6", "#22A884E6", "#24AA83E6", "#25AC82E6", "#26AD81E6",
    "#28AE80E6", "#2AB07FE6", "#2DB27DE6", "#2FB47CE6", "#32B67AE6",
    "#34B679E6", "#37B878E6", "#3ABA76E6", "#3DBC74E6", "#40BD72E6",
    "#43BF71E6", "#47C06FE6", "#4AC16DE6", "#4EC36BE6", "#52C569E6",
    "#55C668E6", "#59C864E6", "#5DC863E6", "#60CA60E6", "#65CB5EE6",
    "#68CD5BE6", "#6DCD59E6", "#71CF57E6", "#75D054E6", "#7AD151E6",
    "#7FD34EE6", "#83D44CE6", "#87D549E6", "#8CD646E6", "#90D743E6",
    "#95D840E6", "#9AD93CE6", "#9FDA3AE6", "#A3DA37E6", "#A8DB34E6",
    "#ADDC30E6", "#B2DD2DE6", "#B7DE2AE6", "#BBDF27E6"
  )
}

# Map the values of `x` onto palette colours.
# `x` is coerced to numeric, its range is cut into as many equal-width bins
# as there are palette entries, and each value receives the colour of the bin
# it falls in. Returns a character vector of colours the same length as `x`.
colvec <- function(x) {
  palette_cols <- get_palette()
  bin <- cut(
    as.numeric(x),
    breaks = length(palette_cols),
    include.lowest = TRUE
  )
  palette_cols[bin]
}

# Heat-map style display of the species in each site, coloured with the
# workshop palette.
vegan::tabasco(x = species, col = get_palette())

# Another way to run an ordination is to first calculate a distance (aka
# dissimilarity) matrix yourself. This is preferred because it gives you more
# control: for example, since we are using presence/absence data we can ask
# for binary = TRUE.
species.distance <- vegdist(species, method = "bray", binary = TRUE)

# Next we run the ordination on this matrix.
# - `try` and `trymax` are the minimum and maximum numbers of random starts;
#   each must be a single number (the previous `try = c(50, 500)` passed a
#   length-2 vector to a scalar argument).
# - Because a precomputed dissimilarity object is supplied, metaMDS skips its
#   own distance calculation and cannot compute weighted-average species
#   scores, so the `distance` and `wascores` arguments are not passed.
# Here I'm asking for a 2-dimensional solution:
ord.2D <- metaMDS(species.distance, k = 2, try = 50, trymax = 500,
                  autotransform = FALSE)
# The stress is kind of high, so let's try a 3-dimensional solution:
ord <- metaMDS(species.distance, k = 3, try = 50, trymax = 500,
               autotransform = FALSE)

# Shepard diagram of the 3-dimensional solution
stressplot(ord)
# Relationships between numerical environmental variables and ordination
# space. Columns 2 to 5 of env are fitted onto axes 1 and 2 with 9999
# permutations.
head(env)
fit <- envfit(
  ord,
  env = env[, 2:5],
  permutations = 9999,
  choices = c(1, 2),
  display = "sites"
)
head(fit)
# This is the output you'll care about: it shows which env variables are
# significantly related to ordination space.
# Ignore page number, but include the results of year and status in your
# results:
fit$vectors

# Which species are significantly related to ordination space?
spp.fit <- envfit(ord, env = species, permutations = 999)
head(spp.fit)
# This table is what you should include in your results:
spp.fit$vectors

# Species arrows for plotting in ordination space: the fitted directions
# scaled by sqrt(r) and halved.
spp.arrows <- as.data.frame(
  spp.fit$vectors[["arrows"]] * sqrt(spp.fit$vectors[["r"]]) / 2
)
spp.arrows

# Environmental arrows, scaled the same way.
env.arrows <- as.data.frame(
  fit$vectors[["arrows"]] * sqrt(fit$vectors[["r"]]) / 2
)
env.arrows

# Data frame of site scores for plotting in ggplot: the NMDS site scores plus
# the grouping variable "Status" and the "Year" variable (in case you want to
# display it on the plot).
sites_scores <- cbind(
  as.data.frame(scores(ord, display = "sites")),
  Status = env$Status,
  Year = env$Year
)

head(sites_scores)


# Data frame of species arrows with the species name and permutation p-value
# attached; keep only the species with p < 0.05.
spp.scrs <- cbind(
  spp.arrows,
  Species = rownames(spp.arrows),
  pval = spp.fit$vectors$pvals
)
sig.spp.scrs <- spp.scrs[spp.scrs$pval < 0.05, , drop = FALSE]
sig.spp.scrs


# Plot the ordination.
# Sites are placed in ordination space according to their dissimilarity, with
# shape and grey shade showing Status. Arrows (and labels) are added for all
# species that are significantly related to ordination space.
recruitment.mds <- ggplot(sites_scores, aes(x = NMDS1, y = NMDS2)) +
  geom_point(
    aes(shape = factor(Status), colour = factor(Status)),
    size = 4
  ) +
  coord_fixed() +
  geom_segment(
    data = sig.spp.scrs,
    aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
    arrow = arrow(length = unit(0.25, "cm"))
  ) +
  geom_text(data = sig.spp.scrs, aes(label = Species), size = 3) +
  scale_colour_grey(start = 0.3, end = 0.7) +
  labs(colour = "Status", shape = "Status") +
  theme(
    panel.background = element_rect(
      fill = NA, colour = "black", size = 1, linetype = "solid"
    ),
    legend.position = "right",
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )

recruitment.mds

# Some other, simpler plots:
plot(ord, display = "sites")      # site scores only
orditorp(ord, display = "sites")  # adds labels to the points

# Another method for relating species to ordination space: Pearson
# correlation of every species column with every ordination axis.
spp_cor <- cor(
  x = species,
  y = ord$points,
  use = "complete.obs",
  method = "pearson"
)
# View the correlation matrix
view(spp_cor)
# Write the Pearson correlation scores to a file
write.csv(spp_cor, file = "spp_PearsonCor.csv")


## Display of Factors
# Plot the ordination with the points colour-coded by restoration status,
# then outline each status group with a convex hull.
# The grouping column is `Status.Num` (the name used everywhere else in this
# script); `env$Status_Num` does not exist and evaluates to NULL, so the hulls
# were previously drawn without any grouping information.
plot(scores(ord), pch = 16, col = colvec(env$Status.Num))
ordihull(ord, groups = env$Status.Num, col = "pink")
# You can change the colour if you want:
ordihull(ord, groups = env$Status.Num, col = "blue")
# ...but there's no legend.

# Same ordination in ggplot, with colour for Status and a different shape for
# each Year. This is a visual way to demonstrate no significant effect of
# year.
# NOTE(review): the colour labels below are applied in the order of the
# levels of factor(Status) -- confirm that order really is
# Degraded, Intact, Restored.
Tmt_colors <- c("#FDB309", "#5E4780", "#4E6135")
colors.recruitment.mds <- ggplot(sites_scores, aes(x = NMDS1, y = NMDS2)) +
  geom_point(
    aes(shape = factor(Year), colour = factor(Status)),
    size = 4
  ) +
  coord_fixed() +
  # Optional species vectors (left disabled):
  # geom_segment(data = sig.spp.scrs,
  #              aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
  #              arrow = arrow(length = unit(0.25, "cm"))) +
  # geom_text(data = sig.spp.scrs, aes(label = Species), size = 3) +
  labs(colour = "Status", shape = "Year", x = "Axis 1", y = "Axis 2") +
  theme(
    panel.background = element_rect(
      fill = NA, colour = "black", size = 1, linetype = "solid"
    ),
    legend.position = "right",
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  ) +
  scale_color_manual(
    values = Tmt_colors,
    labels = c("Degraded", "Intact", "Restored")
  ) +
  stat_ellipse(aes(colour = factor(Status)), size = 1)

colors.recruitment.mds
# this is the main plot we want to show
#this is the main plot we want to show

# Plots of factor fitting only show the class centroids. We may also want to
# see the variation or scatter of class members. The commands below add
# graphical descriptions of the groups to an existing plot.
plot(scores(ord), pch = 16, col = colvec(env$Status.Num))
# ordispider connects class members to their centroid with lines
ordispider(ord, groups = env$Year, col = "skyblue")
# ordihull draws a convex hull enclosing all points of each class
ordihull(ord, groups = env$Status.Num, col = "pink")
# Below are different ways to show that the sites are clumped in ordination
# space by status. Don't use all of these: just pick one. I like ordihull
# best, followed by the ellipse, but it's up to you.
ordiellipse(ord, groups = env$Status.Num)
# Here ordiellipse draws 95 % confidence ellipses around the class centroids.
# If these confidence ellipses do not overlap, the classes probably are
# significantly different at level P <= 0.05.
ordiellipse(ord, groups = env$Status.Num, kind = "se", conf = 0.95,
            col = "red")

## 2.2  Fitting Vectors

# Vector fitting implies a linear trend surface. In the following graph the
# fitted arrows show how restoration status and year relate to the sites in
# ordination space. Each arrow points in the direction of the (increasing)
# gradient, and its length is proportional to the correlation between the
# variable and the ordination.
plot(ord, display = "sites", type = "t")

# restoration status
ef <- envfit(ord ~ Status.Num, data = env)
plot(ef, add = TRUE)
ef

# year
yf <- envfit(ord ~ Year, data = env)
plot(yf, add = TRUE)
yf


### Visualizing ordinations
# Colour vector for plotting: one palette colour per site (row of `species`),
# taken in palette order. rep_len() recycles the 99-colour palette when there
# are more sites than colours, so no label ends up with an NA colour, and it
# also behaves correctly for zero rows (unlike 1:nrow()).
u <- rep_len(get_palette(), nrow(species))

# Empty plot of the first two axes, then the site names as coloured labels
plot(ord$points, type = "n", xlab = "NMDS1", ylab = "NMDS2")
text(ord$points, rownames(ord$points), cex = 0.8, col = u)



### overlay gradients on the NMS using point colors
# Draw the two gradients side by side. Without the 1 x 2 layout the second
# plot simply replaces the first, and the reset to a single panel below had
# nothing to undo. (An extra, identical status plot that was immediately
# overwritten has been removed.)
par(mfrow = c(1, 2))
plot(scores(ord), pch = 16, col = colvec(env$Status.Num))
plot(scores(ord), pch = 16, col = colvec(env$Year))
par(mfrow = c(1, 1))
### overlay gradients on the NMS by fitting a GAM surface
# Fit the surface without plotting, then draw it on top of the coloured
# site scores.
f1 <- ordisurf(ord ~ env$Status.Num, plot = FALSE)
plot(scores(ord), pch = 16, col = colvec(env$Status.Num))
plot(f1, add = TRUE, col = 1, lwd = 2)

#this is the end of code showing different ways to plot the ordination


### Test for difference in community compositions
### permanova: test for differences in multivariate *centroid*
# adonis() is deprecated/defunct in current vegan; adonis2() is its
# replacement and accepts the same dissimilarity ~ predictor formula.
# NOTE(review): Status.Num is entered as a numeric (1-df) predictor, exactly
# as before. If status should be tested as three unordered groups, use
# factor(Status.Num) instead -- confirm which is intended.
a1 <- adonis2(species.distance ~ Status.Num, data = env, permutations = 999)
a1





