#This is for performing a dbRDA and for selecting environmental variables

library(vegan)
library(ggplot2)

#load data ----
# The global workspace is deliberately not cleared here: every object this
# script uses is created below, and wiping the environment would silently
# delete whatever the person sourcing the script had in their session.

# Environmental table. Its first six columns are sample descriptors (they are
# re-attached to the site scores further down, where `habitat` and `time` are
# used for plotting), so they are dropped from the explanatory matrix.
env <- read.table("data/trans_env.txt", header = TRUE, sep = "\t")
env2 <- env[, -(1:6)]

# Community table; its first six columns are dropped in the same way
# (presumably the same sample descriptors -- confirm against the file).
crust <- read.table("data/trans_crust.txt", header = TRUE, sep = "\t")
crust2 <- crust[, -(1:6)]

#dbRDA with all explanatory variables ----
# Fit the global model first and test it by permutation. If the global test
# is not significant, forward selection must not be carried out.

# global model: community matrix against every column of env2 (Bray-Curtis)
rda.com <- dbrda(formula = crust2 ~ ., data = env2, distance = "bray")

# permutation test of the global model (dispatches to anova.cca)
anova(rda.com, permutations = 999)

#Forward selection of explanatory variables ----

# Adjusted R2 of the full model, kept for reference. It is not passed to
# ordiR2step(): `R2scope` is a logical switch, and when it is TRUE the
# function itself uses the adjusted R2 of the `scope` model as the ceiling.
adjR2.rda <- RsquareAdj(rda.com)$adj.r.squared

# null model (intercept only): starting point of the selection
rda.0 <- dbrda(crust2 ~ 1, env2, distance = "bray")

# forward selection: terms are taken from the full-model formula
env.sel <- ordiR2step(rda.0, scope = formula(rda.com),
                      R2scope = TRUE,
                      direction = "forward", permutations = 999)
summary(env.sel)
env.sel$anova  # selection history stored on the returned model

plot(env.sel)

# permutation test of the model with the selected variables
anova.cca(env.sel, permutations = 999)


#Extract selected explanatory variables ----

# Explanatory variables written to the output table, in output column order.
# NOTE(review): this list is typed by hand, not read from `env.sel`; it
# presumably mirrors the forward-selection result -- confirm after any rerun.
sel.vars <- c("temp", "depth", "NO3", "CHLA", "SO4", "cond",
              "richness", "ALK", "vflo", "pH", "NO2", "vsub",
              "oxy")

# First six (descriptor) columns of env followed by the selected variables.
# Columns are picked by exact name, so a missing or misspelled variable stops
# with "undefined columns selected" instead of being partially matched or
# silently dropped and then mislabelled by a positional rename.
selected <- cbind(env[, 1:6], env[, sel.vars, drop = FALSE])

write.table(selected, file = "data/env_sel_dbRDA.txt", sep = "\t",
            row.names = FALSE, col.names = TRUE)


#Plotting the RDA ----

# Extract species, environmental (biplot) and site scores for ggplot.

# species scores are added to the fitted model from the community matrix
sppscores(env.sel) <- crust2

# Scores are taken with scores() rather than from summary(): summary() of an
# ordination is not a reliable source of score matrices across vegan versions,
# and scores() always returns a matrix (no drop to a vector when a score set
# has a single row). Default scaling is used, as in the summary() defaults.
plot.axes <- 1:2

sp <- as.data.frame(vegan::scores(env.sel, display = "species",
                                  choices = plot.axes))
var <- as.data.frame(vegan::scores(env.sel, display = "bp",
                                   choices = plot.axes))
st <- as.data.frame(vegan::scores(env.sel, display = "sites",
                                  choices = plot.axes))

#indicator species
ind.sp <- readRDS("data/ind_sp.rds")

# table stored in the `sign` element; rows are named by species and the
# `p.value` column is used below
species <- as.data.frame(ind.sp[["sign"]])

# names of species with p < 0.05 (rows with a missing p-value are excluded)
is.sig <- !is.na(species$p.value) & species$p.value < 0.05
ind.names <- row.names(species)[is.sig]

# Indicator species without a row in the species scores would otherwise
# become all-NA rows in the plot data; report and skip them instead.
missing.sp <- setdiff(ind.names, row.names(sp))
if (length(missing.sp) > 0) {
  warning("Indicator species without species scores were skipped: ",
          paste(missing.sp, collapse = ", "), call. = FALSE)
}

#subset sp data by indicator species
sp <- sp[intersect(ind.names, row.names(sp)), ]

# attach the six descriptor columns of env (including habitat) to the
# site scores
st <- cbind(st, env[, 1:6])

# Rows of `st` lying on the convex hull of one habitat's points in the
# dbRDA1-dbRDA2 plane.
hull.of <- function(hab) {
  pts <- st[st$habitat == hab, ]
  pts[chull(pts[, c("dbRDA1", "dbRDA2")]), ]
}

# hulls stacked in the order rice, marsh, limnocrene
hull.data <- do.call(rbind, lapply(c("rice", "marsh", "limnocrene"), hull.of))

#create the plot
# Layers, in drawing order: sites (shape = time, colour = habitat), indicator
# species (black squares + names), environmental arrows (+ names), and the
# habitat convex hulls as translucent polygons.
# NOTE(review): four shapes and three colours are supplied -- presumably
# `time` has at most four levels and `habitat` three; confirm.
# NOTE(review): the axis percentages are hard-coded and are not recomputed
# from `env.sel`; update them by hand if the model changes.

p <- ggplot() +
  geom_point(data = st,
             aes(x = dbRDA1, y = dbRDA2, shape = time, color = habitat)) +
  scale_shape_manual(values = c(16, 1, 17, 2)) +
  scale_colour_manual(values = c("#0072b2", "#009e73", "#d55e00")) +
  scale_fill_manual(values = c("#0072b2", "#009e73", "#d55e00")) +
  geom_point(data = sp, aes(x = dbRDA1, y = dbRDA2),
             color = "black", shape = 15) +
  geom_text(data = sp, aes(x = dbRDA1, y = dbRDA2), label = rownames(sp)) +
  geom_segment(data = var, aes(x = 0, y = 0, xend = dbRDA1, yend = dbRDA2),
               arrow = arrow(length = unit(.3, "cm"))) +
  geom_text(data = var, aes(x = dbRDA1, y = dbRDA2), label = rownames(var)) +
  geom_polygon(data = hull.data, aes(x = dbRDA1, y = dbRDA2, fill = habitat),
               alpha = 0.10) +
  xlab("dbRDA 1 (9.8 %)") +
  ylab("dbRDA 2 (8.4 %)")

print(p)

# arguments are spelled out: `filename` and `plot` are matched by name rather
# than through partial matching of `file` plus a positional plot
ggsave(filename = "results/dbRDA.pdf", plot = p, width = 15, height = 10)
 
#This is done