#This is for the analysis of environmental data

library(psych)
library(corrplot)
library(ggplot2)
library(ggfortify)

# NOTE: the workspace is deliberately not wiped with rm(list = ls()) here.
# That call deletes the caller's objects whenever the script is sourced and
# does not reset loaded packages or options; restart the R session instead
# to run from a clean state.

# Load data ----

# Tab-separated table with a header row. The path is relative to the current
# working directory.
env <- read.table("data/raw_env.txt", header = TRUE, sep = "\t")

# Everything after the first six columns is treated as an environmental
# variable by the steps below.
# (presumably columns 1-6 hold sample metadata such as habitat/time - confirm)
env2 <- env[, -(1:6)]

# Summary statistics ----

# One value per environmental variable for each statistic. vapply() returns a
# numeric vector named after the columns of env2 and stops with an error if a
# statistic is not a single number (e.g. min() of a character column).
# No na.rm is used, so a variable containing NA yields NA in the table.
mean.dat <- vapply(env2, mean, numeric(1))
sd.dat <- vapply(env2, sd, numeric(1))
min.dat <- vapply(env2, min, numeric(1))
max.dat <- vapply(env2, max, numeric(1))

# Variables in rows (row names = column names of env2), statistics in columns.
sum.dat <- cbind(
  Mean = mean.dat,
  SD = sd.dat,
  Minimum = min.dat,
  Maximum = max.dat
)

# NOTE(review): the file is named .csv but is written tab-separated; the name
# and separator are kept as they were so existing consumers keep working.
write.table(sum.dat, file = "results/summary.csv", sep = "\t",
            row.names = TRUE, col.names = TRUE)

# Transformation of the data ----

# Inspect every environmental variable before deciding on a transformation:
# a histogram (titled with the column name, drawn on the active graphics
# device) plus a Shapiro-Wilk normality test. A p-value above 0.05 is read as
# "no transformation needed".
loop.vector <- colnames(env2)

# Full shapiro.test() result per variable, keyed by column name.
test <- list()
for (i in loop.vector) {
  x <- env2[[i]]
  hist(x, main = i)
  test[[i]] <- shapiro.test(x)
}

# Only the p-values, as a list with the same names as `test`.
test.p <- lapply(test, function(res) res[["p.value"]])

# Apply the transformations ----

# Columns 1-12 and 14-17 of env2 are natural-log transformed.
# (assumes strictly positive values; log() gives -Inf/NaN otherwise - confirm)
loop.vector2 <- colnames(env2)[c(1:12, 14:17)]
log.part <- lapply(env2[loop.vector2], log)

# Columns 18-21 are proportions and get the arcsine square-root transform.
# (assumes values within [0, 1]; values outside produce NaN - confirm)
loop.vector3 <- colnames(env2)[18:21]
asin.part <- lapply(env2[loop.vector3], function(p) asin(sqrt(p)))

# Combine both sets into one data frame: 16 log columns, then 4 proportions.
env.t <- as.data.frame(c(log.part, asin.part))

# pH is carried over untransformed. It is appended as column 21 and then
# moved to position 13, between the 12th and 13th log-transformed columns.
env.t$pH <- env2$pH
env.t <- env.t[, c(1:12, 21, 13:20)]

# Screen for correlated variables ----
# Spearman correlations between all transformed variables. |rho| >= 0.8 is
# the criterion for elimination; the decision itself is taken by reading the
# plot written below, not by this code.

cor.matrix <- corr.test(env.t, method = "spearman", alpha = 0.05, ci = FALSE)
corr <- cor.matrix[["r"]]

# corr.test() reports p-values adjusted for multiple testing above the
# diagonal and raw p-values below it. Only the adjusted values should mark
# significance, so the lower triangle and the diagonal are set to 1: the
# lower (colour) panel then carries no significance labels and the upper
# (number) panel is labelled from the adjusted p-values.
p.mat <- cor.matrix[["p"]]
p.mat[lower.tri(p.mat, diag = TRUE)] <- 1

pdf(file = "results/corplot.pdf", width = 12, height = 9)

# The masked matrix p.mat is passed to the plot; previously cor.matrix$p was
# passed and the matrix prepared above was never used. dev.off() sits in
# `finally` so the PDF device is closed even if plotting fails; otherwise
# later plots would be drawn into this file.
cor.plot <- tryCatch(
  corrplot.mixed(corr,
                 order = "original",
                 lower = "color",
                 upper = "number",
                 tl.pos = "lt",
                 p.mat = p.mat,
                 insig = "label_sig",
                 tl.col = "black",
                 tl.cex = 0.7,
                 pch.cex = 2,
                 number.font = 1,
                 number.cex = 0.7,
                 addCoef.col = "black"),
  finally = dev.off()
)


# Drop the variables we decided to delete: CL, sal, sat.
# They are removed by position (3, 15, 17) in the transformed data frame.
# NOTE(review): confirm these positions still match CL, sal and sat whenever
# the column layout of the raw file changes.
env.t <- env.t[, -c(3, 15, 17)]

# Save the transformed data frame for the environmental variables ----

# Put the first six columns of the raw table back in front of the
# transformed variables so each row keeps its original leading columns.
env.t <- data.frame(env[, 1:6], env.t)

write.table(env.t, file = "data/trans_env.txt", sep = "\t",
            row.names = FALSE, col.names = TRUE)

# Do a PCA with all variables ----

# Columns 7-24 of env.t are the transformed environmental variables (the
# first six columns are the ones copied over from the raw table). Variables
# are scaled to unit variance before the PCA.
data.pca <- prcomp(env.t[, 7:24], scale. = TRUE)

# Biplot: points coloured by the `habitat` column and shaped by the `time`
# column, one frame per habitat, with labelled loading arrows in black.
# (the manual scales supply three colours and four shapes, so presumably
# habitat has three levels and time four - confirm)
habitat.colours <- c("#0072b2", "#009e73", "#d55e00")

plot1 <- autoplot(data.pca, data = env.t,
                  colour = "habitat",
                  shape = "time",
                  size = 3,
                  frame = TRUE,
                  frame.colour = "habitat",
                  loadings = TRUE,
                  loadings.label = TRUE,
                  loadings.label.size = 5,
                  loadings.colour = "black",
                  loadings.label.colour = "black",
                  loadings.label.repel = TRUE) +
  scale_colour_manual(values = habitat.colours) +
  scale_fill_manual(values = habitat.colours) +
  scale_shape_manual(values = c(16, 1, 17, 2)) +
  theme_bw()

print(plot1)

# Arguments are named in full: the previous call relied on `file` partially
# matching `filename` and on the plot falling into the `plot` slot.
ggsave(filename = "results/pca_env.pdf", plot = plot1, width = 9, height = 6)

#This is done