# Conceptual framework
# 
# We investigate how trait variation at different scales (i.e. of different quality) influences the observed functional structure and the inferred assembly process.
# 
# 
# For this, we use 6 methods to measure community trait patterns; each method uses trait variation of a different 'quality', ordered from best to poorest. The methods listed below are M4-M6:
#   
# M4. TPD considering species level traits (Plants and Ants)
# 
# M5. TPD using single average trait values (bandwidth method), species level (Plants and Ants)
# 
# M6. Convex hulls, global average (Plants and Ants)
# 
# 
##############################################################################################
##############################################################################################
############################################################ 
# (1) DATA TRANSFORMATION AND REPLACE NAs
##########################################################
library(TPD)
library(FD)
source("Rao.for.dissim.R") 
source("melodic.Rao.R") 
# Read the individual-level continuous morphological traits; column 9 of the
# file supplies the row names. NB: all traits (except 'size') are already
# size-corrected.
traits.raw <- read.csv("ant.traits.csv", row.names = 9)
# Keep the first 8 columns only, reordered so that the 7 morpho traits come
# first and the 'sp' column (originally column 1) comes last.
traits.raw <- traits.raw[, c(2:8, 1)]
#
#####
# LOG TRANSFORM AND SCALE DATA
#####
# Log-transform the 7 trait columns, then centre each to mean 0 and scale it to
# unit variance (scale() is applied column by column).
scaled.values <- apply(log(traits.raw[, 1:7]), MARGIN = 2, FUN = scale)
# 'traits' is the transformed dataset used from here onwards: same layout as
# traits.raw, with the 7 trait columns replaced by their scaled log values.
traits <- traits.raw
traits[, 1:7] <- scaled.values[, 1:7]
#
#####
# REMOVE INDIVIDUALS WITH >3 NAs; REPLACE REMAINING NAs WITH SPECIES MEANS
#####
# In the steps below we remove individuals with more than 'maxNA' NAs and, for
# the remaining individuals with NAs, impute the mean value of that trait for
# their species. This is done on a copy of 'traits' (traits2).
traits2 <- traits # make copy
maxNA <- 3 # set maximum no. of NA we will tolerate.
rowsRemove <- which(rowSums(is.na(traits)) > maxNA) # individuals which exceed maxNA
# Only drop rows when there is something to drop: with an empty 'rowsRemove',
# traits2[-rowsRemove, ] would select ZERO rows and silently discard every
# individual.
if (length(rowsRemove) > 0) {
  traits2 <- traits2[-rowsRemove, ] # take out individuals which exceed maxNA
}
# Fill in the remaining NAs: for every numeric column (i) and every row (j)
# with an NA in that column, use the mean of that trait over all individuals of
# the same species (NAs ignored). If a species has no observed value at all for
# a trait, mean() returns NaN for it.
for (i in which(vapply(traits2, is.numeric, logical(1)))) {
  for (j in which(is.na(traits2[, i]))) {
    traits2[j, i] <- mean(traits2[traits2[, "sp"] == traits2[j, "sp"], i], na.rm = TRUE)
  }
}
#
########
# SUMMARISE SPECIES MEAN TRAIT VALUES FOR PCA
#####
# Mean of each of the 7 (transformed) traits per species; the grouping column
# 'sp' comes back as the first column of the result.
traits.sp <- aggregate(traits2[1:7], by = traits2["sp"], FUN = mean, na.rm = TRUE)
rownames(traits.sp) <- traits.sp[[1]] # species become the row names
traits.sp <- traits.sp[, -1] # drop the 'sp' column, keeping the 7 trait means
#
#####
# SUMMARISE SPECIES ABUNDANCES IN COMMUNITIES 
#####
# Abundance dataset: one row per site.
abun_ants.raw <- read.csv("ant.freq.csv")
invasionAnts <- abun_ants.raw$treatment # treatment recorded for each site
abun_ants <- abun_ants.raw[, 2:30] # columns 2 to 30 hold the abundances
rownames(abun_ants) <- abun_ants.raw$sites # label the rows with the site names

# Environmental dataset (first column = row names), reordered/subset so that
# its rows follow the sites of the abundance table.
envir <- read.csv("env.data.csv", row.names = 1)
envir <- envir[rownames(abun_ants), ]
envir$invasionAnts <- invasionAnts # attach the treatment of each site

########################################################
# (2) USE PCA TO PREDICT NEW TRAIT VALUES FOR ALL INDIVIDUALS
##########################################################
# Fit the PCA on the species-mean traits, using the correlation matrix.
pca.sp <- princomp(traits.sp, cor = TRUE)
ev <- pca.sp$sdev^2 # variances of the components
ev # suggests to keep Comp.1 & 2
summary(pca.sp) # also suggests to keep Comp. 1 & 2
biplot(pca.sp)
# Project every individual (rows of traits2) onto the species-level PCA axes,
# giving a data frame of predicted scores per individual.
traits.pca <- as.data.frame(predict(pca.sp, traits2[, 1:7]))
traits.pca$sp <- traits2$sp # carry over the species of each individual
# Keep only the first two components plus the species column.
traits.pca <- traits.pca[, c("Comp.1", "Comp.2", "sp")]

######################################################################
#### M4. TPDs using all individuals of species (no populations). ####
##########
# Species-level TPDs built from the individual scores on Comp.1 and Comp.2.
TPDs_ants <- TPDs(species = traits.pca$sp, traits.pca[, 1:2], alpha = 0.99)
sapply(TPDs_ants$TPDs, sum) # check that all = 1
# Community-level TPDs (TPDc), combining the species TPDs with the site abundances.
TPDc_ants <- TPDc(TPDs = TPDs_ants, sampUnit = abun_ants)
sapply(TPDc_ants$TPDc$TPDc, sum) # check that all = 1
# Functional richness, evenness and divergence of each community.
FDtpd <- REND(TPDc = TPDc_ants)
M4_summary <- with(
  FDtpd$communities,
  data.frame(FRic = FRichness, FEve = FEvenness, FDiv = FDivergence)
)

# Dissimilarities between species TPDs, required for Rao. The 'populations'
# element is re-labelled 'species' before being handed to Rao().
dissim_ants <- dissim(TPDs_ants)
dissim_ants$species <- dissim_ants$populations
dissim_ants$populations <- NULL
Rao_ants <- Rao(diss = dissim_ants, TPDc = TPDc_ants, regional = FALSE)
M4_summary$Rao <- Rao_ants$alpha_rao # add Rao to the summary
# 'alpha' collects the summary table of every method, one element per method.
alpha <- list()
alpha$M4 <- M4_summary


##########
# M5a. TPD using single average trait values (TPDsMean method), species level (ANTS)
# Here the TPDs are built with the TPDsMean function from each species' mean
# score on Comp.1 and Comp.2, giving each species and trait its own bandwidth
# estimated with ks::Hpi.diag from that species' individuals.

# One row per species (row names = species), holding the mean score per component.
speciesMeans <- as.data.frame(matrix(data = NA, nrow = length(unique(traits.pca$sp)),
                                     ncol = 2,
                                     dimnames = list(unique(traits.pca$sp),
                                                     c("Comp.1", "Comp.2"))))
speciesMeans[, "Species"] <- unique(traits.pca$sp)
# tapply() returns the means named by species; index by row name so that each
# value lands on the right species row.
Comp.1Aux <- tapply(traits.pca$Comp.1, traits.pca$sp, mean, na.rm = TRUE)
speciesMeans[, "Comp.1"] <- Comp.1Aux[rownames(speciesMeans)]
Comp.2Aux <- tapply(traits.pca$Comp.2, traits.pca$sp, mean, na.rm = TRUE)
speciesMeans[, "Comp.2"] <- Comp.2Aux[rownames(speciesMeans)]

# Per-species bandwidths: square root of the diagonal of the matrix returned by
# Hpi.diag for that species' individuals. Hpi.diag is called through its
# namespace because 'ks' is not attached by the library() calls of this script.
speciesMeans[, "sdComp.1"] <- speciesMeans[, "sdComp.2"] <- NA
for (i in seq_along(unique(speciesMeans$Species))) {
  spSel <- as.character(unique(speciesMeans$Species)[i])
  traitsSel <- subset(traits.pca, traits.pca$sp == spSel)
  speciesMeans[spSel, c("sdComp.1", "sdComp.2")] <- sqrt(diag(ks::Hpi.diag(traitsSel[, c("Comp.1", "Comp.2")])))
}

TPDsMean_ants_sps <- TPDsMean(species = speciesMeans$Species, means = speciesMeans[, c("Comp.1", "Comp.2")],
                              sds = speciesMeans[, c("sdComp.1", "sdComp.2")],
                              alpha = 0.99)
sapply(TPDsMean_ants_sps$TPDs, sum) # check all = 1

# TPDc for all communities from TPDs by species.
TPDc_ants_meansp <- TPDc(TPDs = TPDsMean_ants_sps, sampUnit = abun_ants)
# Check the object built just above (not the M4 object TPDc_ants).
sapply(TPDc_ants_meansp$TPDc$TPDc, sum) # check all = 1
# plotTPD(TPD=TPDc_ants_meansp) # see plots
#
# TPD FD indices from TPDs by species.
FDtpd <- REND(TPDc = TPDc_ants_meansp)
M5a_summary <- cbind.data.frame(FDtpd$communities$FRichness, FDtpd$communities$FEvenness, FDtpd$communities$FDivergence) # make a summary of the data.
names(M5a_summary) <- c("FRic", "FEve", "FDiv") # add names

# Dissimilarities between species TPDs, required for Rao; 'populations' is
# re-labelled 'species' before being handed to Rao().
dissim_ants <- dissim(TPDsMean_ants_sps)
dissim_ants$species <- dissim_ants$populations
dissim_ants$populations <- NULL
Rao_ants <- Rao(diss = dissim_ants, TPDc = TPDc_ants_meansp, regional = FALSE) # calculate Rao.
M5a_summary$Rao <- Rao_ants$alpha_rao # add Rao into summary
alpha$M5a <- M5a_summary

##########
# M5b. TPD using single average trait values (TPDsMean method), species level (ANTS)
# Here the TPDs are built with the TPDsMean function, assigning the same
# bandwidth to all the species IN THE POOL.

# One row per species (row names = species), holding the mean score per component.
speciesMeans <- as.data.frame(matrix(data = NA, nrow = length(unique(traits.pca$sp)),
                                     ncol = 2,
                                     dimnames = list(unique(traits.pca$sp),
                                                     c("Comp.1", "Comp.2"))))
speciesMeans[, "Species"] <- unique(traits.pca$sp)
# tapply() returns the means named by species; index by row name so that each
# value lands on the right species row.
Comp.1Aux <- tapply(traits.pca$Comp.1, traits.pca$sp, mean, na.rm = TRUE)
speciesMeans[, "Comp.1"] <- Comp.1Aux[rownames(speciesMeans)]
Comp.2Aux <- tapply(traits.pca$Comp.2, traits.pca$sp, mean, na.rm = TRUE)
speciesMeans[, "Comp.2"] <- Comp.2Aux[rownames(speciesMeans)]

# Define the bandwidth (standard deviation) shared by every species: half the
# standard deviation of the species means, computed separately per component.
speciesMeans$sdComp.1 <- speciesMeans$sdComp.2 <- rep(NA, nrow(speciesMeans))
speciesMeans[, "sdComp.1"] <- 0.5 * apply(speciesMeans[, c("Comp.1", "Comp.2")], 2, sd)[1]
speciesMeans[, "sdComp.2"] <- 0.5 * apply(speciesMeans[, c("Comp.1", "Comp.2")], 2, sd)[2]
##### NOTE: THESE VALUES GREATLY OVERESTIMATE THE SD WITHIN SPECIES, LEADING TO ODD RESULTS-
# Using the Hpi.diag approach is actually not much different:
# 0.5SD approach from Lammana et al makes things even worse.
# speciesMeans[, "sdComp.1"] <- sqrt(diag(Hpi.diag(speciesMeans[, c("Comp.1", "Comp.2")])))[1]
# speciesMeans[, "sdComp.2"] <- sqrt(diag(Hpi.diag(speciesMeans[, c("Comp.1", "Comp.2")])))[2]

# mean(tapply(traits.pca[,1], traits.pca$sp, sd))
# mean(tapply(traits.pca[,2], traits.pca$sp, sd))

TPDsMean_ants_sps <- TPDsMean(species = speciesMeans$Species, means = speciesMeans[, c("Comp.1", "Comp.2")],
                              sds = speciesMeans[, c("sdComp.1", "sdComp.2")],
                              alpha = 0.99)
sapply(TPDsMean_ants_sps$TPDs, sum) # check all = 1

# TPDc for all communities (rows of abun_ants) from TPDs by species.
TPDc_ants_meansp <- TPDc(TPDs = TPDsMean_ants_sps, sampUnit = abun_ants)
# Check the object built just above (not the M4 object TPDc_ants).
sapply(TPDc_ants_meansp$TPDc$TPDc, sum) # check all = 1
# plotTPD(TPD=TPDc_ants_meansp) # see plots
#
# TPD FD indices from TPDs by species.
FDtpd <- REND(TPDc = TPDc_ants_meansp)
M5b_summary <- cbind.data.frame(FDtpd$communities$FRichness, FDtpd$communities$FEvenness, FDtpd$communities$FDivergence) # make a summary of the data.
names(M5b_summary) <- c("FRic", "FEve", "FDiv") # add names

# Dissimilarities between species TPDs, required for Rao; 'populations' is
# re-labelled 'species' before being handed to Rao().
dissim_ants <- dissim(TPDsMean_ants_sps)
dissim_ants$species <- dissim_ants$populations
dissim_ants$populations <- NULL
Rao_ants <- Rao(diss = dissim_ants, TPDc = TPDc_ants_meansp, regional = FALSE) # calculate Rao.
M5b_summary$Rao <- Rao_ants$alpha_rao # add Rao into summary
alpha$M5b <- M5b_summary


##########
# M6. Convex-hull and Rao using global means (no TPD)
#
# Species means of the two retained PCA components (this overwrites the earlier
# 7-trait 'traits.sp'); species become the row names.
traits.sp <- aggregate(traits.pca[, c("Comp.1", "Comp.2")], by = traits.pca["sp"], FUN = mean, na.rm = TRUE)
rownames(traits.sp) <- traits.sp[, "sp"]
traits.sp <- traits.sp[, -1]
# FD indices from dbFD, computed from the species means and the site-by-species
# abundances, weighted by abundance and without standardising the traits.
FDdbfd_sps <- dbFD(traits.sp, abun_ants,
                   w.abun = TRUE, stand.x = FALSE, calc.FRic = TRUE, print.pco = TRUE)
# Rao via melodic.rao, from a euclidean distance matrix between species.
# compute_dist_matrix is not defined in the visible part of this script -
# presumably it comes from a sourced file or another package; TODO confirm.
dist_mat <- compute_dist_matrix(traits.sp, metric = "euclidean")
# Rescale to 0-1. The rescaling is done column by column, i.e. each column is
# ranged by its own minimum and maximum.
# NOTE(review): because every column uses its own range, the scaled matrix is
# in general no longer symmetric - confirm this is intended rather than a
# single global rescaling of the whole matrix.
dist_mat.scaled <- apply(dist_mat, MARGIN = 2, FUN = function(col) (col - min(col)) / diff(range(col)))
Raospsxcomm <- melodic.rao(samp = abun_ants, dis = dist_mat.scaled)
#
# Summary of the FD indices from dbFD, plus Rao from melodic.rao.
M6_summary <- data.frame(
  FRic = FDdbfd_sps$FRic,
  FEve = FDdbfd_sps$FEve,
  FDiv = FDdbfd_sps$FDiv,
  Rao = Raospsxcomm$rao
)
alpha$M6 <- M6_summary

########## ############## SAVING RESULTS
# Write the list of per-method summary tables to disk.
saveRDS(alpha, "alphaResultsAnts.rds")
