#load required packages
library(Hmisc)
library(tibble)
library(cli)
library(dplyr)
library(tidyverse)
library(corrplot)
library(MASS)
library(GGally)
library(car)
library(lme4)
library(ggpubr)
library(MuMIn) #https://uoftcoders.github.io/rcourse/lec09-model-selection.html
library(performance)
library(spdep)


# PREPARING DATA ----------------------------------------------------------
#load spreadsheet
data<-read.csv("D:/6299904/NTFS_Vol_TOSHIBA_EXT/GoodFiles/Australia/PhD/PUBLISHING/Environ Drivers Paper/Final submission/Data/environ_variables.csv")
LTR<-read.csv("D:/6299904/NTFS_Vol_TOSHIBA_EXT/GoodFiles/Australia/PhD/PUBLISHING/Environ Drivers Paper/Final submission/Data/liana_measures.csv")


data <- data %>%
  mutate(
    northness = cos(aspect),  # Cosine gives measure of 'northness' between -1 and 1
    eastness = sin(aspect),    # Sine gives measure of 'eastness' between -1 and 1
    slope2 = abs(slope),       # Absolute value of slope values to get rid of negatives
    fertility = Mg..mg.kg. + K..mg.kg. + Ca..mg.kg. + Na..mg.kg.  # Sum of exchangeable bases indicates fertility
  ) %>%
  dplyr::select(-aspect, -slope)


df<-merge(data, LTR, by="plot_name") #merge datasets

#clean up data
df <- df %>%
  mutate(
    bio12_310 = as.numeric(bio12_310),
    bio6_310 = as.numeric(bio6_310),
    bio15_310 = as.numeric(bio15_310),
    elevation = as.numeric(elevation),
    canopy.x = as.factor(canopy.x)
  ) %>%
  rename(
    canopy = canopy.x,
    P=P..mg.kg., 
    bio12=bio12_310,
    bio6=bio6_310,
    bio15=bio15_310)

#CORRELATION MATRIX
df2<-dplyr::select(df, Lat, Lon, elevation,  bio6, bio12, bio15, pH, P, fertility, slope2, northness, eastness, woody_n, woody_BA, LTR, rattan_n, tree_BA, rattan_BA)
summary(df2)
str(df2)
df2 <- mutate_all(df2, as.numeric)
corrplot(cor(df2), addCoef.col="white", number.cex=0.8, number.digits=1, diag=FALSE, bg="grey", 
         outline="black", addgrid.col="white", mar=c(1,1,1,1))
GGally::ggpairs(df2) #another way to visualize all correlations (and distributions)
cormat<-as.data.frame(cor(df2, method="pearson"))
cormat


# EXPLORING RELATIONSHIPS -------------------------------------------------
#Create scatterplots of each predictor and response variable. 
#Different colours for each canopy type to visualise potential interaction effect. 
ggplot(data=df, aes(x=bio12, y=LTR))+
  geom_point(aes(fill=canopy), pch=21, size=5, alpha=0.7, colour="black")+
  scale_fill_manual(values=c("#414487ff","#FDE725FF"))+
  labs(x="Bio12", y="Liana: tree ratio")+ #label axis 
  theme_classic(base_size=15)+
  guides(fill = 'none')


#STANDARDIZE DATA
df_scaled<- df %>% 
  mutate(bio12_scaled=scale(bio12), scale=TRUE) %>% 
  mutate(bio15_scaled=scale(bio15)) %>% 
  mutate(bio6_scaled=scale(bio6)) %>% 
  mutate(elevation_scaled=scale(elevation)) %>% 
  mutate(P_scaled=scale(P)) %>% 
  mutate(pH_scaled=scale(pH)) %>% 
  mutate(fertility_scaled=scale(fertility)) %>% 
  mutate(slope_scaled=scale(slope2)) %>% 
  mutate(northness_scaled=scale(northness)) %>% 
  mutate(eastness_scaled=scale(eastness))
df_scaled


# LTR FULL MODEL --------------------------------------------------------------
#intercorrelated variables removed and interaction term added
LTR.glb<-glm(LTR~canopy*bio12_scaled+P_scaled+fertility_scaled
             +slope_scaled+northness_scaled+eastness_scaled, 
             na.action="na.fail", data=df_scaled, family=Gamma(link="log"))
summary(LTR.glb)
performance(LTR.glb)
check_model(LTR.glb)

# DREDGE ------------------------------------------------------------------
dr_LTR<-dredge(LTR.glb, beta="none") #list all models
dr_LTR

# AVERAGED MODEL ----------------------------------------------------------
#Averaged model based on top-ranked models (delta AICc <2)
LTR_avg<-model.avg(dr_LTR, subset=delta<2, fit=TRUE)
summary(LTR_avg)
summary_LTR<-summary(LTR_avg) #check model summary
summary_LTR_df<-as.data.frame(summary_LTR$coefmat.full)
LTR_CI<-as.data.frame(confint(summary_LTR, full=TRUE)) #get confidence intervals
LTR_im_df<-as.data.frame(sw(summary(model.avg(dr_LTR, subset=delta<2)))) #get variable importance
LTR_im_df

# CHECK TOP RANKED MODELS -------------------------------------------------
#check all models with AICc<2
best<-get.models(dr_LTR, 1)[[1]] 
summary(best)
confint(best)
performance(best)

best2<-get.models(dr_LTR, 2)[[1]] 
summary(best2)
confint(best2)
performance(best2)

# WOODY VINE BASAL AREA FULL MODEL ----------------------------------------
LBA.glb<-glm(woody_BA~canopy+bio12_scaled+P_scaled+fertility_scaled
             +slope_scaled+northness_scaled+eastness_scaled,na.action="na.fail", data=df_scaled, family=Gamma(link="log"))
summary(LBA.glb) 
performance(LBA.glb)
check_model(LBA.glb)

# DREDGE ------------------------------------------------------------------
dr_LBA<-dredge(LBA.glb, beta="none") #list all models
dr_LBA

# AVERAGED MODEL ----------------------------------------------------------
LBA_avg<-model.avg(dr_LBA, subset=delta<2, fit=TRUE)
summary(LBA_avg)
summary_LBA<-summary(LBA_avg)
summary_LBA_df<-as.data.frame(summary_LBA$coefmat.full) #get summary
LBA_CI<-as.data.frame(confint(summary_LBA, full=TRUE)) #get confidence intervals
LBA_im_df<-as.data.frame(sw(model.avg(dr_LBA, subset=delta<2))) #get variable importance
LBA_im_df

# CHECK TOP-RANKED MODELS -------------------------------------------------
best<-get.models(dr_LBA, 1)[[1]]
summary(best)
performance(best)
#check all models with AICc<2

# WOODY VINE STEM DENSITY FULL MODEL --------------------------------------
LSD.glb<-glm.nb(woody_n~canopy*bio12_scaled+P_scaled+fertility_scaled
                +slope_scaled+northness_scaled+eastness_scaled, na.action="na.fail", data=df_scaled)
summary(LSD.glb)
model_performance(LSD.glb)
check_model(LSD.glb)


# DREDGE ------------------------------------------------------------------
dr_LSD<-dredge(LSD.glb, beta="none")
dr_LSD

# AVERAGED MODEL ----------------------------------------------------------
LSD_avg<-model.avg(dr_LSD, subset=delta<2, fit=TRUE, beta="none") #average all models with delta <2
summary_LSD<-summary(LSD_avg)
summary_LSD_df<-as.data.frame(summary_LSD$coefmat.full) #get model summary
LSD_CI<-as.data.frame(confint(summary_LSD, full=TRUE))
LSD_im_df<-as.data.frame(sw(summary(model.avg(dr_LSD, subset=delta<2)))) #get relative importance of variable
LSD_im_df

# CHECK TOP-RANKED MODELS -------------------------------------------------
best<-(get.models(dr_LSD, 1)[[1]]) 
summary(best)
performance(best)
#check all models with AICc<2

# RATTAN FULL MODEL -------------------------------------------------------
R.glb<-glm.nb(rattan_n~canopy*elevation_scaled+P_scaled+fertility_scaled
              +slope_scaled+eastness_scaled+northness_scaled, na.action="na.fail", data=df_scaled)
summary(R.glb) #explore output
check_model(R.glb)
model_performance(R.glb)


# DREDGE ------------------------------------------------------------------
dr_R<-dredge(R.glb, beta="none") #list all models
dr_R

# AVERAGED MODEL ----------------------------------------------------------
R_avg<-model.avg(dr_R, subset=delta<2, fit=TRUE, beta="none") #average all models with delta <2
summary_R<-summary(R_avg)
summary_R_df<-as.data.frame(summary_R$coefmat.full)#get summary
R_CI<-as.data.frame(confint(summary_R, full=TRUE)) #get confidence intervals
R_im_df<-as.data.frame(sw(model.avg(dr_R, subset=delta<2))) #get relative importance of variable
R_im_df

# CHECK TOP-RANKED MODELS -------------------------------------------------
best<-(get.models(dr_R, 1)[[1]]) 
summary(best)
performance(best)
#check all models with AICc<2

# MAKE COEFFICIENT PLOTS --------------------------------------------------
# LTR ---------------------------------------------------------------------
# prepare data ---------------------------------------------------
summary_LTR_df <- summary_LTR_df %>% 
  cbind(LTR_CI) %>%
  slice(-1) %>%
  mutate(term = c("Precipitation", "Disturbance", "Precipitation: Disturbance")) %>%
  rename(upr = `97.5 %`,
         lwr = `2.5 %`)

# coefficient plot --------------------------------------------------------
LTR_coef_plot<-ggplot(summary_LTR_df, aes(x = Estimate, y = term, xmin = lwr, xmax = upr)) +
  geom_errorbar(width = 0, linewidth=1.5) +
  geom_point(size=6, pch=21, colour="black", fill ="red") +
  labs(x = "", y = "") +
  xlim(-1,2)+
  geom_vline(xintercept=0, linetype="dashed")+
  ggtitle("Liana-tree ratio")+
  #annotate("text", label="(a)", x=-1, y=3.5, size=6, fontface="bold")+
  theme_classic(base_size=15)+
  theme(plot.title=element_text(face="bold", hjust=0.5))+
  geom_text(x=1.54, y = 1.2, label="***", size=8)+
  geom_text(x=0.36, y = 2.2, label="*", size=8)
LTR_coef_plot


# WOODY VINE BASAL AREA --------------------------------------------------------
# prepare data ---------------------------------------------------
summary_LBA_df <- summary_LBA_df %>% 
  cbind(LBA_CI) %>%
  slice(-1) %>%
  mutate(term=c("Precipitation", "Disturbance", "Total exchangeable bases")) %>%
  rename(upr = `97.5 %`,
         lwr = `2.5 %`)

# coefficient plot --------------------------------------------------------
LBA_coef_plot<-ggplot(summary_LBA_df, aes(x = Estimate, y = term, xmin = lwr, xmax = upr)) +
  geom_errorbar(width = 0, linewidth=1.5) +
  geom_point(size=6, pch=21, colour="black", fill ="red") +
  labs(x = "", y = "") +
  xlim(-1,2)+
  geom_vline(xintercept=0, linetype="dashed")+
  ggtitle("Woody vine basal area")+
  #annotate("text", label="(b)", x=-1, y=3.5, size=6, fontface="bold")+
  theme_classic(base_size=15)+
  theme(plot.title=element_text(face="bold", hjust=0.5))
LBA_coef_plot

# WOODY VINE STEM DENSITY --------------------------------------------------------
# prepare data ---------------------------------------------------
summary_LSD_df <- summary_LSD_df %>% 
  cbind(LSD_CI) %>%
  slice(-1) %>%
  mutate(term=c("Precipitation", "Disturbance", "Eastness")) %>%
  rename(upr = `97.5 %`,
         lwr = `2.5 %`)

# coefficient plot --------------------------------------------------------
LSD_coef_plot<-ggplot(summary_LSD_df, aes(x = Estimate, y = term, xmin = lwr, xmax = upr)) +
  geom_errorbar(width = 0, linewidth=1.5) +
  geom_point(size=6, pch=21, colour="black", fill ="red") +
  labs(x = "", y = "") +
  xlim(-1,2)+
  geom_vline(xintercept=0, linetype="dashed")+
  ggtitle("Woody vine stem density")+
  theme_classic(base_size=15)+
  #annotate("text", label="(c)", x=-1, y=3.5, size=6, fontface="bold")+
  theme(plot.title=element_text(face="bold", hjust=0.5))+
  geom_text(x=0.3, y = 3.2, label="**", size=8)+
  geom_text(x=0.64, y = 1.2, label="**", size=8)
LSD_coef_plot

# RATTAN STEM DENSITY --------------------------------------------------------
# prepare data ---------------------------------------------------
summary_R_df <- summary_R_df %>% 
  cbind(R_CI) %>%
  slice(-1) %>%
  mutate(term=c("Disturbance", "Elevation", "Phosphorous", "Northness", "Elevation: disturbance")) %>%
  rename(upr = `97.5 %`,
         lwr = `2.5 %`)

# coefficient plot --------------------------------------------------------
R_coef_plot<-ggplot(summary_R_df, aes(x = Estimate, y = term, xmin = lwr, xmax = upr)) +
  geom_errorbar(width = 0, linewidth=1.5) +
  geom_point(size=6, pch=21, colour="black", fill ="red") +
  labs(x = "", y = "") +
  xlim(-1,2.8)+
  geom_vline(xintercept=0, linetype="dashed")+
  ggtitle("Rattan stem density")+
  #annotate("text", label="(d)", x=-1, y=5.4, size=6, fontface="bold")+
  theme_classic(base_size=15)+
  theme(plot.title=element_text(face="bold", hjust=0.5))+
  geom_text(x=1.63, y = 1.3, label="***", size=8)
R_coef_plot

Fig2<-ggarrange(LTR_coef_plot, LBA_coef_plot, LSD_coef_plot, R_coef_plot, 
          nrow=2, ncol=2, align="v", labels=c("(a)", "(b)", "(c)", "(d)"), hjust=-5, font.label = list(size = 20, face = "bold"))
Fig2<-annotate_figure(Fig2, bottom=text_grob("Standardized coefficient", hjust=0.3, vjust=-0.2, face="bold", size = 20))
Fig2
# CHECKING FOR SPATIAL AUTOCORRELATION WITH MORANS I ----------------------
# CALCULATE RESIDUALS FOR EACH AVERAGED MODEL
#Create distance matrix
dist_matrix<-dist(df[,c("Lon", "Lat")])
dist_matrix

# Create a spatial weights matrix based on distance
neighbor_dist <- dnearneigh(df[,c("Lon", "Lat")], d1 = 0, d2 = 5, longlat=TRUE) #distance 5km
lw<-nb2listw(neighbor_dist, style="W", zero.policy=TRUE)

#LTR
d <- df %>% 
  bind_cols(predict(LTR_avg, backtransform=TRUE)) %>% #function to predict with MuMIn
  rename(y = "...22") #rename column of predicted values
with(d, plot(y ~ LTR)) #check plot

#get residuals and make dataframe including Lat, Long (for Moran's I) 
d <- d %>% 
  mutate(resid = LTR - y) #calculate residuals: actual - fitted values
resid_LTR<- d %>% 
  dplyr::select(resid, plot_name, LTR, Lon, Lat) #make dataframe of Lat, long and residuals

#Moran's I test of residuals
moran.test(resid_LTR$resid, lw, zero.policy=TRUE)
moran.plot(resid_LTR$resid, lw, zero.policy=TRUE) #check plot

#WOODY VINE BASAL AREA
d <- df %>% 
  bind_cols(predict(LBA_avg, backtransform=TRUE)) %>% #function to predict with MuMIn
  rename(y = "...22") #rename column of predicted values
with(d, plot(y ~ woody_BA)) #check plot

#get residuals and make dataframe including Lat, Long (for Moran's I) 
d <- d %>% 
  mutate(resid = woody_BA - y) #calculate residuals: actual - fitted values
resid_W_BA<- d %>% 
  dplyr::select(resid, plot_name, LTR, Lon, Lat) #make dataframe of Lat, long and residuals

#Moran's I test of residuals
moran.test(resid_W_BA$resid, lw, zero.policy=TRUE)
moran.plot(resid_W_BA$resid, lw, zero.policy=TRUE) #check plot

#WOODY VINE STEM DENSITY
d <- df %>% 
  bind_cols(predict(LSD_avg, backtransform=TRUE)) %>% #function to predict with MuMIn
  rename(y = "...22") #rename column of predicted values
with(d, plot(y ~ woody_n)) #check plot

#get residuals and make dataframe including Lat, Long (for Moran's I) 
d <- d %>% 
  mutate(resid = woody_n - y) #calculate residuals: actual - fitted values
resid_W_n<- d %>% 
  dplyr::select(resid, plot_name, LTR, Lon, Lat) #make dataframe of Lat, long and residuals

#Moran's I test of residuals
moran.test(resid_n_BA$resid, lw, zero.policy=TRUE)
moran.plot(resid_n_BA$resid, lw, zero.policy=TRUE) #check plot

#RATTAN STEM DENSITY
d <- df %>% 
  bind_cols(predict(R_avg, backtransform=TRUE)) %>% #function to predict with MuMIn
  rename(y = "...22") #rename column of predicted values
with(d, plot(y ~ rattan_n_2022)) #check plot

#get residuals and make dataframe including Lat, Long (for Moran's I) 
d <- d %>% 
  mutate(resid = rattan_n_2022 - y) #calculate residuals: actual - fitted values
resid_R<- d %>% 
  dplyr::select(resid, plot_name, LTR, Lon, Lat) #make dataframe of Lat, long and residuals

#Moran's I test of residuals
moran.test(resid_R$resid, lw, zero.policy=TRUE)
moran.plot(resid_R$resid, lw, zero.policy=TRUE) #check plot