############################################

# Integrating landscape ecology into generic surveillance plans for wood-boring beetles 
# Ecological Applications
# Nardi et al. 2025

# R Script for reproducing analyses 

# This script is licensed under the Creative Commons Attribution 4.0 International License.
# https://creativecommons.org/licenses/by/4.0/
# You are free to use, modify, and share this script,
# but must give appropriate credit to the original author.

# Author: Davide Nardi, PhD
# Date: 03/09/2025
############################################

##### 0. SETUP: Load libraries and initialize environment #####

# Ensure required packages are installed
required_packages <- c(
  "dplyr", "data.table", "tibble", "stringr",      # Data manipulation
  "bipartite", "vegan", "tnet", "betapart", "BAT", # Biodiversity / ecology
  "iNEXT", "iNEXT.4steps", 
  "lme4", "lmerTest", "DHARMa", "car",  # Statistics & modeling
  "effects", "visreg", "emmeans", "broom",
  "sf", "raster", "landscapemetrics",              # Spatial & landscape
  "ggplot2", "ggrepel", "grid",  "ggeffects",                  # Visualization
  "sjmisc", "sjPlot"
)

# Install any missing packages
new_packages <- required_packages[!(required_packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load packages
invisible(lapply(required_packages, library, character.only = TRUE))

# Reproducibility
set.seed(1234)

##### 1. LOAD ENVIRONMENTAL VARIABLES AND COMPUTE URBANIZATION INDEX #####

# Load environmental variables
env.var <- read.table("Data_published/env_data_trapscale.txt", sep = "\t", header = TRUE)

# Rescale configuration variable
env.var$configuration <- scales::rescale(env.var$configuration, to = c(0, 100))

# Compute urbanization index as Euclidean distance from a natural reference point
ref <- c(100, 100)
env.var$urbanization_index <- sqrt((env.var$composition - ref[1])^2 + (env.var$configuration - ref[2])^2)
env.var$urbanization_index <- scales::rescale(env.var$urbanization_index, to = c(0, 100))

##### 2. LOAD AND AGGREGATE BIOLOGICAL DATA #####

# Load species-level data
data_3 <- read.table("Data_published/bio_data_trapscale.txt", sep = "\t", header = TRUE)

# Aggregate counts (N = total individuals, S = species richness)
data_3 <- data_3 %>%
  group_by(SITE, CELL_ID, STATUS) %>%
  summarise(N = sum(N), S = n(), .groups = "drop") %>%
  left_join(env.var, by = c("SITE", "CELL_ID"))

##### 3. TRAP SCALE MODELS #####

## 3.1 Species Richness Models ##

# Native richness ~ urbanization
m_native_S <- glmer(S ~ scale(urbanization_index) + (1 | SITE),
                    family = poisson,
                    data = filter(data_3, STATUS == "Native"))
simulateResiduals(m_native_S, plot = TRUE)
summary(m_native_S)
car::Anova(m_native_S, type = 3)
plot(allEffects(m_native_S), main = "Native species richness")

# Exotic richness ~ urbanization (Negative Binomial model)
m_exotic_S <- glmer.nb(S ~ scale(urbanization_index) + (1 | SITE),
                       data = filter(data_3, STATUS == "Exotic"))
simulateResiduals(m_exotic_S, plot = TRUE)
summary(m_exotic_S)
plot(allEffects(m_exotic_S), main = "Exotic species richness")

## 3.2 Species Abundance Models ##

# Native abundance ~ urbanization (Negative Binomial model)
m_native_N <- glmer.nb(N ~ scale(urbanization_index) + (1 | SITE),
                       data = filter(data_3, STATUS == "Native"))
simulateResiduals(m_native_N, plot = TRUE)
summary(m_native_N)
car::Anova(m_native_N, type = 3)
plot(allEffects(m_native_N), main = "Native species abundance")

# Exotic abundance ~ urbanization (Negative Binomial model)
m_exotic_N <- glmer.nb(N ~ scale(urbanization_index) + (1 | SITE),
                       data = filter(data_3, STATUS == "Exotic"))
simulateResiduals(m_exotic_N, plot = TRUE)
summary(m_exotic_N)
car::Anova(m_exotic_N, type = 3)
plot(allEffects(m_exotic_N), main = "Exotic species abundance")


##### 4. TURNOVER ANALYSIS #####

# Load additional libraries not included in the first section
library(BAT)       # For beta diversity metrics
library(ggrepel)   # For non-overlapping labels in ggplot2

# Read site-level community data
data <- read.table("Data_published/bio_cast.txt", sep = "\t", header = TRUE)

# Split data by SITE and remove metadata columns (first two)
data <- data %>% split(.$SITE)
data <- lapply(data, function(x) x[, -c(1, 2)])

# Compute pairwise multiple-site beta diversity (presence-absence based)
beta.1 <- lapply(data, function(x) BAT::beta.multi(x, abund = FALSE))

# Extract replacement component (turnover) from the result
beta.2 <- lapply(beta.1, function(x) x[2, 1]) %>% unlist()
beta.2 <- data.frame(plot_id = names(beta.2), beta_repl = beta.2)

# Load environmental variables at site scale
site.info <- read.table("Data_published/env_data_sitescale.txt", sep = "\t", header = TRUE)

# Merge beta diversity with site-level covariates
beta.3 <- left_join(beta.2, site.info)

# Manual reordering of plot IDs (used for labeling)
beta.3$ID <- c(12, 8, 1, 11, 4, 9, 2, 3, 6, 7, 5, 10, 13)

# Basic statistics
mean(beta.3$beta_repl)                  # Mean turnover
sd(beta.3$beta_repl) / sqrt(13)         # Standard error

ggplot(beta.3, aes(x = forest_4000 / 100, y = beta_repl, color = forest_4000 / 100)) +
  geom_point(size = 3) +
  geom_text_repel(aes(label = ID), size = 5, color = "black") +
  scale_color_gradientn(colors = c("#d81b60", "#ffc107", "#004d40")) +
  theme_classic(base_size = 18) +
  theme(legend.position = "bottom") +
  labs(x = "Tree cover at the site scale (2000 m)",
       y = "Species turnover", color = "")

# Linear model to test relationship with forest cover
lm <- lm(beta_repl ~ forest_4000, data = beta.3)
car::qqPlot(residuals(lm))
summary(lm)
car::Anova(lm, type = 3)

##### 5. SAMPLING EFFORT  #####

# Load additional libraries not included in the first section
library(iNEXT)
library(iNEXT.4steps)

# Load and prepare data
data2 <- read.table("Data_published/bio_cast.txt", sep = "\t", header = TRUE)
species_cols <- names(data2)[3:ncol(data2)]
data.cell1 <- data2 %>%
  group_by(SITE) %>%
  summarise(across(all_of(species_cols), sum), .groups = "drop")

# Count traps per SITE and merge with data.cell1
data.cell <- data2 %>%
  count(SITE, name = "traps") %>%
  left_join(data.cell1, by = "SITE") %>%
  replace(is.na(.), 0) %>%
  column_to_rownames("SITE")
data.col <- data.frame(ID = rownames(data.cell))

# Prepare frequency incidence list (non-zero values per SITE)
data.incidence.freq <- lapply(split(data.cell, rownames(data.cell)), function(x) {
  x <- as.numeric(unlist(x))
  x[x > 0]
})

# Prepare raw incidence matrix (species presence per trap, per SITE)
data.incidence.raw <- lapply(split(data2, data2$SITE), function(x) {
  x <- x[, -c(1, 2)]
  x <- x[, colSums(x) > 0, drop = FALSE]
  t(as.matrix(x))
})

## 5.1 Sample Coverage and Sample Completenness ##

out=list()
for (i in 1:length(data.incidence.raw)) {
  tryCatch({
    out[[i]]=iNEXT.4steps::Completeness(data.incidence.raw[[i]], q=c(0,1, by=0.5), datatype="incidence_raw", nboot = 100)#$iNextEst$size_based
  }, error=function(e){cat("NA")})
}

# Assign names to the 'out' list based on data3
names(out) <- names(data.incidence.raw)[1:length(out)]

# Remove NULL elements and bind into a single dataframe with SITE as ID column
out.df <- out[!sapply(out, is.null)] %>%
  rbindlist(idcol = "plot_id") %>%
  as.data.frame()

# Filter by Order.q
out.q0 <- out.df %>% filter(Order.q == 0)

site.info <- read.table("Data_published/env_data_sitescale.txt", sep = "\t", header = TRUE)

out.q0=left_join(out.q0, site.info, by="plot_id")
out.q1=left_join(out.q1, site.info, by="plot_id")


m=lm(data=out.q0, Estimate.SC~forest_4000) 
summary(m)

plot(allEffects(m))
car::Anova(m, type="III", test.statistic="F")
mean(out0$Estimate.SC)
min(out0$Estimate.SC)
max(out0$Estimate.SC)

m=lm(data=out.q1, Estimate.SC~forest_4000) 
summary(m)
plot(allEffects(m))
car::Anova(m, type="III", test.statistic="F")



## 5.2 Estimating Sample Coverage at different sampling effort ##

out2=list()
for (i in 1:length(data.incidence.freq)) {
  tryCatch({
    out2[[i]]=iNEXT::iNEXT(data.incidence.freq[[i]], q=c(0,1,2), datatype="incidence_freq", nboot = 100)
  }, error=function(e){cat("NA")})
}
# Assign names to out2 based on data.incidence.freq
names(out2) <- names(data.incidence.freq)

# Extract size_based estimates and remove NULLs
out2.df <- out2 %>%
  lapply(function(x) x$iNextEst$size_based) %>%
  Filter(Negate(is.null), .) %>%
  rbindlist(idcol = "SITE") %>%
  as.data.frame()

# Filter for Order.q == 1
out2.q0 <- out2.df %>% filter(Order.q == 0)
m=lm(data=subset(out2.q0,Method =="Observed" ), SC~qD) 
m=lm(data=subset(out2.q0,t ==16 ), SC~qD) 
summary(m)
car::Anova(m, type="III", test.statistic="F")


out2.q1 <- out2.df %>% filter(Order.q == 1)

site.info <- read.table("Data_published/env_data_sitescale.txt", sep = "\t", header = TRUE)
colnames(site.info)[1]="SITE"

out2.q1=out2.q1%>%left_join(site.info)%>%as.data.frame()
m=lm(data=subset(out2.q1, t==32), SC~forest_4000) 
summary(m)
plot(allEffects(m))
DHARMa::simulateResiduals(m, plot = T)
car::Anova(m, type="III", test.statistic="F")

m=lm(data=subset(out2.q1, t==16), SC~forest_4000) 
summary(m)
plot(allEffects(m))
DHARMa::simulateResiduals(m, plot = T)
car::Anova(m, type="III", test.statistic="F")

m=lm(data=subset(out2.q1, t==8), SC~forest_4000) 
summary(m)
plot(allEffects(m))
DHARMa::simulateResiduals(m, plot = T)
car::Anova(m, type="III", test.statistic="F")

m=lm(data=subset(out2.q1, t==4), SC~forest_4000) 
summary(m)
plot(allEffects(m))
DHARMa::simulateResiduals(m, plot = T)
car::Anova(m, type="III", test.statistic="F")


library(ggplot2)
library(dplyr)
library(broom)

t_vals <- c(4, 8, 16, 32)

mods <- lapply(t_vals, function(tt) {
  m <- lm(SC ~ scale(forest_4000), data = subset(out2.q1, t == tt))
  broom::tidy(m) %>% 
    filter(term == "scale(forest_4000)") %>% 
    mutate(t = tt,
           signif = ifelse(p.value < 0.05, "significant", "nonsignificant"))
})

mod_df <- bind_rows(mods)

plot_df <- out2.q1 %>% filter(t %in% t_vals)

# Plot
ggplot(plot_df, aes(x = forest_4000, y = SC, color = factor(t))) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE,
              aes(linetype = factor(t)), size = 2) +
  scale_linetype_manual(values = ifelse(mod_df$signif == "significant", "solid", "dashed")) +
  scale_color_brewer(palette = "Dark2", name = "t") +
  labs(x = "Forest cover (4000 m buffer)",
       y = "Sample coverage (SC)") +
  guides(linetype = "none") +
  theme_bw(base_size = 16) +
  theme(
    text = element_text(color = "black"),      
    axis.text = element_text(color = "black"), 
    axis.title = element_text(color = "black"),
    legend.text = element_text(color = "black"),
    legend.title = element_text(color = "black"),
    panel.border = element_blank(),            
    panel.grid = element_blank(),              
    axis.line = element_line(color = "black")  
  )


##### 6. REDUCING SAMPLING EFFORT #####

## 6.1 Preparing data ##

data <- read.table("Data_published/bio_cast.txt", sep = "\t", header = TRUE)
data <- data %>% split(.$SITE)
data <- lapply(data, function(x) x[, -1])
data <- lapply(data, function(x) x[,colSums(x)>0])

## 6.2 Random Scenario ##

plots <- names(data)
results_plot <- list()
plots_graphs <- list()

for (p in seq_along(data)) {
  
  mat <- decostand(data[[p]], method = "pa")
  n_traps <- nrow(mat)
  beetle_sp <- ncol(mat)

  set.seed(42)
  B <- 100  # number of randomizations
  extinction_mat <- matrix(0, nrow = B, ncol = n_traps)
  
  for (b in 1:B) {
    trap_order <- sample(n_traps)
    mat_copy <- mat
    ext <- numeric(n_traps)
    
    for (j in seq_along(trap_order)) {
      mat_copy[trap_order[j], ] <- 0
      alive <- colSums(mat_copy) > 0
      ext[j] <- (1 - sum(alive) / beetle_sp) * 100
    }
    extinction_mat[b, ] <- ext
  }
  
  se <- c(0, apply(extinction_mat, 2, function(x) sd(x)/sqrt(length(x))))
  mean_loss <- c(0, colMeans(extinction_mat))
  removal_pct <- 0:n_traps
  
  df <- data.frame(
    trap_removed = removal_pct,
    species_loss = mean_loss,
    ymin = mean_loss - 1.96 * se,
    ymax = mean_loss + 1.96 * se
  )
  
  plots_graphs[[p]] <- ggplot(df, aes(x = trap_removed, y = species_loss)) +
    geom_line(color = "blue", linewidth = 1.2) +
    geom_ribbon(aes(ymin = ymin, ymax = ymax), fill = "blue", alpha = 0.2) +
    labs(x = "N removed traps", y = "% missing species (no caught)", title = plots[p]) +
    scale_y_reverse() + theme_minimal()
  
  results_plot[[p]] <- data.frame(
    species_loss = mean_loss,
    trap_removed = removal_pct,
    se = se
  )
}

names(plots_graphs) <- plots
names(results_plot) <- plots


## 6.3 Predefined trap removing Scenario ##

env.var <- read.table("Data_published/env_data_trapscale.txt", sep = "\t", header = TRUE)
traps.data = split(env.var, env.var$SITE)

set.seed(42)  # For reproducibility

data.loss <- list()

for (site in names(data)) {
  
  mat <- decostand(data[[site]], method = "pa")
  n_traps <- nrow(mat)
  beetle_sp <- ncol(mat)
  
  ordered <- traps.data[[site]] %>%
    arrange(desc(urbanization_index)) %>%  
   # arrange((urbanization_index)) %>%  
    pull(CELL_ID) ### CHANGE HERE to specify the order of removal. descending in this example
  
  plant_order <- sample(n_traps)
  mat_copy <- mat
  loss <- numeric(n_traps)
  
  for (j in seq_along(plant_order)) {
    mat_copy[plant_order[j], ] <- 0
    remaining <- colSums(mat_copy) > 0
    loss[j] <- (1 - sum(remaining) / beetle_sp) * 100
  }
  
  data.loss[[site]] <- data.frame(
    species_loss_perc = c(0, loss),
    n_trap_removed = 0:n_traps
  )
}

# output saving

data.loss.urbanization <- rbindlist(data.loss, idcol = "site")
#data.loss.urbanization.a <- data.loss.urbanization %>%
#  mutate(factor = "urbanization_index", type = "ascending")
data.loss.urbanization.d <- data.loss.urbanization %>%
  mutate(factor = "urbanization_index", type = "descending")


### run models

data.loss.final2= readRDS("Data_published/simulations.rds") #load aggregated simulation dataset

data.loss.final2.50=data.loss.final2%>%subset(n_trap_removed==8)
data.loss.final2.50$class=paste0(data.loss.final2.50$factor, "_", data.loss.final2.50$type)
data.loss.final2.50 %>%
  group_by(class) %>%
  summarise(
    mean = mean(species_loss_perc))  # Total number of species


m=lmer(species_loss_perc~class+(1|site), data = data.loss.final2.50)
summary(m)
DHARMa::simulateResiduals(m, plot = T)
library(emmeans)
car::Anova(m)
emmeans(m, "class", lmer.df = "satterthwaite")
emmeans(m, pairwise ~ class)


ggplot(data.loss.final2.50, aes(x = factor, y = species_loss_perc , fill = type)) +
  geom_boxplot() +
  labs(
    title = "Effect on sampling using 8 traps",
    x = "Factor",
    y = "Percentage of species loss"
  ) +
  scale_fill_manual(values = c("ascending" = "skyblue", "descending" = "salmon")) +
  theme_minimal()



data.loss.final2.75=data.loss.final2%>%subset(n_trap_removed==12)
data.loss.final2.75$class=paste0(data.loss.final2.75$factor, "_", data.loss.final2.75$type)
data.loss.final2.75 %>%
  group_by(class) %>%
  summarise(
    mean = mean(species_loss_perc))  # Total number of species


m=lmer(species_loss_perc~class+(1|site), data = data.loss.final2.75)
summary(m)
DHARMa::simulateResiduals(m, plot = T)
library(emmeans)
car::Anova(m)
emmeans(m, "class", lmer.df = "satterthwaite")
emmeans(m, pairwise ~ class)

### effect of tree cover

data=subset(data.loss.final2, factor=="random")
site.info=read.table("paesaggi/Landscape_metrics_portlevel_all.txt", sep="\t",header = T)
colnames(site.info)[1]="site"
data=left_join(data, site.info)

m=lm(species_loss_perc~forest_4000, data=subset(data, n_trap_removed==8))
summary(m)
DHARMa::simulateResiduals(m,plot = T)
plot(allEffects(m))


m=lm(species_loss_perc~forest_4000, data=subset(data, n_trap_removed==12))
summary(m)
DHARMa::simulateResiduals(m_gam,plot = T)
plot(allEffects(m))
car::Anova(m, type="III", test.statistic="F")


library(ggplot2)
library(ggeffects)

d4 <- subset(data, n_trap_removed == 12)
d8 <- subset(data, n_trap_removed == 8)

m4 <- lm(species_loss_perc ~ forest_4000, data = d4)
m8 <- lm(species_loss_perc ~ forest_4000, data = d8)

p4 <- ggpredict(m4, terms = "forest_4000 [all]")  
p8 <- ggpredict(m8, terms = "forest_4000 [all]")  

sig4 <- summary(m4)$coefficients["forest_4000", "Pr(>|t|)"] < 0.05
sig8 <- summary(m8)$coefficients["forest_4000", "Pr(>|t|)"] < 0.05
lt_vals <- c("4" = if (sig4) "solid" else "dashed",
             "8" = if (sig8) "solid" else "dashed")

ggplot() +
  geom_point(data = d4, aes(forest_4000, species_loss_perc, color = "4"), alpha = 0.6) +
  geom_point(data = d8, aes(forest_4000, species_loss_perc, color = "8"), alpha = 0.6) +
  geom_ribbon(data = p4, aes(x, ymin = conf.low, ymax = conf.high, fill = "4"), alpha = 0.12, inherit.aes = FALSE) +
  geom_ribbon(data = p8, aes(x, ymin = conf.low, ymax = conf.high, fill = "8"), alpha = 0.12, inherit.aes = FALSE) +
  geom_line(data = p4, aes(x, predicted, color = "4", linetype = "4"), size = 1) +
  geom_line(data = p8, aes(x, predicted, color = "8", linetype = "8"), size = 1) +
  scale_color_brewer(palette = "Dark2", name = "Traps", labels = c("4","8")) +
  scale_fill_brewer(palette = "Dark2", guide = "none") +
  scale_linetype_manual(values = lt_vals, guide = "none") +
  labs(x = "Forest cover (4000 m buffer)", y = "Species loss (%)") +
  theme_bw(base_size = 16) +
  theme(
    text = element_text(color = "black"),
    axis.text = element_text(color = "black"),
    axis.title = element_text(color = "black"),
    legend.text = element_text(color = "black"),
    legend.title = element_text(color = "black"),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.line = element_line(color = "black")
  )