# Composite Vocal Divergence Index (CVDI) and Multi-Component Significance Mapping
library(dplyr)
library(ggplot2)
library(tidyr)
library(reshape2)

# Set directory and load pre-processed effect size and significance data
base_dir <- "/Desktop/"

hpc1 <- read.csv(paste0(base_dir, "hedgespc1spp-positivos.csv"))
hpc2 <- read.csv(paste0(base_dir, "hedgespc2spp-positivos.csv"))
hpc3 <- read.csv(paste0(base_dir, "hedgespc3spp-positivos.csv"))

colnames(hpc1)[1:2] <- c("spp_ref", "spp_foc")
colnames(hpc2)[1:2] <- c("spp_ref", "spp_foc")
colnames(hpc3)[1:2] <- c("spp_ref", "spp_foc")

EMMPC1_contrasts <- read.csv(paste0(base_dir, "pvalues_pc1.csv"))
EMMPC2_contrasts <- read.csv(paste0(base_dir, "pvalues_pc2.csv"))
EMMPC3_contrasts <- read.csv(paste0(base_dir, "pvalues_pc3.csv"))

# Define taxonomic mapping and standardized species order
nombre_completo <- c(
  "carolinus" = "M. carolinus", "aurifrons" = "M. aurifrons", "santacruzi" = "M. santacruzi",
  "radiolatus" = "M. radiolatus", "superciliaris" = "M. superciliaris", "uropygialis" = "M. uropygialis",
  "hoffmannii" = "M. hoffmannii", "rubricapillus" = "M. rubricapillus", "chrysogenys" = "M. chrysogenys",
  "pygmaeus" = "M. pygmaeus", "flavifrons" = "M. flavifrons", "hypopolius" = "M. hypopolius",
  "cruentatus" = "M. cruentatus", "chrysauchen" = "M. chrysauchen", "pulcher" = "M. pulcher",
  "pucherani" = "M. pucherani", "herminieri" = "M. herminieri", "portoricensis" = "M. portoricensis",
  "erythrocephalus" = "M. erythrocephalus", "lewis" = "M. lewis", "formicivorus" = "M. formicivorus",
  "cactorum" = "M. cactorum", "candidus" = "M. candidus", "striatus" = "M. striatus",
  "percussus" = "X. percussus", "nuchalis" = "S. nuchalis", "ruber" = "S. ruber",
  "varius" = "S. varius", "thyroideus" = "S. thyroideus"
)
orden_deseado <- names(nombre_completo)
orden_deseado_completo <- unname(nombre_completo)

# Integration of effect sizes and calculation of the cumulative divergence index
hpc_total <- rbind(hpc1, hpc2, hpc3)

indice_divergencia <- hpc_total %>%
  mutate(species1 = pmin(spp_ref, spp_foc), species2 = pmax(spp_ref, spp_foc)) %>%
  group_by(species1, species2) %>%
  summarise(indice_suma_g = sum(hedg_g), .groups = 'drop') %>%
  rename(spp_ref = species1, spp_foc = species2)

# Summarize significance frequency across acoustic components (P < 0.05)
all_sig_pairs <- rbind(
  EMMPC1_contrasts %>% filter(p.value < 0.05) %>% select(spp_ref, spp_foc),
  EMMPC2_contrasts %>% filter(p.value < 0.05) %>% select(spp_ref, spp_foc),
  EMMPC3_contrasts %>% filter(p.value < 0.05) %>% select(spp_ref, spp_foc)
)

significance_summary <- all_sig_pairs %>%
  mutate(species1 = pmin(spp_ref, spp_foc), species2 = pmax(spp_ref, spp_foc)) %>%
  group_by(species1, species2) %>%
  summarise(significant_in = n(), .groups = 'drop') %>%
  rename(spp_ref = species1, spp_foc = species2)

# Data restructuring for matrix-based visualization
hpc_matrix <- indice_divergencia %>%
  bind_rows(rename(., spp_ref = spp_foc, spp_foc = spp_ref)) %>%
  mutate(spp_ref = factor(spp_ref, levels = orden_deseado),
         spp_foc = factor(spp_foc, levels = orden_deseado)) %>%
  complete(spp_ref, spp_foc, fill = list(indice_suma_g = NA)) %>%
  pivot_wider(names_from = spp_foc, values_from = indice_suma_g) %>%
  as.data.frame() %>% {rownames(.) <- .$spp_ref; .} %>%
  select(-spp_ref) %>% as.matrix()

hpc_matrix <- hpc_matrix[orden_deseado, orden_deseado]
hpc_matrix[is.na(hpc_matrix)] <- 0

hpc_long <- reshape2::melt(hpc_matrix, varnames = c("spp_ref", "spp_foc"), value.name = "indice_suma_g") %>%
  mutate(spp_ref = factor(spp_ref, levels = orden_deseado),
         spp_foc = factor(spp_foc, levels = orden_deseado),
         row_num = as.numeric(spp_ref), col_num = as.numeric(spp_foc)) %>%
  filter(row_num >= col_num) %>% 
  mutate(spp_ref = nombre_completo[as.character(spp_ref)],
         spp_foc = nombre_completo[as.character(spp_foc)]) %>%
  mutate(spp_ref = factor(spp_ref, levels = orden_deseado_completo),
         spp_foc = factor(spp_foc, levels = orden_deseado_completo))

# Final data merge and CVDI heatmap generation with significance overlays
summary_para_unir <- significance_summary %>%
  mutate(spp_ref_fmt = nombre_completo[spp_ref],
         spp_foc_fmt = nombre_completo[spp_foc],
         pair_id = paste(pmin(spp_ref_fmt, spp_foc_fmt), pmax(spp_ref_fmt, spp_foc_fmt), sep = "-")) %>%
  select(pair_id, significant_in)

hpc_long_con_llave <- hpc_long %>%
  mutate(pair_id = paste(pmin(as.character(spp_ref), as.character(spp_foc)),
                         pmax(as.character(spp_ref), as.character(spp_foc)), sep = "-"))

hpc_long_final <- left_join(hpc_long_con_llave, summary_para_unir, by = "pair_id")

ggplot(hpc_long_final, aes(x = spp_foc, y = spp_ref)) +
  geom_tile(aes(fill = indice_suma_g), color = "#4a4a4a", linewidth = 0.5) +
  geom_text(aes(label = significant_in), color = "black", size = 4, fontface = "bold", na.rm = TRUE) +
  scale_fill_distiller(palette = "RdYlBu", direction = -1, name = "(CVDI)", trans = "log",
                       breaks = c(0.5, 1, 2, 5, 10), na.value = "grey40") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 17, face = "italic"),
        axis.text.y = element_text(hjust = 1, size = 13, face = "italic"),
        panel.grid = element_blank(), legend.position = "right",
        text = element_text(family = "Times New Roman"),
        axis.ticks.x = element_line(linewidth = 0.9, color = "black")) +
  labs(x = "", y = "") +
  coord_fixed()
